From 7aca2c03121df14e900050af09315045a9f97890 Mon Sep 17 00:00:00 2001 From: JordanTheToaster Date: Fri, 23 Aug 2024 04:45:54 +0100 Subject: [PATCH] 3rdparty: Update rapidyml to 0.7.1 --- 3rdparty/rapidyaml/include/c4/charconv.hpp | 4 +- 3rdparty/rapidyaml/include/c4/compiler.hpp | 3 + 3rdparty/rapidyaml/include/c4/error.hpp | 3 +- .../rapidyaml/include/c4/ext/fast_float.hpp | 33 + .../rapidyaml/include/c4/ext/fast_float_all.h | 3677 ++++++++ 3rdparty/rapidyaml/include/c4/format.hpp | 11 +- 3rdparty/rapidyaml/include/c4/szconv.hpp | 2 +- 3rdparty/rapidyaml/include/c4/yml/common.hpp | 252 +- .../include/c4/yml/detail/checks.hpp | 20 +- .../include/c4/yml/detail/parser_dbg.hpp | 141 +- .../rapidyaml/include/c4/yml/detail/print.hpp | 146 +- .../rapidyaml/include/c4/yml/detail/stack.hpp | 106 +- .../rapidyaml/include/c4/yml/emit.def.hpp | 847 +- 3rdparty/rapidyaml/include/c4/yml/emit.hpp | 661 +- .../include/c4/yml/event_handler_stack.hpp | 186 + .../include/c4/yml/event_handler_tree.hpp | 754 ++ .../include/c4/yml/filter_processor.hpp | 512 ++ 3rdparty/rapidyaml/include/c4/yml/fwd.hpp | 24 + 3rdparty/rapidyaml/include/c4/yml/node.hpp | 631 +- .../rapidyaml/include/c4/yml/node_type.hpp | 271 + 3rdparty/rapidyaml/include/c4/yml/parse.hpp | 950 +- .../include/c4/yml/parse_engine.def.hpp | 8142 +++++++++++++++++ .../rapidyaml/include/c4/yml/parse_engine.hpp | 778 ++ .../rapidyaml/include/c4/yml/parser_state.hpp | 205 + .../include/c4/yml/reference_resolver.hpp | 74 + .../rapidyaml/include/c4/yml/std/vector.hpp | 20 +- 3rdparty/rapidyaml/include/c4/yml/tag.hpp | 84 + 3rdparty/rapidyaml/include/c4/yml/tree.hpp | 806 +- 3rdparty/rapidyaml/include/c4/yml/writer.hpp | 48 +- 3rdparty/rapidyaml/include/c4/yml/yml.hpp | 5 + 3rdparty/rapidyaml/include/ryml.natvis | 97 +- 3rdparty/rapidyaml/src/c4/error.cpp | 29 +- 3rdparty/rapidyaml/src/c4/yml/common.cpp | 14 +- 3rdparty/rapidyaml/src/c4/yml/parse.cpp | 5839 +----------- 3rdparty/rapidyaml/src/c4/yml/tree.cpp | 977 +- 35 files 
changed, 17762 insertions(+), 8590 deletions(-) create mode 100644 3rdparty/rapidyaml/include/c4/ext/fast_float.hpp create mode 100644 3rdparty/rapidyaml/include/c4/ext/fast_float_all.h create mode 100644 3rdparty/rapidyaml/include/c4/yml/event_handler_stack.hpp create mode 100644 3rdparty/rapidyaml/include/c4/yml/event_handler_tree.hpp create mode 100644 3rdparty/rapidyaml/include/c4/yml/filter_processor.hpp create mode 100644 3rdparty/rapidyaml/include/c4/yml/fwd.hpp create mode 100644 3rdparty/rapidyaml/include/c4/yml/node_type.hpp create mode 100644 3rdparty/rapidyaml/include/c4/yml/parse_engine.def.hpp create mode 100644 3rdparty/rapidyaml/include/c4/yml/parse_engine.hpp create mode 100644 3rdparty/rapidyaml/include/c4/yml/parser_state.hpp create mode 100644 3rdparty/rapidyaml/include/c4/yml/reference_resolver.hpp create mode 100644 3rdparty/rapidyaml/include/c4/yml/tag.hpp diff --git a/3rdparty/rapidyaml/include/c4/charconv.hpp b/3rdparty/rapidyaml/include/c4/charconv.hpp index 71d50ff6707ccb..41a513bad04eaa 100644 --- a/3rdparty/rapidyaml/include/c4/charconv.hpp +++ b/3rdparty/rapidyaml/include/c4/charconv.hpp @@ -60,8 +60,7 @@ # if defined(__GNUC__) && __GNUC__ >= 5 C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow") # endif -//# include "c4/ext/fast_float.hpp" -#include "fast_float/fast_float.h" +# include "c4/ext/fast_float.hpp" C4_SUPPRESS_WARNING_GCC_POP # endif #elif (C4_CPP >= 17) @@ -2172,6 +2171,7 @@ inline size_t atof_first(csubstr str, float * C4_RESTRICT v) noexcept */ C4_ALWAYS_INLINE bool atod(csubstr str, double * C4_RESTRICT v) noexcept { + C4_ASSERT(str.len > 0); C4_ASSERT(str.triml(" \r\t\n").len == str.len); #if C4CORE_HAVE_FAST_FLOAT // fastfloat cannot parse hexadecimal floats diff --git a/3rdparty/rapidyaml/include/c4/compiler.hpp b/3rdparty/rapidyaml/include/c4/compiler.hpp index 5833913cf19e3c..07c5e91aa3f306 100644 --- a/3rdparty/rapidyaml/include/c4/compiler.hpp +++ b/3rdparty/rapidyaml/include/c4/compiler.hpp @@ -93,6 +93,9 @@ # 
define C4_CLANG_VERSION __apple_build_version__ # endif # elif defined(__GNUC__) +# ifdef __MINGW32__ +# define C4_MINGW +# endif # define C4_GCC # if defined(__GNUC_PATCHLEVEL__) # define C4_GCC_VERSION C4_VERSION_ENCODED(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) diff --git a/3rdparty/rapidyaml/include/c4/error.hpp b/3rdparty/rapidyaml/include/c4/error.hpp index 93fa8b60777db7..95d7c0df71b639 100644 --- a/3rdparty/rapidyaml/include/c4/error.hpp +++ b/3rdparty/rapidyaml/include/c4/error.hpp @@ -177,7 +177,8 @@ struct ScopedErrorSettings /** source location */ struct srcloc; -C4CORE_EXPORT void handle_error(srcloc s, const char *fmt, ...); +// watchout: for VS the [[noreturn]] needs to come before other annotations like C4CORE_EXPORT +[[noreturn]] C4CORE_EXPORT void handle_error(srcloc s, const char *fmt, ...); C4CORE_EXPORT void handle_warning(srcloc s, const char *fmt, ...); diff --git a/3rdparty/rapidyaml/include/c4/ext/fast_float.hpp b/3rdparty/rapidyaml/include/c4/ext/fast_float.hpp new file mode 100644 index 00000000000000..4bf3c0d67387a0 --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/ext/fast_float.hpp @@ -0,0 +1,33 @@ +#ifndef _C4_EXT_FAST_FLOAT_HPP_ +#define _C4_EXT_FAST_FLOAT_HPP_ + +#if defined(_MSC_VER) && !defined(__clang__) +# pragma warning(push) +# pragma warning(disable: 4365) // '=': conversion from 'const _Ty' to 'fast_float::limb', signed/unsigned mismatch +# pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe +#elif defined(__clang__) || defined(__APPLE_CC__) || defined(_LIBCPP_VERSION) +# pragma clang diagnostic push +# if (defined(__clang_major__) && (__clang_major__ >= 9)) || defined(__APPLE_CC__) +# pragma clang diagnostic ignored "-Wfortify-source" +# endif +# pragma clang diagnostic ignored "-Wshift-count-overflow" +# pragma clang diagnostic ignored "-Wold-style-cast" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wnarrowing" +# pragma GCC diagnostic 
ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wuseless-cast" +# pragma GCC diagnostic ignored "-Wold-style-cast" +#endif + +#include "c4/ext/fast_float_all.h" + +#ifdef _MSC_VER +# pragma warning(pop) +#elif defined(__clang__) || defined(__APPLE_CC__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif // _C4_EXT_FAST_FLOAT_HPP_ diff --git a/3rdparty/rapidyaml/include/c4/ext/fast_float_all.h b/3rdparty/rapidyaml/include/c4/ext/fast_float_all.h new file mode 100644 index 00000000000000..a6088c522d9986 --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/ext/fast_float_all.h @@ -0,0 +1,3677 @@ +// fast_float by Daniel Lemire +// fast_float by João Paulo Magalhaes +// +// +// with contributions from Eugene Golushkov +// with contributions from Maksim Kita +// with contributions from Marcin Wojdyr +// with contributions from Neal Richardson +// with contributions from Tim Paine +// with contributions from Fabio Pellacini +// with contributions from Lénárd Szolnoki +// with contributions from Jan Pharago +// with contributions from Maya Warrier +// with contributions from Taha Khokhar +// +// +// MIT License Notice +// +// MIT License +// +// Copyright (c) 2021 The fast_float authors +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// + +#ifndef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H +#define FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H + +#ifdef __has_include +#if __has_include() +#include +#endif +#endif + +// Testing for https://wg21.link/N3652, adopted in C++14 +#if __cpp_constexpr >= 201304 +#define FASTFLOAT_CONSTEXPR14 constexpr +#else +#define FASTFLOAT_CONSTEXPR14 +#endif + +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +#define FASTFLOAT_HAS_BIT_CAST 1 +#else +#define FASTFLOAT_HAS_BIT_CAST 0 +#endif + +#if defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L +#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1 +#else +#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0 +#endif + +// Testing for relevant C++20 constexpr library features +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED \ + && FASTFLOAT_HAS_BIT_CAST \ + && __cpp_lib_constexpr_algorithms >= 201806L /*For std::copy and std::fill*/ +#define FASTFLOAT_CONSTEXPR20 constexpr +#define FASTFLOAT_IS_CONSTEXPR 1 +#else +#define FASTFLOAT_CONSTEXPR20 +#define FASTFLOAT_IS_CONSTEXPR 0 +#endif + +#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H + +#ifndef FASTFLOAT_FLOAT_COMMON_H +#define FASTFLOAT_FLOAT_COMMON_H + +#include +#include +#include +#include +#include +#include +#ifdef __has_include + #if __has_include() && (__cplusplus > 202002L || _MSVC_LANG > 202002L) + #include + #endif +#endif + +namespace fast_float { + +#define FASTFLOAT_JSONFMT (1 << 5) +#define FASTFLOAT_FORTRANFMT (1 << 6) + +enum chars_format { 
+ scientific = 1 << 0, + fixed = 1 << 2, + hex = 1 << 3, + no_infnan = 1 << 4, + // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6 + json = FASTFLOAT_JSONFMT | fixed | scientific | no_infnan, + // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed. + json_or_infnan = FASTFLOAT_JSONFMT | fixed | scientific, + fortran = FASTFLOAT_FORTRANFMT | fixed | scientific, + general = fixed | scientific +}; + +template +struct from_chars_result_t { + UC const* ptr; + std::errc ec; +}; +using from_chars_result = from_chars_result_t; + +template +struct parse_options_t { + constexpr explicit parse_options_t(chars_format fmt = chars_format::general, + UC dot = UC('.')) + : format(fmt), decimal_point(dot) {} + + /** Which number formats are accepted */ + chars_format format; + /** The character used as decimal point */ + UC decimal_point; +}; +using parse_options = parse_options_t; + +} + +#if FASTFLOAT_HAS_BIT_CAST +#include +#endif + +#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) \ + || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \ + || defined(__MINGW64__) \ + || defined(__s390x__) \ + || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) \ + || defined(__loongarch64) ) +#define FASTFLOAT_64BIT 1 +#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \ + || defined(__arm__) || defined(_M_ARM) || defined(__ppc__) \ + || defined(__MINGW32__) || defined(__EMSCRIPTEN__)) +#define FASTFLOAT_32BIT 1 +#else + // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow. + // We can never tell the register width, but the SIZE_MAX is a good approximation. + // UINTPTR_MAX and INTPTR_MAX are optional, so avoid them for max portability. 
+ #if SIZE_MAX == 0xffff + #error Unknown platform (16-bit, unsupported) + #elif SIZE_MAX == 0xffffffff + #define FASTFLOAT_32BIT 1 + #elif SIZE_MAX == 0xffffffffffffffff + #define FASTFLOAT_64BIT 1 + #else + #error Unknown platform (not 32-bit, not 64-bit?) + #endif +#endif + +#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__)) +#include +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define FASTFLOAT_VISUAL_STUDIO 1 +#endif + +#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ +#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined _WIN32 +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#if defined(__APPLE__) || defined(__FreeBSD__) +#include +#elif defined(sun) || defined(__sun) +#include +#elif defined(__MVS__) +#include +#else +#ifdef __has_include +#if __has_include() +#include +#endif //__has_include() +#endif //__has_include +#endif +# +#ifndef __BYTE_ORDER__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#ifndef __ORDER_LITTLE_ENDIAN__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#define FASTFLOAT_IS_BIG_ENDIAN 1 +#endif +#endif + +#if defined(__SSE2__) || \ + (defined(FASTFLOAT_VISUAL_STUDIO) && \ + (defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))) +#define FASTFLOAT_SSE2 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#define FASTFLOAT_NEON 1 +#endif + +#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON) +#define FASTFLOAT_HAS_SIMD 1 +#endif + +#if defined(__GNUC__) +// disable -Wcast-align=strict (GCC only) +#define FASTFLOAT_SIMD_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") +#else +#define FASTFLOAT_SIMD_DISABLE_WARNINGS +#endif + +#if defined(__GNUC__) +#define FASTFLOAT_SIMD_RESTORE_WARNINGS \ + _Pragma("GCC diagnostic pop") +#else +#define 
FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif + + + +#ifdef FASTFLOAT_VISUAL_STUDIO +#define fastfloat_really_inline __forceinline +#else +#define fastfloat_really_inline inline __attribute__((always_inline)) +#endif + +#ifndef FASTFLOAT_ASSERT +#define FASTFLOAT_ASSERT(x) { ((void)(x)); } +#endif + +#ifndef FASTFLOAT_DEBUG_ASSERT +#define FASTFLOAT_DEBUG_ASSERT(x) { ((void)(x)); } +#endif + +// rust style `try!()` macro, or `?` operator +#define FASTFLOAT_TRY(x) { if (!(x)) return false; } + +#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type + + +namespace fast_float { + +fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED + return std::is_constant_evaluated(); +#else + return false; +#endif +} + +template +fastfloat_really_inline constexpr bool is_supported_float_type() { + return std::is_same::value || std::is_same::value +#if __STDCPP_FLOAT32_T__ + || std::is_same::value +#endif +#if __STDCPP_FLOAT64_T__ + || std::is_same::value +#endif + ; +} + +template +fastfloat_really_inline constexpr bool is_supported_char_type() { + return + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value; +} + +// Compares two ASCII strings in a case insensitive manner. +template +inline FASTFLOAT_CONSTEXPR14 bool +fastfloat_strncasecmp(UC const * input1, UC const * input2, size_t length) { + char running_diff{0}; + for (size_t i = 0; i < length; ++i) { + running_diff |= (char(input1[i]) ^ char(input2[i])); + } + return (running_diff == 0) || (running_diff == 32); +} + +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif + +// a pointer and a length to a contiguous block of memory +template +struct span { + const T* ptr; + size_t length; + constexpr span(const T* _ptr, size_t _length) : ptr(_ptr), length(_length) {} + constexpr span() : ptr(nullptr), length(0) {} + + constexpr size_t len() const noexcept { + return length; + } + + FASTFLOAT_CONSTEXPR14 const T& operator[](size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return ptr[index]; + } +}; + +struct value128 { + uint64_t low; + uint64_t high; + constexpr value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {} + constexpr value128() : low(0), high(0) {} +}; + +/* Helper C++14 constexpr generic implementation of leading_zeroes */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +int leading_zeroes_generic(uint64_t input_num, int last_bit = 0) { + if(input_num & uint64_t(0xffffffff00000000)) { input_num >>= 32; last_bit |= 32; } + if(input_num & uint64_t( 0xffff0000)) { input_num >>= 16; last_bit |= 16; } + if(input_num & uint64_t( 0xff00)) { input_num >>= 8; last_bit |= 8; } + if(input_num & uint64_t( 0xf0)) { input_num >>= 4; last_bit |= 4; } + if(input_num & uint64_t( 0xc)) { input_num >>= 2; last_bit |= 2; } + if(input_num & uint64_t( 0x2)) { /* input_num >>= 1; */ last_bit |= 1; } + return 63 - last_bit; +} + +/* result might be undefined when input_num is zero */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +int leading_zeroes(uint64_t input_num) { + assert(input_num > 0); + if (cpp20_and_in_constexpr()) { + return leading_zeroes_generic(input_num); + } +#ifdef FASTFLOAT_VISUAL_STUDIO + #if defined(_M_X64) || defined(_M_ARM64) + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ _BitScanReverse64(&leading_zero, input_num); + return (int)(63 - leading_zero); + #else + return leading_zeroes_generic(input_num); + #endif +#else + return __builtin_clzll(input_num); +#endif +} + +// slow emulation routine for 32-bit +fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +uint64_t umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = (uint64_t)(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + (uint64_t)(lo < bd); + return lo; +} + +#ifdef FASTFLOAT_32BIT + +// slow emulation routine for 32-bit +#if !defined(__MINGW64__) +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + return umul128_generic(ab, cd, hi); +} +#endif // !__MINGW64__ + +#endif // FASTFLOAT_32BIT + + +// compute 64-bit a*b +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +value128 full_multiplication(uint64_t a, uint64_t b) { + if (cpp20_and_in_constexpr()) { + value128 answer; + answer.low = umul128_generic(a, b, &answer.high); + return answer; + } + value128 answer; +#if defined(_M_ARM64) && !defined(__MINGW32__) + // ARM64 has native support for 64-bit multiplications, no need to emulate + // But MinGW on ARM64 doesn't have native support for 64-bit multiplications + answer.high = __umulh(a, b); + answer.low = a * b; +#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__)) + answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 +#elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__) + __uint128_t r = ((__uint128_t)a) * b; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 
64); +#else + answer.low = umul128_generic(a, b, &answer.high); +#endif + return answer; +} + +struct adjusted_mantissa { + uint64_t mantissa{0}; + int32_t power2{0}; // a negative value indicates an invalid result + adjusted_mantissa() = default; + constexpr bool operator==(const adjusted_mantissa &o) const { + return mantissa == o.mantissa && power2 == o.power2; + } + constexpr bool operator!=(const adjusted_mantissa &o) const { + return mantissa != o.mantissa || power2 != o.power2; + } +}; + +// Bias so we can get the real exponent with an invalid adjusted_mantissa. +constexpr static int32_t invalid_am_bias = -0x8000; + +// used for binary_format_lookup_tables::max_mantissa +constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5; + +template +struct binary_format_lookup_tables; + +template struct binary_format : binary_format_lookup_tables { + using equiv_uint = typename std::conditional::type; + + static inline constexpr int mantissa_explicit_bits(); + static inline constexpr int minimum_exponent(); + static inline constexpr int infinite_power(); + static inline constexpr int sign_index(); + static inline constexpr int min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST + static inline constexpr int max_exponent_fast_path(); + static inline constexpr int max_exponent_round_to_even(); + static inline constexpr int min_exponent_round_to_even(); + static inline constexpr uint64_t max_mantissa_fast_path(int64_t power); + static inline constexpr uint64_t max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST + static inline constexpr int largest_power_of_ten(); + static inline constexpr int smallest_power_of_ten(); + static inline constexpr T exact_power_of_ten(int64_t power); + static inline constexpr size_t max_digits(); + static inline constexpr equiv_uint exponent_mask(); + static inline constexpr equiv_uint mantissa_mask(); + static inline constexpr equiv_uint hidden_bit_mask(); +}; + +template +struct binary_format_lookup_tables { + 
static constexpr double powers_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; + + // Largest integer value v so that (5**index * v) <= 1<<53. + // 0x10000000000000 == 1 << 53 + static constexpr uint64_t max_mantissa[] = { + 0x10000000000000, + 0x10000000000000 / 5, + 0x10000000000000 / (5 * 5), + 0x10000000000000 / (5 * 5 * 5), + 0x10000000000000 / (5 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555), + 0x10000000000000 / (constant_55555 * 5), + 0x10000000000000 / (constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555), + 0x10000000000000 / (constant_55555 * constant_55555 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), + 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5)}; +}; + +template +constexpr double binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t 
binary_format_lookup_tables::max_mantissa[]; + +template +struct binary_format_lookup_tables { + static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, + 1e6f, 1e7f, 1e8f, 1e9f, 1e10f}; + + // Largest integer value v so that (5**index * v) <= 1<<24. + // 0x1000000 == 1<<24 + static constexpr uint64_t max_mantissa[] = { + 0x1000000, + 0x1000000 / 5, + 0x1000000 / (5 * 5), + 0x1000000 / (5 * 5 * 5), + 0x1000000 / (5 * 5 * 5 * 5), + 0x1000000 / (constant_55555), + 0x1000000 / (constant_55555 * 5), + 0x1000000 / (constant_55555 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * constant_55555), + 0x1000000 / (constant_55555 * constant_55555 * 5)}; +}; + +template +constexpr float binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; + +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -22; +#endif +} + +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -10; +#endif +} + +template <> inline constexpr int binary_format::mantissa_explicit_bits() { + return 52; +} +template <> inline constexpr int binary_format::mantissa_explicit_bits() { + return 23; +} + +template <> inline constexpr int binary_format::max_exponent_round_to_even() { + return 23; +} + +template <> inline constexpr int binary_format::max_exponent_round_to_even() { + return 10; +} + +template <> inline constexpr int binary_format::min_exponent_round_to_even() { + return -4; +} + +template <> inline constexpr int binary_format::min_exponent_round_to_even() { + return -17; +} + +template <> inline constexpr int binary_format::minimum_exponent() { + return -1023; +} +template <> inline constexpr int 
binary_format::minimum_exponent() { + return -127; +} + +template <> inline constexpr int binary_format::infinite_power() { + return 0x7FF; +} +template <> inline constexpr int binary_format::infinite_power() { + return 0xFF; +} + +template <> inline constexpr int binary_format::sign_index() { return 63; } +template <> inline constexpr int binary_format::sign_index() { return 31; } + +template <> inline constexpr int binary_format::max_exponent_fast_path() { + return 22; +} +template <> inline constexpr int binary_format::max_exponent_fast_path() { + return 10; +} + +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 22 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 10 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} + +template <> +inline constexpr double binary_format::exact_power_of_ten(int64_t power) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; +} +template <> +inline constexpr float binary_format::exact_power_of_ten(int64_t power) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; +} + + +template <> +inline constexpr int binary_format::largest_power_of_ten() { + return 308; +} +template <> +inline constexpr int binary_format::largest_power_of_ten() { 
+ return 38; +} + +template <> +inline constexpr int binary_format::smallest_power_of_ten() { + return -342; +} +template <> +inline constexpr int binary_format::smallest_power_of_ten() { + return -64; +} + +template <> inline constexpr size_t binary_format::max_digits() { + return 769; +} +template <> inline constexpr size_t binary_format::max_digits() { + return 114; +} + +template <> inline constexpr binary_format::equiv_uint + binary_format::exponent_mask() { + return 0x7F800000; +} +template <> inline constexpr binary_format::equiv_uint + binary_format::exponent_mask() { + return 0x7FF0000000000000; +} + +template <> inline constexpr binary_format::equiv_uint + binary_format::mantissa_mask() { + return 0x007FFFFF; +} +template <> inline constexpr binary_format::equiv_uint + binary_format::mantissa_mask() { + return 0x000FFFFFFFFFFFFF; +} + +template <> inline constexpr binary_format::equiv_uint + binary_format::hidden_bit_mask() { + return 0x00800000; +} +template <> inline constexpr binary_format::equiv_uint + binary_format::hidden_bit_mask() { + return 0x0010000000000000; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void to_float(bool negative, adjusted_mantissa am, T &value) { + using fastfloat_uint = typename binary_format::equiv_uint; + fastfloat_uint word = (fastfloat_uint)am.mantissa; + word |= fastfloat_uint(am.power2) << binary_format::mantissa_explicit_bits(); + word |= fastfloat_uint(negative) << binary_format::sign_index(); +#if FASTFLOAT_HAS_BIT_CAST + value = std::bit_cast(word); +#else + ::memcpy(&value, &word, sizeof(T)); +#endif +} + +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default +template +struct space_lut { + static constexpr bool value[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +}; + +template +constexpr bool space_lut::value[]; + +inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; } +#endif + +template +static constexpr uint64_t int_cmp_zeros() +{ + static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4), "Unsupported character size"); + return (sizeof(UC) == 1) ? 0x3030303030303030 : (sizeof(UC) == 2) ? (uint64_t(UC('0')) << 48 | uint64_t(UC('0')) << 32 | uint64_t(UC('0')) << 16 | UC('0')) : (uint64_t(UC('0')) << 32 | UC('0')); +} +template +static constexpr int int_cmp_len() +{ + return sizeof(uint64_t) / sizeof(UC); +} +template +static constexpr UC const * str_const_nan() +{ + return nullptr; +} +template<> +constexpr char const * str_const_nan() +{ + return "nan"; +} +template<> +constexpr wchar_t const * str_const_nan() +{ + return L"nan"; +} +template<> +constexpr char16_t const * str_const_nan() +{ + return u"nan"; +} +template<> +constexpr char32_t const * str_const_nan() +{ + return U"nan"; +} +template +static constexpr UC const * str_const_inf() +{ + return nullptr; +} +template<> +constexpr char const * str_const_inf() +{ + return "infinity"; +} +template<> +constexpr wchar_t const * str_const_inf() +{ + return L"infinity"; +} +template<> +constexpr char16_t const * str_const_inf() +{ + return u"infinity"; +} +template<> +constexpr char32_t const * str_const_inf() +{ + return U"infinity"; +} + + +template +struct int_luts { + static constexpr uint8_t chdigit[] = { + 255, 255, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 + }; + + static constexpr size_t maxdigits_u64[] = { + 64, 41, 32, 28, 25, 23, 22, 21, + 20, 19, 18, 18, 17, 17, 16, 16, + 16, 16, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 14, 13, 13, 13, + 13, 13, 13 + }; + + static constexpr uint64_t min_safe_u64[] = { + 9223372036854775808ull, 12157665459056928801ull, 4611686018427387904, 7450580596923828125, 4738381338321616896, + 3909821048582988049, 9223372036854775808ull, 12157665459056928801ull, 10000000000000000000ull, 5559917313492231481, + 2218611106740436992, 8650415919381337933, 2177953337809371136, 6568408355712890625, 1152921504606846976, + 2862423051509815793, 6746640616477458432, 15181127029874798299ull, 1638400000000000000, 3243919932521508681, + 6221821273427820544, 11592836324538749809ull, 
876488338465357824, 1490116119384765625, 2481152873203736576, + 4052555153018976267, 6502111422497947648, 10260628712958602189ull, 15943230000000000000ull, 787662783788549761, + 1152921504606846976, 1667889514952984961, 2386420683693101056, 3379220508056640625, 4738381338321616896 + }; +}; + +template +constexpr uint8_t int_luts::chdigit[]; + +template +constexpr size_t int_luts::maxdigits_u64[]; + +template +constexpr uint64_t int_luts::min_safe_u64[]; + +template +fastfloat_really_inline +constexpr uint8_t ch_to_digit(UC c) { return int_luts<>::chdigit[static_cast(c)]; } + +fastfloat_really_inline +constexpr size_t max_digits_u64(int base) { return int_luts<>::maxdigits_u64[base - 2]; } + +// If a u64 is exactly max_digits_u64() in length, this is +// the value below which it has definitely overflowed. +fastfloat_really_inline +constexpr uint64_t min_safe_u64(int base) { return int_luts<>::min_safe_u64[base - 2]; } + +} // namespace fast_float + +#endif + + +#ifndef FASTFLOAT_FAST_FLOAT_H +#define FASTFLOAT_FAST_FLOAT_H + + +namespace fast_float { +/** + * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting + * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. + * The resulting floating-point value is the closest floating-point values (using either float or double), + * using the "round to even" convention for values that would otherwise fall right in-between two values. + * That is, we provide exact parsing according to the IEEE standard. + * + * Given a successful parse, the pointer (`ptr`) in the returned value is set to point right after the + * parsed number, and the `value` referenced is set to the parsed value. In case of error, the returned + * `ec` contains a representative error, otherwise the default (`std::errc()`) value is stored. + * + * The implementation does not throw and does not allocate memory (e.g., with `new` or `malloc`). 
+ * + * Like the C++17 standard, the `fast_float::from_chars` functions take an optional last argument of + * the type `fast_float::chars_format`. It is a bitset value: we check whether + * `fmt & fast_float::chars_format::fixed` and `fmt & fast_float::chars_format::scientific` are set + * to determine whether we allow the fixed point and scientific notation respectively. + * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. + */ +template())> +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars(UC const * first, UC const * last, + T &value, chars_format fmt = chars_format::general) noexcept; + +/** + * Like from_chars, but accepts an `options` argument to govern number parsing. + */ +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars_advanced(UC const * first, UC const * last, + T &value, parse_options_t options) noexcept; +/** +* from_chars for integer types. +*/ +template ())> +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars(UC const * first, UC const * last, T& value, int base = 10) noexcept; + +} // namespace fast_float +#endif // FASTFLOAT_FAST_FLOAT_H + +#ifndef FASTFLOAT_ASCII_NUMBER_H +#define FASTFLOAT_ASCII_NUMBER_H + +#include +#include +#include +#include +#include +#include + + +#ifdef FASTFLOAT_SSE2 +#include +#endif + +#ifdef FASTFLOAT_NEON +#include +#endif + +namespace fast_float { + +template +fastfloat_really_inline constexpr bool has_simd_opt() { +#ifdef FASTFLOAT_HAS_SIMD + return std::is_same::value; +#else + return false; +#endif +} + +// Next function can be micro-optimized, but compilers are entirely +// able to optimize it well. 
+template +fastfloat_really_inline constexpr bool is_integer(UC c) noexcept { + return !(c > UC('9') || c < UC('0')); +} + +fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { + return (val & 0xFF00000000000000) >> 56 + | (val & 0x00FF000000000000) >> 40 + | (val & 0x0000FF0000000000) >> 24 + | (val & 0x000000FF00000000) >> 8 + | (val & 0x00000000FF000000) << 8 + | (val & 0x0000000000FF0000) << 24 + | (val & 0x000000000000FF00) << 40 + | (val & 0x00000000000000FF) << 56; +} + +// Read 8 UC into a u64. Truncates UC if not char. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint64_t read8_to_u64(const UC *chars) { + if (cpp20_and_in_constexpr() || !std::is_same::value) { + uint64_t val = 0; + for(int i = 0; i < 8; ++i) { + val |= uint64_t(uint8_t(*chars)) << (i*8); + ++chars; + } + return val; + } + uint64_t val; + ::memcpy(&val, chars, sizeof(uint64_t)); +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + // Need to read as-if the number was in little-endian order. + val = byteswap(val); +#endif + return val; +} + +#ifdef FASTFLOAT_SSE2 + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const __m128i data) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i packed = _mm_packus_epi16(data, data); +#ifdef FASTFLOAT_64BIT + return uint64_t(_mm_cvtsi128_si64(packed)); +#else + uint64_t value; + // Visual Studio + older versions of GCC don't support _mm_storeu_si64 + _mm_storel_epi64(reinterpret_cast<__m128i*>(&value), packed); + return value; +#endif +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast(chars))); +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#elif defined(FASTFLOAT_NEON) + + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const uint16x8_t data) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + uint8x8_t utf8_packed = vmovn_u16(data); + return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0); 
+FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64(vld1q_u16(reinterpret_cast(chars))); +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#endif // FASTFLOAT_SSE2 + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else +template ()) = 0> +#endif +// dummy for compile +uint64_t simd_read8_to_u64(UC const*) { + return 0; +} + +// credit @aqrit +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +uint32_t parse_eight_digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return uint32_t(val); +} + + +// Call this if chars are definitely 8 digits. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept { + if (cpp20_and_in_constexpr() || !has_simd_opt()) { + return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay + } + return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); +} + + +// credit @aqrit +fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { + return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + 0x8080808080808080)); +} + + +#ifdef FASTFLOAT_HAS_SIMD + +// Call this if chars might not be 8 digits. +// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) +// ensures we don't load SIMD registers twice. 
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept { + if (cpp20_and_in_constexpr()) { + return false; + } +#ifdef FASTFLOAT_SSE2 +FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720)); + const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759)); + + if (_mm_movemask_epi8(t1) == 0) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } + else return false; +FASTFLOAT_SIMD_RESTORE_WARNINGS +#elif defined(FASTFLOAT_NEON) +FASTFLOAT_SIMD_DISABLE_WARNINGS + const uint16x8_t data = vld1q_u16(reinterpret_cast(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0')); + const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1)); + + if (vminvq_u16(mask) == 0xFFFF) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } + else return false; +FASTFLOAT_SIMD_RESTORE_WARNINGS +#else + (void)chars; (void)i; + return false; +#endif // FASTFLOAT_SSE2 +} + +#endif // FASTFLOAT_HAS_SIMD + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else +template ()) = 0> +#endif +// dummy for compile +bool simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) { + return 0; +} + + +template ::value) = 0> +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) { + if (!has_simd_opt()) { + return; + } + while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok + p += 8; + } +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void 
loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) { + // optimizes better than parse_if_eight_digits_unrolled() for UC = char. + while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) { + i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok + p += 8; + } +} + +template +struct parsed_number_string_t { + int64_t exponent{0}; + uint64_t mantissa{0}; + UC const * lastmatch{nullptr}; + bool negative{false}; + bool valid{false}; + bool too_many_digits{false}; + // contains the range of the significant digits + span integer{}; // non-nullable + span fraction{}; // nullable +}; + +using byte_span = span; +using parsed_number_string = parsed_number_string_t; + +// Assuming that you use no more than 19 digits, this will +// parse an ASCII string. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +parsed_number_string_t parse_number_string(UC const *p, UC const * pend, parse_options_t options) noexcept { + chars_format const fmt = options.format; + UC const decimal_point = options.decimal_point; + + parsed_number_string_t answer; + answer.valid = false; + answer.too_many_digits = false; + answer.negative = (*p == UC('-')); +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if ((*p == UC('-')) || (!(fmt & FASTFLOAT_JSONFMT) && *p == UC('+'))) { +#else + if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here +#endif + ++p; + if (p == pend) { + return answer; + } + if (fmt & FASTFLOAT_JSONFMT) { + if (!is_integer(*p)) { // a sign must be followed by an integer + return answer; + } + } else { + if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot + return answer; + } + } + } + UC const * const start_digits = p; + + uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) + + while ((p != pend) && is_integer(*p)) { + // a multiplication by 10 is 
cheaper than an arbitrary integer + // multiplication + i = 10 * i + + uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later + ++p; + } + UC const * const end_of_integer_part = p; + int64_t digit_count = int64_t(end_of_integer_part - start_digits); + answer.integer = span(start_digits, size_t(digit_count)); + if (fmt & FASTFLOAT_JSONFMT) { + // at least 1 digit in integer part, without leading zeros + if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) { + return answer; + } + } + + int64_t exponent = 0; + const bool has_decimal_point = (p != pend) && (*p == decimal_point); + if (has_decimal_point) { + ++p; + UC const * before = p; + // can occur at most twice without overflowing, but let it occur more, since + // for integers with many digits, digit parsing is the primary bottleneck. + loop_parse_if_eight_digits(p, pend, i); + + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - UC('0')); + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + } + exponent = before - p; + answer.fraction = span(before, size_t(p - before)); + digit_count -= exponent; + } + if (fmt & FASTFLOAT_JSONFMT) { + // at least 1 digit in fractional part + if (has_decimal_point && exponent == 0) { + return answer; + } + } + else if (digit_count == 0) { // we must have encountered at least one integer! 
+ return answer; + } + int64_t exp_number = 0; // explicit exponential part + if ( ((fmt & chars_format::scientific) && + (p != pend) && + ((UC('e') == *p) || (UC('E') == *p))) + || + ((fmt & FASTFLOAT_FORTRANFMT) && + (p != pend) && + ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) || (UC('D') == *p)))) { + UC const * location_of_e = p; + if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) || (UC('D') == *p)) { + ++p; + } + bool neg_exp = false; + if ((p != pend) && (UC('-') == *p)) { + neg_exp = true; + ++p; + } else if ((p != pend) && (UC('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + ++p; + } + if ((p == pend) || !is_integer(*p)) { + if(!(fmt & chars_format::fixed)) { + // We are in error. + return answer; + } + // Otherwise, we will be ignoring the 'e'. + p = location_of_e; + } else { + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - UC('0')); + if (exp_number < 0x10000000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + if(neg_exp) { exp_number = - exp_number; } + exponent += exp_number; + } + } else { + // If it scientific and not fixed, we have to bail out. + if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } + } + answer.lastmatch = p; + answer.valid = true; + + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon. + // + // We can deal with up to 19 digits. + if (digit_count > 19) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + // We need to be mindful of the case where we only have zeroes... + // E.g., 0.000000000...000. 
+ UC const * start = start_digits; + while ((start != pend) && (*start == UC('0') || *start == decimal_point)) { + if(*start == UC('0')) { digit_count --; } + start++; + } + + if (digit_count > 19) { + answer.too_many_digits = true; + // Let us start again, this time, avoiding overflows. + // We don't need to check if is_integer, since we use the + // pre-tokenized spans from above. + i = 0; + p = answer.integer.ptr; + UC const* int_end = p + answer.integer.len(); + const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 }; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + if (i >= minimal_nineteen_digit_integer) { // We have a big integers + exponent = end_of_integer_part - p + exp_number; + } + else { // We have a value with a fractional component. + p = answer.fraction.ptr; + UC const* frac_end = p + answer.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; + } + // We have now corrected both exponent and i, to a truncated value + } + } + answer.exponent = exponent; + answer.mantissa = i; + return answer; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +from_chars_result_t parse_int_string(UC const* p, UC const* pend, T& value, int base) { + from_chars_result_t answer; + + UC const* const first = p; + + bool negative = (*p == UC('-')); + if (!std::is_signed::value && negative) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if ((*p == UC('-')) || (*p == UC('+'))) { +#else + if (*p == UC('-')) { +#endif + ++p; + } + + UC const* const start_num = p; + + while (p!= pend && *p == UC('0')) { + ++p; + } + + const bool has_leading_zeros = p > start_num; + + UC const* const start_digits = p; + + uint64_t i = 0; + if (base == 10) { + 
loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible + } + while (p != pend) { + uint8_t digit = ch_to_digit(*p); + if (digit >= base) { + break; + } + i = uint64_t(base) * i + digit; // might overflow, check this later + p++; + } + + size_t digit_count = size_t(p - start_digits); + + if (digit_count == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + } + else { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + } + return answer; + } + + answer.ptr = p; + + // check u64 overflow + size_t max_digits = max_digits_u64(base); + if (digit_count > max_digits) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + // this check can be eliminated for all other types, but they will all require a max_digits(base) equivalent + if (digit_count == max_digits && i < min_safe_u64(base)) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + + // check other types overflow + if (!std::is_same::value) { + if (i > uint64_t(std::numeric_limits::max()) + uint64_t(negative)) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + } + + if (negative) { +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(push) +#pragma warning(disable: 4146) +#endif + // this weird workaround is required because: + // - converting unsigned to signed when its value is greater than signed max is UB pre-C++23. 
+ // - reinterpret_casting (~i + 1) would work, but it is not constexpr + // this is always optimized into a neg instruction (note: T is an integer type) + value = T(-std::numeric_limits::max() - T(i - uint64_t(std::numeric_limits::max()))); +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(pop) +#endif + } + else { value = T(i); } + + answer.ec = std::errc(); + return answer; +} + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_FAST_TABLE_H +#define FASTFLOAT_FAST_TABLE_H + +#include + +namespace fast_float { + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +template +struct powers_template { + +constexpr static int smallest_power_of_five = binary_format::smallest_power_of_ten(); +constexpr static int largest_power_of_five = binary_format::largest_power_of_ten(); +constexpr static int number_of_entries = 2 * (largest_power_of_five - smallest_power_of_five + 1); +// Powers of five from 5^-342 all the way to 5^308 rounded toward one. 
+constexpr static uint64_t power_of_five_128[number_of_entries] = { + 0xeef453d6923bd65a,0x113faa2906a13b3f, + 0x9558b4661b6565f8,0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, + 0xe95a99df8ace6f53,0xf4d82c2c107973dc, + 0x91d8a02bb6c10594,0x79071b9b8a4be869, + 0xb64ec836a47146f9,0x9748e2826cdee284, + 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f,0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723,0xad2c788035e61382, + 0x8b16fb203055ac76,0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78,0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b,0x8672648c40e5ad68, + 0xa9c98d8ccb009506,0x680efdaf511f18c2, + 0xd43bf0effdc0ba48,0x212bd1b2566def2, + 0x84a57695fe98746d,0x14bb630f7604b57, + 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, + 0xcf42894a5dce35ea,0x52064cac828675b9, + 0x818995ce7aa0e1b2,0x7343efebd1940993, + 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6,0xd41a26e077774ef6, + 0xfd00b897478238d0,0x8920b098955522b4, + 0x9e20735e8cb16382,0x55b46e5f5d5535b0, + 0xc5a890362fddbc62,0xeb2189f734aa831d, + 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d,0x47b233c92125366e, + 0xc1069cd4eabe89f8,0x999ec0bb696e840a, + 0xf148440a256e2c76,0xc00670ea43ca250d, + 0x96cd2a865764dbca,0x380406926a5e5728, + 0xbc807527ed3e12bc,0xc605083704f5ecf2, + 0xeba09271e88d976b,0xf7864a44c633682e, + 0x93445b8731587ea3,0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c,0x5960ea05bad82964, + 0xe61acf033d1a45df,0x6fb92487298e33bd, + 0x8fd0c16206306bab,0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696,0x8f48a4899877186c, + 0xe0b62e2929aba83c,0x331acdabfe94de87, + 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, + 0x892731ac9faf056e,0xbe311c083a225cd2, + 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, + 0xd64d3d9db981787d,0x92cbbccdad5b108, + 0x85f0468293f0eb4e,0x25bbf56008c58ea5, + 0xa76c582338ed2621,0xaf2af2b80af6f24e, + 0xd1476e2c07286faa,0x1af5af660db4aee1, + 
0x82cca4db847945ca,0x50d98d9fc890ed4d, + 0xa37fce126597973c,0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1,0x77b191618c54e9ac, + 0xc795830d75038c1d,0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, + 0x9becce62836ac577,0x4ee367f9430aec32, + 0xc2e801fb244576d5,0x229c41f793cda73f, + 0xf3a20279ed56d48a,0x6b43527578c1110f, + 0x9845418c345644d6,0x830a13896b78aaa9, + 0xbe5691ef416bd60c,0x23cc986bc656d553, + 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, + 0x91376c36d99995be,0x23100809b9c21fa1, + 0xb58547448ffffb2d,0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9,0x16c90c8f323f516c, + 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, + 0xb1442798f49ffb4a,0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d,0x40405643d711d583, + 0x8a7d3eef7f1cfc52,0x482835ea666b2572, + 0xad1c8eab5ee43b66,0xda3243650005eecf, + 0xd863b256369d4a40,0x90bed43e40076a82, + 0x873e4f75e2224e68,0x5a7744a6e804a291, + 0xa90de3535aaae202,0x711515d0a205cb36, + 0xd3515c2831559a83,0xd5a5b44ca873e03, + 0x8412d9991ed58091,0xe858790afe9486c2, + 0xa5178fff668ae0b6,0x626e974dbe39a872, + 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, + 0xa139029f6a239f72,0x1c1fffc1ebc44e80, + 0xc987434744ac874e,0xa327ffb266b56220, + 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, + 0xc4ce17b399107c22,0xcb550fb4384d21d3, + 0xf6019da07f549b2b,0x7e2a53a146606a48, + 0x99c102844f94e0fb,0x2eda7444cbfc426d, + 0xc0314325637a1939,0xfa911155fefb5308, + 0xf03d93eebc589f88,0x793555ab7eba27ca, + 0x96267c7535b763b5,0x4bc1558b2f3458de, + 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb,0x465e15a979c1cadc, + 0x92a1958a7675175f,0xbfacd89ec191ec9, + 0xb749faed14125d36,0xcef980ec671f667b, + 0xe51c79a85916f484,0x82b7e12780e7401a, + 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, + 
0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9,0x67a791e093e1d49a, + 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d,0x58fae9f773886e18, + 0xda7f5bf590966848,0xaf39a475506a899e, + 0x888f99797a5e012d,0x6d8406c952429603, + 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26,0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, + 0xd0601d8efc57b08b,0xf13b94daf124da26, + 0x823c12795db6ce57,0x76c53d08d6b70858, + 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02,0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a,0x359ab6419ca1091b, + 0xf867241c8cc6d4c0,0xc30163d203c94b62, + 0x9b407691d7fc44f8,0x79e0de63425dcf1d, + 0xc21094364dfb5636,0x985915fc12f542e4, + 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, + 0xbd8430bd08277231,0x50c6ff782a838353, + 0xece53cec4a314ebd,0xa4f8bf5635246428, + 0x940f4613ae5ed136,0x871b7795e136be99, + 0xb913179899f68584,0x28e2557b59846e3f, + 0xe757dd7ec07426e5,0x331aeada2fe589cf, + 0x9096ea6f3848984f,0x3ff0d2c85def7621, + 0xb4bca50b065abe63,0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb,0xd3e8495912c62894, + 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, + 0xb080392cc4349dec,0xbd8d794d96aacfb3, + 0xdca04777f541c567,0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60,0xf41686c49db57244, + 0xac5d37d5b79b6239,0x311c2875c522ced5, + 0xd77485cb25823ac7,0x7d633293366b828b, + 0x86a8d39ef77164bc,0xae5dff9c02033197, + 0xa8530886b54dbdeb,0xd9f57f830283fdfc, + 0xd267caa862a12d66,0xd072df63c324fd7b, + 0x8380dea93da4bc60,0x4247cb9e59f71e6d, + 0xa46116538d0deb78,0x52d9be85f074e608, + 0xcd795be870516656,0x67902e276c921f8b, + 0x806bd9714632dff6,0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c,0x796b805720085f81, + 0x9cc3a6eec6311a63,0xcbe3303674053bb0, + 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, + 
0xf4f1b4d515acb93b,0xee92fb5515482d44, + 0x991711052d8bf3c5,0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6,0xd262d45a78a0635d, + 0xef340a98172aace4,0x86fb897116c87c34, + 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, + 0xbae0a846d2195712,0x8974836059cca109, + 0xe998d258869facd7,0x2bd1a438703fc94b, + 0x91ff83775423cc06,0x7b6306a34627ddcf, + 0xb67f6455292cbf08,0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, + 0x8e938662882af53e,0x547eb47b7282ee9c, + 0xb23867fb2a35b28d,0xe99e619a4f23aa43, + 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, + 0xae0b158b4738705e,0x9624ab50b148d445, + 0xd98ddaee19068c76,0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b,0x7647c3200069671f, + 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, + 0xa5fb0a17c777cf09,0xf468107100525890, + 0xcf79cc9db955c2cc,0x7182148d4066eeb4, + 0x81ac1fe293d599bf,0xc6f14cd848405530, + 0xa21727db38cb002f,0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, + 0xfd442e4688bd304a,0x908f4a166d1da663, + 0x9e4a9cec15763e2e,0x9a598e4e043287fe, + 0xc5dd44271ad3cdba,0x40eff1e1853f29fd, + 0xf7549530e188c128,0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9,0x82bb74f8301958ce, + 0xc13a148e3032d6e7,0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, + 0xebdf661791d60f56,0x111b495b3464ad21, + 0x936b9fcebb25c995,0xcab10dd900beec34, + 0xb84687c269ef3bfb,0x3d5d514f40eea742, + 0xe65829b3046b0afa,0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, + 0xb3f4e093db73a093,0x59ed216765690f56, + 0xe0f218b8d25088b8,0x306869c13ec3532c, + 0x8c974f7383725573,0x1e414218c73a13fb, + 0xafbd2350644eeacf,0xe5d1929ef90898fa, + 0xdbac6c247d62a583,0xdf45f746b74abf39, + 0x894bc396ce5da772,0x6b8bba8c328eb783, + 0xab9eb47c81f5114f,0x66ea92f3f326564, + 0xd686619ba27255a2,0xc80a537b0efefebd, + 0x8613fd0145877585,0xbd06742ce95f5f36, + 
0xa798fc4196e952e7,0x2c48113823b73704, + 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, + 0x82ef85133de648c4,0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3,0x318df905079926a8, + 0xffbbcfe994e5c61f,0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d,0x6bea10ca65c084e, + 0xc31bfa0fe5698db8,0x486e494fcff30a62, + 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7,0xf89629465a75e01c, + 0xbe89523386091465,0xf6bbb397f1135823, + 0xee2ba6c0678b597f,0x746aa07ded582e2c, + 0x94db483840b717ef,0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb,0x92f34d62616ce413, + 0xe896a0d7e51e1566,0x77b020baf9c81d17, + 0x915e2486ef32cd60,0xace1474dc1d122e, + 0xb5b5ada8aaff80b8,0xd819992132456ba, + 0xe3231912d5bf60e6,0x10e1fff697ed6c69, + 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d,0x86c16c98d2c953c6, + 0xd89d64d57a607744,0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b,0x11471cd764ad4972, + 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, + 0xd389b47879823479,0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb,0xcedf722a585139ba, + 0xa54394fe1eedb8fe,0xc2974eb4ee658828, + 0xce947a3da6a9273e,0x733d226229feea32, + 0x811ccc668829b887,0x806357d5a3f525f, + 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052,0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67,0xbbac2078d443ace2, + 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, + 0xc5029163f384a931,0xa9e795e65d4df11, + 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e,0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, + 0xf07da27a82c37088,0x5d767327bb4e5a4c, + 0x964e858c91ba2655,0x3a6a07f8d510f86f, + 0xbbe226efb628afea,0x890489f70a55368b, + 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb,0x9ce6ebb40173744, + 
0xe55990879ddcaabd,0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6,0x9fa946824a12232d, + 0xb32df8e9f3546564,0x47939822dc96abf9, + 0xdff9772470297ebd,0x59787e2b93bc56f7, + 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, + 0xaefae51477a06b03,0xede622920b6b23f1, + 0xdab99e59958885c4,0xe95fab368e45eced, + 0x88b402f7fd75539b,0x11dbcb0218ebb414, + 0xaae103b5fcd2a881,0xd652bdc29f26a119, + 0xd59944a37c0752a2,0x4be76d3346f0495f, + 0x857fcae62d8493a5,0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2,0x7e2000a41346a7a7, + 0x825ecc24c873782f,0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b,0x728900802f0f32fa, + 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc,0xe2f610c84987bfa8, + 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143,0x91503d1c79720dbb, + 0xf8a95fcf88747d94,0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, + 0xc24452da229b021b,0xfbe85badce996168, + 0xf2d56790ab41c2a2,0xfae27299423fb9c3, + 0x97c560ba6b0919a5,0xdccd879fc967d41a, + 0xbdb6b8e905cb600f,0x5400e987bbc1c920, + 0xed246723473e3813,0x290123e9aab23b68, + 0x9436c0760c86e30b,0xf9a0b6720aaf6521, + 0xb94470938fa89bce,0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2,0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232,0x25c6da63c38de1b0, + 0x8d590723948a535f,0x579c487e5a38ad0e, + 0xb0af48ec79ace837,0x2d835a9df0c6d851, + 0xdcdb1b2798182244,0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5,0xe272467e3d222f3f, + 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea,0x98e947129fc2b4e9, + 0xa87fea27a539e9a5,0x3f2398d747b36224, + 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89,0x1953cf68300424ac, + 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, + 0xcdb02555653131b6,0x3792f412cb06794d, + 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b,0xf245825a5a445275, + 0xfb158592be068d2e,0xeed6e2f0f0d56712, + 
0x9ced737bb6c4183d,0x55464dd69685606b, + 0xc428d05aa4751e4c,0xaa97e14c3c26b886, + 0xf53304714d9265df,0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab,0xe546a8038efe4029, + 0xbf8fdb78849a5f96,0xde98520472bdd033, + 0xef73d256a5c0f77c,0x963e66858f6d4440, + 0x95a8637627989aad,0xdde7001379a44aa8, + 0xbb127c53b17ec159,0x5560c018580d5d52, + 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, + 0x9226712162ab070d,0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, + 0xb267ed1940f1c61c,0x55f038b237591ed3, + 0xdf01e85f912e37a3,0x6b6c46dec52f6688, + 0x8b61313bbabce2c6,0x2323ac4b3b3da015, + 0xae397d8aa96c1b77,0xabec975e0a0d081a, + 0xd9c7dced53c72255,0x96e7bd358c904a21, + 0x881cea14545c7575,0x7e50d64177da2e54, + 0xaa242499697392d2,0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, + 0x84ec3c97da624ab4,0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba,0x67de18eda5814af2, + 0x81ceb32c4b43fcf4,0x80eacf948770ced7, + 0xa2425ff75e14fc31,0xa1258379a94d028d, + 0xcad2f7f5359a3b3e,0x96ee45813a04330, + 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, + 0x9e74d1b791e07e48,0x775ea264cf55347e, + 0xc612062576589dda,0x95364afe032a819e, + 0xf79687aed3eec551,0x3a83ddbd83f52205, + 0x9abe14cd44753b52,0xc4926a9672793543, + 0xc16d9a0095928a27,0x75b7053c0f178294, + 0xf1c90080baf72cb1,0x5324c68b12dd6339, + 0x971da05074da7bee,0xd3f6fc16ebca5e04, + 0xbce5086492111aea,0x88f4bb1ca6bcf585, + 0xec1e4a7db69561a5,0x2b31e9e3d06c32e6, + 0x9392ee8e921d5d07,0x3aff322e62439fd0, + 0xb877aa3236a4b449,0x9befeb9fad487c3, + 0xe69594bec44de15b,0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9,0xf9d37014bf60a11, + 0xb424dc35095cd80f,0x538484c19ef38c95, + 0xe12e13424bb40e13,0x2865a5f206b06fba, + 0x8cbccc096f5088cb,0xf93f87b7442e45d4, + 0xafebff0bcb24aafe,0xf78f69a51539d749, + 0xdbe6fecebdedd5be,0xb573440e5a884d1c, + 0x89705f4136b4a597,0x31680a88f8953031, + 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, + 
0xd6bf94d5e57a42bc,0x3d32907604691b4d, + 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, + 0xa7c5ac471b478423,0xfcf80dc33721d54, + 0xd1b71758e219652b,0xd3c36113404ea4a9, + 0x83126e978d4fdf3b,0x645a1cac083126ea, + 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, + 0xcccccccccccccccc,0xcccccccccccccccd, + 0x8000000000000000,0x0, + 0xa000000000000000,0x0, + 0xc800000000000000,0x0, + 0xfa00000000000000,0x0, + 0x9c40000000000000,0x0, + 0xc350000000000000,0x0, + 0xf424000000000000,0x0, + 0x9896800000000000,0x0, + 0xbebc200000000000,0x0, + 0xee6b280000000000,0x0, + 0x9502f90000000000,0x0, + 0xba43b74000000000,0x0, + 0xe8d4a51000000000,0x0, + 0x9184e72a00000000,0x0, + 0xb5e620f480000000,0x0, + 0xe35fa931a0000000,0x0, + 0x8e1bc9bf04000000,0x0, + 0xb1a2bc2ec5000000,0x0, + 0xde0b6b3a76400000,0x0, + 0x8ac7230489e80000,0x0, + 0xad78ebc5ac620000,0x0, + 0xd8d726b7177a8000,0x0, + 0x878678326eac9000,0x0, + 0xa968163f0a57b400,0x0, + 0xd3c21bcecceda100,0x0, + 0x84595161401484a0,0x0, + 0xa56fa5b99019a5c8,0x0, + 0xcecb8f27f4200f3a,0x0, + 0x813f3978f8940984,0x4000000000000000, + 0xa18f07d736b90be5,0x5000000000000000, + 0xc9f2c9cd04674ede,0xa400000000000000, + 0xfc6f7c4045812296,0x4d00000000000000, + 0x9dc5ada82b70b59d,0xf020000000000000, + 0xc5371912364ce305,0x6c28000000000000, + 0xf684df56c3e01bc6,0xc732000000000000, + 0x9a130b963a6c115c,0x3c7f400000000000, + 0xc097ce7bc90715b3,0x4b9f100000000000, + 0xf0bdc21abb48db20,0x1e86d40000000000, + 0x96769950b50d88f4,0x1314448000000000, + 0xbc143fa4e250eb31,0x17d955a000000000, + 0xeb194f8e1ae525fd,0x5dcfab0800000000, + 0x92efd1b8d0cf37be,0x5aa1cae500000000, + 0xb7abc627050305ad,0xf14a3d9e40000000, + 0xe596b7b0c643c719,0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f,0xe4820023a2000000, + 0xb35dbf821ae4f38b,0xdda2802c8a800000, + 0xe0352f62a19e306e,0xd50b2037ad200000, + 0x8c213d9da502de45,0x4526f422cc340000, + 0xaf298d050e4395d6,0x9670b12b7f410000, + 0xdaf3f04651d47b4c,0x3c0cdd765f114000, + 0x88d8762bf324cd0f,0xa5880a69fb6ac800, + 0xab0e93b6efee0053,0x8eea0d047a457a00, + 
0xd5d238a4abe98068,0x72a4904598d6d880, + 0x85a36366eb71f041,0x47a6da2b7f864750, + 0xa70c3c40a64e6c51,0x999090b65f67d924, + 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, + 0x82818f1281ed449f,0xbff8f10e7a8921a4, + 0xa321f2d7226895c7,0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, + 0xfee50b7025c36a08,0x2f236d04753d5b4, + 0x9f4f2726179a2245,0x1d762422c946590, + 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, + 0x9b934c3b330c8577,0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a,0x8bef464e3945ef7a, + 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, + 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436,0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44,0x60dbbca87196b616, + 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, + 0xb51d13aea4a488dd,0x6babab6398bdbe41, + 0xe264589a4dcdab14,0xc696963c7eed2dd1, + 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8,0x3b25a55f43294bcb, + 0xdd15fe86affad912,0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab,0x6e3569326c784337, + 0xacb92ed9397bf996,0x49c2c37f07965404, + 0xd7e77a8f87daf7fb,0xdc33745ec97be906, + 0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, + 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b,0xf50a3fa490c30190, + 0x83c7088e1aab65db,0x792667c6da79e0fa, + 0xa4b8cab1a1563f52,0x577001b891185938, + 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, + 0x80b05e5ac60b6178,0x544f8158315b05b4, + 0xa0dc75f1778e39d6,0x696361ae3db1c721, + 0xc913936dd571c84c,0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f,0x4ab48a04065c723, + 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, + 0xc45d1df942711d9a,0x3ba5d0bd324f8394, + 0xf5746577930d6500,0xca8f44ec7ee36479, + 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5,0xbba1f1d158724a12, + 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, + 0xea1575143cf97226,0xf52d09d71a3293bd, + 
0x924d692ca61be758,0x593c2626705f9c56, + 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, + 0xe498f455c38b997a,0xb6dfb9c0f956447, + 0x8edf98b59a373fec,0x4724bd4189bd5eac, + 0xb2977ee300c50fe7,0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, + 0x8b865b215899f46c,0xbd79e0d20082ee74, + 0xae67f1e9aec07187,0xecd8590680a3aa11, + 0xda01ee641a708de9,0xe80e6f4820cc9495, + 0x884134fe908658b2,0x3109058d147fdcdd, + 0xaa51823e34a7eede,0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, + 0x850fadc09923329e,0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45,0x84db8346b786151c, + 0xcfe87f7cef46ff16,0xe612641865679a63, + 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749,0xe3be5e330f38f09d, + 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, + 0xc646d63501a1511d,0xb281e1fd541501b8, + 0xf7d88bc24209a565,0x1f225a7ca91a4226, + 0x9ae757596946075f,0x3375788de9b06958, + 0xc1a12d2fc3978937,0x52d6b1641c83ae, + 0xf209787bb47d6b84,0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332,0xf840b7ba963646e0, + 0xbd176620a501fbff,0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf,0xc66f336c36b10137, + 0xb8a8d9bbe123f017,0xb80b0047445d4184, + 0xe6d3102ad96cec1d,0xa60dc059157491e5, + 0x9043ea1ac7e41392,0x87c89837ad68db2f, + 0xb454e4a179dd1877,0x29babe4598c311fb, + 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d,0x1899e4a65f58660c, + 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d,0x76707543f4fa1f73, + 0x899504ae72497eba,0x6a06494a791c53a8, + 0xabfa45da0edbde69,0x487db9d17636892, + 0xd6f8d7509292d603,0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, + 0xa7f26836f282b732,0x8e6cac7768d7141e, + 0xd1ef0244af2364ff,0x3207d795430cd926, + 0x8335616aed761f1f,0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, + 0xcd036837130890a1,0x36dba887c37a8c0f, + 0x802221226be55a64,0xc2494954da2c9789, + 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, + 
0xc83553c5c8965d3d,0x6f92829494e5acc7, + 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, + 0x9c69a97284b578d7,0xff2a760414536efb, + 0xc38413cf25e2d70d,0xfef5138519684aba, + 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, + 0x98bf2f79d5993802,0xef2f773ffbd97a61, + 0xbeeefb584aff8603,0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2,0xdd945a747bf26183, + 0xba756174393d88df,0x94f971119aeef9e4, + 0xe912b9d1478ceb17,0x7a37cd5601aab85d, + 0x91abb422ccb812ee,0xac62e055c10ab33a, + 0xb616a12b7fe617aa,0x577b986b314d6009, + 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d,0x14588f13be847307, + 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee,0x25de7bb9480d5854, + 0xada72ccc20054ae9,0xaf561aa79a10ae6a, + 0xd910f7ff28069da4,0x1b2ba1518094da04, + 0x87aa9aff79042286,0x90fb44d2f05d0842, + 0xa99541bf57452b28,0x353a1607ac744a53, + 0xd3fa922f2d1675f2,0x42889b8997915ce8, + 0x847c9b5d7c2e09b7,0x69956135febada11, + 0xa59bc234db398c25,0x43fab9837e699095, + 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, + 0x8161afb94b44f57d,0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc,0x6462d92a69731732, + 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78,0x5cda735244c3d43e, + 0x9defbf01b061adab,0x3a0888136afa64a7, + 0xc56baec21c7a1916,0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b,0x8aad549e57273d45, + 0x9a3c2087a63f6399,0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, + 0x969eb7c47859e743,0x9f644ae5a4b1b325, + 0xbc4665b596706114,0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8,0x9a7f12442d588f2, + 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, + 0x8fa475791a569d10,0xf96e017d694487bc, + 0xb38d92d760ec4455,0x37c981dcc395a9ac, + 0xe070f78d3927556a,0x85bbe253f47b1417, + 0x8c469ab843b89562,0x93956d7478ccec8e, + 0xaf58416654a6babb,0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, + 
0x88fcf317f22241e2,0x441fece3bdf81f03, + 0xab3c2fddeeaad25a,0xd527e81cad7626c3, + 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, + 0x85c7056562757456,0xf6872d5667844e49, + 0xa738c6bebb12d16c,0xb428f8ac016561db, + 0xd106f86e69d785c7,0xe13336d701beba52, + 0x82a45b450226b39c,0xecc0024661173473, + 0xa34d721642b06084,0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, + 0xff290242c83396ce,0x7e67047175a15271, + 0x9f79a169bd203e41,0xf0062c6e984d386, + 0xc75809c42c684dd1,0x52c07b78a3e60868, + 0xf92e0c3537826145,0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb,0x88a66076400bb691, + 0xc2abf989935ddbfe,0x6acff893d00ea435, + 0xf356f7ebf83552fe,0x583f6b8c4124d43, + 0x98165af37b2153de,0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, + 0xeda2ee1c7064130c,0x1162def06f79df73, + 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, + 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0,0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, + 0x8da471a9de737e24,0x5ceaecfed289e5d2, + 0xb10d8e1456105dad,0x7425a83e872c5f47, + 0xdd50f1996b947518,0xd12f124e28f77719, + 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b,0x636cc64d1001550b, + 0xd8210befd30efa5a,0x3c47f7e05401aa4e, + 0x8714a775e3e95c78,0x65acfaec34810a71, + 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, + 0xd31045a8341ca07c,0x1ede48111209a050, + 0x83ea2b892091e44d,0x934aed0aab460432, + 0xa4e4b66b68b65d60,0xf81da84d5617853f, + 0xce1de40642e3f4b9,0x36251260ab9d668e, + 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, + 0xa1075a24e4421730,0xb24cf65b8612f81f, + 0xc94930ae1d529cfc,0xdee033f26797b627, + 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, + 0x9d412e0806e88aa5,0x8e1f289560ee864e, + 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2,0xae10af696774b1db, + 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f,0x17fd090a58d32af3, + 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, + 
0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513,0x84c86189216dc5ed, + 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515,0xfabaf3feaa5334a, + 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8,0x743e20e9ef511012, + 0xdf78e4b2bd342cf6,0x914da9246b255416, + 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, + 0xae9672aba3d0c320,0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, + 0x8865899617fb1871,0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, + 0xd51ea6fa85785631,0x552a74227f3ea565, + 0x8533285c936b35de,0xd53a88958f87275f, + 0xa67ff273b8460356,0x8a892abaf368f137, + 0xd01fef10a657842c,0x2d2b7569b0432d85, + 0x8213f56a67f6b29b,0x9c3b29620e29fc73, + 0xa298f2c501f45f42,0x8349f3ba91b47b8f, + 0xcb3f2f7642717713,0x241c70a936219a73, + 0xfe0efb53d30dd4d7,0xed238cd383aa0110, + 0x9ec95d1463e8a506,0xf4363804324a40aa, + 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da,0xdd94b7868e94050a, + 0x9b10a4e5e9913128,0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf,0xbc633b39673c8cec, + 0x976e41088617ca01,0xd5be0503e085d813, + 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, + 0xec9c459d51852ba2,0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45,0xcabb90e5c942b503, + 0xb8da1662e7b00a17,0x3d6a751f3b936243, + 0xe7109bfba19c0c9d,0xcc512670a783ad4, + 0x906a617d450187e2,0x27fb2b80668b24c5, + 0xb484f9dc9641e9da,0xb1f9f660802dedf6, + 0xe1a63853bbd26451,0x5e7873f8a0396973, + 0x8d07e33455637eb2,0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7,0x7641a140cc7810fb, + 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, + 0xac2820d9623bf429,0x546345fa9fbdcd44, + 0xd732290fbacaf133,0xa97c177947ad4095, + 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, + 0xa81f301449ee8c70,0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c,0x73832eec6fff3111, + 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, + 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, + 
0x80444b5e7aa7cf85,0x7980d163cf5b81b3, + 0xa0555e361951c366,0xd7e105bcc332621f, + 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, + 0xfa856334878fc150,0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07,0xa862f80ec4700c8, + 0xf4a642e14c6262c8,0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, + 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, + 0xeeea5d5004981478,0x1858ccfce06cac74, + 0x95527a5202df0ccb,0xf37801e0c43ebc8, + 0xbaa718e68396cffd,0xd30560258f54e6ba, + 0xe950df20247c83fd,0x47c6b82ef32a2069, + 0x91d28b7416cdd27e,0x4cdc331d57fa5441, + 0xb6472e511c81471d,0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5,0x58180fddd97723a6, + 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; +}; + +template +constexpr uint64_t powers_template::power_of_five_128[number_of_entries]; + +using powers = powers_template<>; + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_DECIMAL_TO_BINARY_H +#define FASTFLOAT_DECIMAL_TO_BINARY_H + +#include +#include +#include +#include +#include +#include + +namespace fast_float { + +// This will compute or rather approximate w * 5**q and return a pair of 64-bit words approximating +// the result, with the "high" part corresponding to the most significant bits and the +// low part corresponding to the least significant bits. +// +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +value128 compute_product_approximation(int64_t q, uint64_t w) { + const int index = 2 * int(q - powers::smallest_power_of_five); + // For small values of q, e.g., q in [0,27], the answer is always exact because + // The line value128 firstproduct = full_multiplication(w, power_of_five_128[index]); + // gives the exact answer. + value128 firstproduct = full_multiplication(w, powers::power_of_five_128[index]); + static_assert((bit_precision >= 0) && (bit_precision <= 64), " precision should be in (0,64]"); + constexpr uint64_t precision_mask = (bit_precision < 64) ? 
+ (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) + : uint64_t(0xFFFFFFFFFFFFFFFF); + if((firstproduct.high & precision_mask) == precision_mask) { // could further guard with (lower + w < lower) + // regarding the second product, we only need secondproduct.high, but our expectation is that the compiler will optimize this extra work away if needed. + value128 secondproduct = full_multiplication(w, powers::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { + firstproduct.high++; + } + } + return firstproduct; +} + +namespace detail { +/** + * For q in (0,350), we have that + * f = (((152170 + 65536) * q ) >> 16); + * is equal to + * floor(p) + q + * where + * p = log(5**q)/log(2) = q * log(5)/log(2) + * + * For negative values of q in (-400,0), we have that + * f = (((152170 + 65536) * q ) >> 16); + * is equal to + * -ceil(p) + q + * where + * p = log(5**-q)/log(2) = -q * log(5)/log(2) + */ + constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept { + return (((152170 + 65536) * q) >> 16) + 63; + } +} // namespace detail + +// create an adjusted mantissa, biased by the invalid power2 +// for significant digits already multiplied by 10 ** q. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept { + int hilz = int(w >> 63) ^ 1; + adjusted_mantissa answer; + answer.mantissa = w << hilz; + int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent(); + answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + invalid_am_bias); + return answer; +} + +// w * 10 ** q, without rounding the representation up. +// the power2 in the exponent will be adjusted by invalid_am_bias. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept { + int lz = leading_zeroes(w); + w <<= lz; + value128 product = compute_product_approximation(q, w); + return compute_error_scaled(q, product.high, lz); +} + +// w * 10 ** q +// The returned value should be a valid ieee64 number that simply need to be packed. +// However, in some very rare cases, the computation will fail. In such cases, we +// return an adjusted_mantissa with a negative power of 2: the caller should recompute +// in such cases. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { + adjusted_mantissa answer; + if ((w == 0) || (q < binary::smallest_power_of_ten())) { + answer.power2 = 0; + answer.mantissa = 0; + // result should be zero + return answer; + } + if (q > binary::largest_power_of_ten()) { + // we want to get infinity: + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + // At this point in time q is in [powers::smallest_power_of_five, powers::largest_power_of_five]. + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(w); + w <<= lz; + + // The required precision is binary::mantissa_explicit_bits() + 3 because + // 1. We need the implicit bit + // 2. We need an extra bit for rounding purposes + // 3. We might lose a bit due to the "upperbit" routine (result too small, requiring a shift) + + value128 product = compute_product_approximation(q, w); + // The computed 'product' is always sufficient. 
+ // Mathematical proof: + // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to appear) + // See script/mushtak_lemire.py + + // The "compute_product_approximation" function can be slightly slower than a branchless approach: + // value128 product = compute_product(q, w); + // but in practice, we can win big with the compute_product_approximation if its additional branch + // is easily predicted. Which is best is data specific. + int upperbit = int(product.high >> 63); + int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3; + + answer.mantissa = product.high >> shift; + + answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - binary::minimum_exponent()); + if (answer.power2 <= 0) { // we have a subnormal? + // Here have that answer.power2 <= 0 so -answer.power2 >= 0 + if(-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + answer.power2 = 0; + answer.mantissa = 0; + // result should be zero + return answer; + } + // next line is safe because -answer.power2 + 1 < 64 + answer.mantissa >>= -answer.power2 + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + answer.mantissa += (answer.mantissa & 1); // round up + answer.mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + answer.power2 = (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) ? 
0 : 1; + return answer; + } + + // usually, we round *up*, but if we fall right in between and and we have an + // even basis, we need to round down + // We are only concerned with the cases where 5**q fits in single 64-bit word. + if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) && + ((answer.mantissa & 3) == 1) ) { // we may fall between two floats! + // To be in-between two floats we need that in doing + // answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); + // ... we dropped out only zeroes. But if this happened, then we can go back!!! + if((answer.mantissa << shift) == product.high) { + answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up + } + } + + answer.mantissa += (answer.mantissa & 1); // round up + answer.mantissa >>= 1; + if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) { + answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits()); + answer.power2++; // undo previous addition + } + + answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits()); + if (answer.power2 >= binary::infinite_power()) { // infinity + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + } + return answer; +} + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_BIGINT_H +#define FASTFLOAT_BIGINT_H + +#include +#include +#include +#include + + +namespace fast_float { + +// the limb width: we want efficient multiplication of double the bits in +// limb, or for 64-bit limbs, at least 64-bit multiplication where we can +// extract the high and low parts efficiently. this is every 64-bit +// architecture except for sparc, which emulates 128-bit multiplication. +// we might have platforms where `CHAR_BIT` is not 8, so let's avoid +// doing `8 * sizeof(limb)`. 
+#if defined(FASTFLOAT_64BIT) && !defined(__sparc) +#define FASTFLOAT_64BIT_LIMB 1 +typedef uint64_t limb; +constexpr size_t limb_bits = 64; +#else +#define FASTFLOAT_32BIT_LIMB +typedef uint32_t limb; +constexpr size_t limb_bits = 32; +#endif + +typedef span limb_span; + +// number of bits in a bigint. this needs to be at least the number +// of bits required to store the largest bigint, which is +// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or +// ~3600 bits, so we round to 4000. +constexpr size_t bigint_bits = 4000; +constexpr size_t bigint_limbs = bigint_bits / limb_bits; + +// vector-like type that is allocated on the stack. the entire +// buffer is pre-allocated, and only the length changes. +template +struct stackvec { + limb data[size]; + // we never need more than 150 limbs + uint16_t length{0}; + + stackvec() = default; + stackvec(const stackvec &) = delete; + stackvec &operator=(const stackvec &) = delete; + stackvec(stackvec &&) = delete; + stackvec &operator=(stackvec &&other) = delete; + + // create stack vector from existing limb span. + FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) { + FASTFLOAT_ASSERT(try_extend(s)); + } + + FASTFLOAT_CONSTEXPR14 limb& operator[](size_t index) noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return data[index]; + } + FASTFLOAT_CONSTEXPR14 const limb& operator[](size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return data[index]; + } + // index from the end of the container + FASTFLOAT_CONSTEXPR14 const limb& rindex(size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + size_t rindex = length - index - 1; + return data[rindex]; + } + + // set the length, without bounds checking. 
+ FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept { + length = uint16_t(len); + } + constexpr size_t len() const noexcept { + return length; + } + constexpr bool is_empty() const noexcept { + return length == 0; + } + constexpr size_t capacity() const noexcept { + return size; + } + // append item to vector, without bounds checking + FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept { + data[length] = value; + length++; + } + // append item to vector, returning if item was added + FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept { + if (len() < capacity()) { + push_unchecked(value); + return true; + } else { + return false; + } + } + // add items to the vector, from a span, without bounds checking + FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept { + limb* ptr = data + length; + std::copy_n(s.ptr, s.len(), ptr); + set_len(len() + s.len()); + } + // try to add items to the vector, returning if items were added + FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept { + if (len() + s.len() <= capacity()) { + extend_unchecked(s); + return true; + } else { + return false; + } + } + // resize the vector, without bounds checking + // if the new size is longer than the vector, assign value to each + // appended item. + FASTFLOAT_CONSTEXPR20 + void resize_unchecked(size_t new_len, limb value) noexcept { + if (new_len > len()) { + size_t count = new_len - len(); + limb* first = data + len(); + limb* last = first + count; + ::std::fill(first, last, value); + set_len(new_len); + } else { + set_len(new_len); + } + } + // try to resize the vector, returning if the vector was resized. + FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept { + if (new_len > capacity()) { + return false; + } else { + resize_unchecked(new_len, value); + return true; + } + } + // check if any limbs are non-zero after the given index. 
+ // this needs to be done in reverse order, since the index + // is relative to the most significant limbs. + FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept { + while (index < len()) { + if (rindex(index) != 0) { + return true; + } + index++; + } + return false; + } + // normalize the big integer, so most-significant zero limbs are removed. + FASTFLOAT_CONSTEXPR14 void normalize() noexcept { + while (len() > 0 && rindex(0) == 0) { + length--; + } + } +}; + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +uint64_t empty_hi64(bool& truncated) noexcept { + truncated = false; + return 0; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint64_t uint64_hi64(uint64_t r0, bool& truncated) noexcept { + truncated = false; + int shl = leading_zeroes(r0); + return r0 << shl; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept { + int shl = leading_zeroes(r0); + if (shl == 0) { + truncated = r1 != 0; + return r0; + } else { + int shr = 64 - shl; + truncated = (r1 << shl) != 0; + return (r0 << shl) | (r1 >> shr); + } +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint64_t uint32_hi64(uint32_t r0, bool& truncated) noexcept { + return uint64_hi64(r0, truncated); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint64_t uint32_hi64(uint32_t r0, uint32_t r1, bool& truncated) noexcept { + uint64_t x0 = r0; + uint64_t x1 = r1; + return uint64_hi64((x0 << 32) | x1, truncated); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noexcept { + uint64_t x0 = r0; + uint64_t x1 = r1; + uint64_t x2 = r2; + return uint64_hi64(x0, (x1 << 32) | x2, truncated); +} + +// add two small integers, checking for overflow. +// we want an efficient operation. for msvc, where +// we don't have built-in intrinsics, this is still +// pretty fast. 
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +limb scalar_add(limb x, limb y, bool& overflow) noexcept { + limb z; +// gcc and clang +#if defined(__has_builtin) + #if __has_builtin(__builtin_add_overflow) + if (!cpp20_and_in_constexpr()) { + overflow = __builtin_add_overflow(x, y, &z); + return z; + } + #endif +#endif + + // generic, this still optimizes correctly on MSVC. + z = x + y; + overflow = z < x; + return z; +} + +// multiply two small integers, getting both the high and low bits. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +limb scalar_mul(limb x, limb y, limb& carry) noexcept { +#ifdef FASTFLOAT_64BIT_LIMB + #if defined(__SIZEOF_INT128__) + // GCC and clang both define it as an extension. + __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry); + carry = limb(z >> limb_bits); + return limb(z); + #else + // fallback, no native 128-bit integer multiplication with carry. + // on msvc, this optimizes identically, somehow. + value128 z = full_multiplication(x, y); + bool overflow; + z.low = scalar_add(z.low, carry, overflow); + z.high += uint64_t(overflow); // cannot overflow + carry = z.high; + return z.low; + #endif +#else + uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry); + carry = limb(z >> limb_bits); + return limb(z); +#endif +} + +// add scalar value to bigint starting from offset. +// used in grade school multiplication +template +inline FASTFLOAT_CONSTEXPR20 +bool small_add_from(stackvec& vec, limb y, size_t start) noexcept { + size_t index = start; + limb carry = y; + bool overflow; + while (carry != 0 && index < vec.len()) { + vec[index] = scalar_add(vec[index], carry, overflow); + carry = limb(overflow); + index += 1; + } + if (carry != 0) { + FASTFLOAT_TRY(vec.try_push(carry)); + } + return true; +} + +// add scalar value to bigint. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool small_add(stackvec& vec, limb y) noexcept { + return small_add_from(vec, y, 0); +} + +// multiply bigint by scalar value. 
+template +inline FASTFLOAT_CONSTEXPR20 +bool small_mul(stackvec& vec, limb y) noexcept { + limb carry = 0; + for (size_t index = 0; index < vec.len(); index++) { + vec[index] = scalar_mul(vec[index], y, carry); + } + if (carry != 0) { + FASTFLOAT_TRY(vec.try_push(carry)); + } + return true; +} + +// add bigint to bigint starting from index. +// used in grade school multiplication +template +FASTFLOAT_CONSTEXPR20 +bool large_add_from(stackvec& x, limb_span y, size_t start) noexcept { + // the effective x buffer is from `xstart..x.len()`, so exit early + // if we can't get that current range. + if (x.len() < start || y.len() > x.len() - start) { + FASTFLOAT_TRY(x.try_resize(y.len() + start, 0)); + } + + bool carry = false; + for (size_t index = 0; index < y.len(); index++) { + limb xi = x[index + start]; + limb yi = y[index]; + bool c1 = false; + bool c2 = false; + xi = scalar_add(xi, yi, c1); + if (carry) { + xi = scalar_add(xi, 1, c2); + } + x[index + start] = xi; + carry = c1 | c2; + } + + // handle overflow + if (carry) { + FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start)); + } + return true; +} + +// add bigint to bigint. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool large_add_from(stackvec& x, limb_span y) noexcept { + return large_add_from(x, y, 0); +} + +// grade-school multiplication algorithm +template +FASTFLOAT_CONSTEXPR20 +bool long_mul(stackvec& x, limb_span y) noexcept { + limb_span xs = limb_span(x.data, x.len()); + stackvec z(xs); + limb_span zs = limb_span(z.data, z.len()); + + if (y.len() != 0) { + limb y0 = y[0]; + FASTFLOAT_TRY(small_mul(x, y0)); + for (size_t index = 1; index < y.len(); index++) { + limb yi = y[index]; + stackvec zi; + if (yi != 0) { + // re-use the same buffer throughout + zi.set_len(0); + FASTFLOAT_TRY(zi.try_extend(zs)); + FASTFLOAT_TRY(small_mul(zi, yi)); + limb_span zis = limb_span(zi.data, zi.len()); + FASTFLOAT_TRY(large_add_from(x, zis, index)); + } + } + } + + x.normalize(); + return true; +} + +// grade-school multiplication algorithm +template +FASTFLOAT_CONSTEXPR20 +bool large_mul(stackvec& x, limb_span y) noexcept { + if (y.len() == 1) { + FASTFLOAT_TRY(small_mul(x, y[0])); + } else { + FASTFLOAT_TRY(long_mul(x, y)); + } + return true; +} + +template +struct pow5_tables { + static constexpr uint32_t large_step = 135; + static constexpr uint64_t small_power_of_5[] = { + 1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL, + 1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL, + 6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL, + 3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL, + 2384185791015625UL, 11920928955078125UL, 59604644775390625UL, + 298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL, + }; +#ifdef FASTFLOAT_64BIT_LIMB + constexpr static limb large_power_of_5[] = { + 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, + 10482974169319127550UL, 198276706040285095UL}; +#else + constexpr static limb large_power_of_5[] = { + 4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U, + 1057042919U, 1071430142U, 2440757623U, 
381945767U, 46164893U}; +#endif +}; + +template +constexpr uint32_t pow5_tables::large_step; + +template +constexpr uint64_t pow5_tables::small_power_of_5[]; + +template +constexpr limb pow5_tables::large_power_of_5[]; + +// big integer type. implements a small subset of big integer +// arithmetic, using simple algorithms since asymptotically +// faster algorithms are slower for a small number of limbs. +// all operations assume the big-integer is normalized. +struct bigint : pow5_tables<> { + // storage of the limbs, in little-endian order. + stackvec vec; + + FASTFLOAT_CONSTEXPR20 bigint(): vec() {} + bigint(const bigint &) = delete; + bigint &operator=(const bigint &) = delete; + bigint(bigint &&) = delete; + bigint &operator=(bigint &&other) = delete; + + FASTFLOAT_CONSTEXPR20 bigint(uint64_t value): vec() { +#ifdef FASTFLOAT_64BIT_LIMB + vec.push_unchecked(value); +#else + vec.push_unchecked(uint32_t(value)); + vec.push_unchecked(uint32_t(value >> 32)); +#endif + vec.normalize(); + } + + // get the high 64 bits from the vector, and if bits were truncated. + // this is to get the significant digits for the float. + FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool& truncated) const noexcept { +#ifdef FASTFLOAT_64BIT_LIMB + if (vec.len() == 0) { + return empty_hi64(truncated); + } else if (vec.len() == 1) { + return uint64_hi64(vec.rindex(0), truncated); + } else { + uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated); + truncated |= vec.nonzero(2); + return result; + } +#else + if (vec.len() == 0) { + return empty_hi64(truncated); + } else if (vec.len() == 1) { + return uint32_hi64(vec.rindex(0), truncated); + } else if (vec.len() == 2) { + return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated); + } else { + uint64_t result = uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated); + truncated |= vec.nonzero(3); + return result; + } +#endif + } + + // compare two big integers, returning the large value. 
+ // assumes both are normalized. if the return value is + // negative, other is larger, if the return value is + // positive, this is larger, otherwise they are equal. + // the limbs are stored in little-endian order, so we + // must compare the limbs in reverse order. + FASTFLOAT_CONSTEXPR20 int compare(const bigint& other) const noexcept { + if (vec.len() > other.vec.len()) { + return 1; + } else if (vec.len() < other.vec.len()) { + return -1; + } else { + for (size_t index = vec.len(); index > 0; index--) { + limb xi = vec[index - 1]; + limb yi = other.vec[index - 1]; + if (xi > yi) { + return 1; + } else if (xi < yi) { + return -1; + } + } + return 0; + } + } + + // shift left each limb n bits, carrying over to the new limb + // returns true if we were able to shift all the digits. + FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept { + // Internally, for each item, we shift left by n, and add the previous + // right shifted limb-bits. + // For example, we transform (for u8) shifted left 2, to: + // b10100100 b01000010 + // b10 b10010001 b00001000 + FASTFLOAT_DEBUG_ASSERT(n != 0); + FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8); + + size_t shl = n; + size_t shr = limb_bits - shl; + limb prev = 0; + for (size_t index = 0; index < vec.len(); index++) { + limb xi = vec[index]; + vec[index] = (xi << shl) | (prev >> shr); + prev = xi; + } + + limb carry = prev >> shr; + if (carry != 0) { + return vec.try_push(carry); + } + return true; + } + + // move the limbs left by `n` limbs. 
+ FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept { + FASTFLOAT_DEBUG_ASSERT(n != 0); + if (n + vec.len() > vec.capacity()) { + return false; + } else if (!vec.is_empty()) { + // move limbs + limb* dst = vec.data + n; + const limb* src = vec.data; + std::copy_backward(src, src + vec.len(), dst + vec.len()); + // fill in empty limbs + limb* first = vec.data; + limb* last = first + n; + ::std::fill(first, last, 0); + vec.set_len(n + vec.len()); + return true; + } else { + return true; + } + } + + // move the limbs left by `n` bits. + FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept { + size_t rem = n % limb_bits; + size_t div = n / limb_bits; + if (rem != 0) { + FASTFLOAT_TRY(shl_bits(rem)); + } + if (div != 0) { + FASTFLOAT_TRY(shl_limbs(div)); + } + return true; + } + + // get the number of leading zeros in the bigint. + FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept { + if (vec.is_empty()) { + return 0; + } else { +#ifdef FASTFLOAT_64BIT_LIMB + return leading_zeroes(vec.rindex(0)); +#else + // no use defining a specialized leading_zeroes for a 32-bit type. + uint64_t r0 = vec.rindex(0); + return leading_zeroes(r0 << 32); +#endif + } + } + + // get the number of bits in the bigint. + FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept { + int lz = ctlz(); + return int(limb_bits * vec.len()) - lz; + } + + FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { + return small_mul(vec, y); + } + + FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { + return small_add(vec, y); + } + + // multiply as if by 2 raised to a power. + FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { + return shl(exp); + } + + // multiply as if by 5 raised to a power. 
+ FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept { + // multiply by a power of 5 + size_t large_length = sizeof(large_power_of_5) / sizeof(limb); + limb_span large = limb_span(large_power_of_5, large_length); + while (exp >= large_step) { + FASTFLOAT_TRY(large_mul(vec, large)); + exp -= large_step; + } +#ifdef FASTFLOAT_64BIT_LIMB + uint32_t small_step = 27; + limb max_native = 7450580596923828125UL; +#else + uint32_t small_step = 13; + limb max_native = 1220703125U; +#endif + while (exp >= small_step) { + FASTFLOAT_TRY(small_mul(vec, max_native)); + exp -= small_step; + } + if (exp != 0) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + // This is similar to https://github.com/llvm/llvm-project/issues/47746, + // except the workaround described there don't work here + FASTFLOAT_TRY( + small_mul(vec, limb(((void)small_power_of_5[0], small_power_of_5[exp]))) + ); + } + + return true; + } + + // multiply as if by 10 raised to a power. + FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept { + FASTFLOAT_TRY(pow5(exp)); + return pow2(exp); + } +}; + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_DIGIT_COMPARISON_H +#define FASTFLOAT_DIGIT_COMPARISON_H + +#include +#include +#include +#include + + +namespace fast_float { + +// 1e0 to 1e19 +constexpr static uint64_t powers_of_ten_uint64[] = { + 1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL, + 1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL, + 100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL, + 1000000000000000000UL, 10000000000000000000UL}; + +// calculate the exponent, in scientific notation, of the number. +// this algorithm is not even close to optimized, but it has no practical +// effect on performance: in order to have a faster algorithm, we'd need +// to slow down performance for faster algorithms, and this is still fast. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +int32_t scientific_exponent(parsed_number_string_t & num) noexcept { + uint64_t mantissa = num.mantissa; + int32_t exponent = int32_t(num.exponent); + while (mantissa >= 10000) { + mantissa /= 10000; + exponent += 4; + } + while (mantissa >= 100) { + mantissa /= 100; + exponent += 2; + } + while (mantissa >= 10) { + mantissa /= 10; + exponent += 1; + } + return exponent; +} + +// this converts a native floating-point number to an extended-precision float. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa to_extended(T value) noexcept { + using equiv_uint = typename binary_format::equiv_uint; + constexpr equiv_uint exponent_mask = binary_format::exponent_mask(); + constexpr equiv_uint mantissa_mask = binary_format::mantissa_mask(); + constexpr equiv_uint hidden_bit_mask = binary_format::hidden_bit_mask(); + + adjusted_mantissa am; + int32_t bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); + equiv_uint bits; +#if FASTFLOAT_HAS_BIT_CAST + bits = std::bit_cast(value); +#else + ::memcpy(&bits, &value, sizeof(T)); +#endif + if ((bits & exponent_mask) == 0) { + // denormal + am.power2 = 1 - bias; + am.mantissa = bits & mantissa_mask; + } else { + // normal + am.power2 = int32_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); + am.power2 -= bias; + am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; + } + + return am; +} + +// get the extended precision value of the halfway point between b and b+u. +// we are given a native float that represents b, so we need to adjust it +// halfway between b and b+u. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa to_extended_halfway(T value) noexcept { + adjusted_mantissa am = to_extended(value); + am.mantissa <<= 1; + am.mantissa += 1; + am.power2 -= 1; + return am; +} + +// round an extended-precision float to the nearest machine float. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +void round(adjusted_mantissa& am, callback cb) noexcept { + int32_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; + if (-am.power2 >= mantissa_shift) { + // have a denormal float + int32_t shift = -am.power2 + 1; + cb(am, std::min(shift, 64)); + // check for round-up: if rounding-nearest carried us to the hidden bit. + am.power2 = (am.mantissa < (uint64_t(1) << binary_format::mantissa_explicit_bits())) ? 0 : 1; + return; + } + + // have a normal float, use the default shift. + cb(am, mantissa_shift); + + // check for carry + if (am.mantissa >= (uint64_t(2) << binary_format::mantissa_explicit_bits())) { + am.mantissa = (uint64_t(1) << binary_format::mantissa_explicit_bits()); + am.power2++; + } + + // check for infinite: we could have carried to an infinite power + am.mantissa &= ~(uint64_t(1) << binary_format::mantissa_explicit_bits()); + if (am.power2 >= binary_format::infinite_power()) { + am.power2 = binary_format::infinite_power(); + am.mantissa = 0; + } +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept { + const uint64_t mask + = (shift == 64) + ? UINT64_MAX + : (uint64_t(1) << shift) - 1; + const uint64_t halfway + = (shift == 0) + ? 
0 + : uint64_t(1) << (shift - 1); + uint64_t truncated_bits = am.mantissa & mask; + bool is_above = truncated_bits > halfway; + bool is_halfway = truncated_bits == halfway; + + // shift digits into position + if (shift == 64) { + am.mantissa = 0; + } else { + am.mantissa >>= shift; + } + am.power2 += shift; + + bool is_odd = (am.mantissa & 1) == 1; + am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above)); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +void round_down(adjusted_mantissa& am, int32_t shift) noexcept { + if (shift == 64) { + am.mantissa = 0; + } else { + am.mantissa >>= shift; + } + am.power2 += shift; +} +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void skip_zeros(UC const * & first, UC const * last) noexcept { + uint64_t val; + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { + ::memcpy(&val, first, sizeof(uint64_t)); + if (val != int_cmp_zeros()) { + break; + } + first += int_cmp_len(); + } + while (first != last) { + if (*first != UC('0')) { + break; + } + first++; + } +} + +// determine if any non-zero digits were truncated. +// all characters must be valid digits. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool is_truncated(UC const * first, UC const * last) noexcept { + // do 8-bit optimizations, can just compare to 8 literal 0s. 
+ uint64_t val; + while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len()) { + ::memcpy(&val, first, sizeof(uint64_t)); + if (val != int_cmp_zeros()) { + return true; + } + first += int_cmp_len(); + } + while (first != last) { + if (*first != UC('0')) { + return true; + } + ++first; + } + return false; +} +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool is_truncated(span s) noexcept { + return is_truncated(s.ptr, s.ptr + s.len()); +} + + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void parse_eight_digits(const UC*& p, limb& value, size_t& counter, size_t& count) noexcept { + value = value * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + counter += 8; + count += 8; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 +void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept { + value = value * 10 + limb(*p - UC('0')); + p++; + counter++; + count++; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void add_native(bigint& big, limb power, limb value) noexcept { + big.mul(power); + big.add(value); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void round_up_bigint(bigint& big, size_t& count) noexcept { + // need to round-up the digits, but need to avoid rounding + // ....9999 to ...10000, which could cause a false halfway point. + add_native(big, 10, 1); + count++; +} + +// parse the significant digits into a big integer +template +inline FASTFLOAT_CONSTEXPR20 +void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_digits, size_t& digits) noexcept { + // try to minimize the number of big integer and scalar multiplication. + // therefore, try to parse 8 digits at a time, and multiply by the largest + // scalar value (9 or 19 digits) for each step. + size_t counter = 0; + digits = 0; + limb value = 0; +#ifdef FASTFLOAT_64BIT_LIMB + size_t step = 19; +#else + size_t step = 9; +#endif + + // process all integer digits. 
+ UC const * p = num.integer.ptr; + UC const * pend = p + num.integer.len(); + skip_zeros(p, pend); + // process all digits, in increments of step per loop + while (p != pend) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); + } + while (counter < step && p != pend && digits < max_digits) { + parse_one_digit(p, value, counter, digits); + } + if (digits == max_digits) { + // add the temporary value, then check if we've truncated any digits + add_native(result, limb(powers_of_ten_uint64[counter]), value); + bool truncated = is_truncated(p, pend); + if (num.fraction.ptr != nullptr) { + truncated |= is_truncated(num.fraction); + } + if (truncated) { + round_up_bigint(result, digits); + } + return; + } else { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + counter = 0; + value = 0; + } + } + + // add our fraction digits, if they're available. + if (num.fraction.ptr != nullptr) { + p = num.fraction.ptr; + pend = p + num.fraction.len(); + if (digits == 0) { + skip_zeros(p, pend); + } + // process all digits, in increments of step per loop + while (p != pend) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); + } + while (counter < step && p != pend && digits < max_digits) { + parse_one_digit(p, value, counter, digits); + } + if (digits == max_digits) { + // add the temporary value, then check if we've truncated any digits + add_native(result, limb(powers_of_ten_uint64[counter]), value); + bool truncated = is_truncated(p, pend); + if (truncated) { + round_up_bigint(result, digits); + } + return; + } else { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + counter = 0; + value = 0; + } + } + } + + if (counter != 0) { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + } +} + +template +inline FASTFLOAT_CONSTEXPR20 
+adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept { + FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent))); + adjusted_mantissa answer; + bool truncated; + answer.mantissa = bigmant.hi64(truncated); + int bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); + answer.power2 = bigmant.bit_length() - 64 + bias; + + round(answer, [truncated](adjusted_mantissa& a, int32_t shift) { + round_nearest_tie_even(a, shift, [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool { + return is_above || (is_halfway && truncated) || (is_odd && is_halfway); + }); + }); + + return answer; +} + +// the scaling here is quite simple: we have, for the real digits `m * 10^e`, +// and for the theoretical digits `n * 2^f`. Since `e` is always negative, +// to scale them identically, we do `n * 2^f * 5^-e`, so we now have `m * 2^e`. +// we then need to scale by `2^(f- e)`, and then the two significant digits +// are of the same magnitude. +template +inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept { + bigint& real_digits = bigmant; + int32_t real_exp = exponent; + + // get the value of `b`, rounded down, and get a bigint representation of b+h + adjusted_mantissa am_b = am; + // gcc7 bug: use a lambda to remove the noexcept qualifier bug with -Wnoexcept-type. + round(am_b, [](adjusted_mantissa&a, int32_t shift) { round_down(a, shift); }); + T b; + to_float(false, am_b, b); + adjusted_mantissa theor = to_extended_halfway(b); + bigint theor_digits(theor.mantissa); + int32_t theor_exp = theor.power2; + + // scale real digits and theor digits to be same power. 
+ int32_t pow2_exp = theor_exp - real_exp; + uint32_t pow5_exp = uint32_t(-real_exp); + if (pow5_exp != 0) { + FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp)); + } + if (pow2_exp > 0) { + FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp))); + } else if (pow2_exp < 0) { + FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp))); + } + + // compare digits, and use it to direct rounding + int ord = real_digits.compare(theor_digits); + adjusted_mantissa answer = am; + round(answer, [ord](adjusted_mantissa& a, int32_t shift) { + round_nearest_tie_even(a, shift, [ord](bool is_odd, bool _, bool __) -> bool { + (void)_; // not needed, since we've done our comparison + (void)__; // not needed, since we've done our comparison + if (ord > 0) { + return true; + } else if (ord < 0) { + return false; + } else { + return is_odd; + } + }); + }); + + return answer; +} + +// parse the significant digits as a big integer to unambiguously round the +// significant digits. here, we are trying to determine how to round +// an extended float representation close to `b+h`, halfway between `b` +// (the float rounded-down) and `b+u`, the next positive float. this +// algorithm is always correct, and uses one of two approaches. when +// the exponent is positive relative to the significant digits (such as +// 1234), we create a big-integer representation, get the high 64-bits, +// determine if any lower bits are truncated, and use that to direct +// rounding. in case of a negative exponent relative to the significant +// digits (such as 1.2345), we create a theoretical representation of +// `b` as a big-integer type, scaled to the same binary exponent as +// the actual digits. we then compare the big integer representations +// of both, and use that to direct rounding. 
+template +inline FASTFLOAT_CONSTEXPR20 +adjusted_mantissa digit_comp(parsed_number_string_t& num, adjusted_mantissa am) noexcept { + // remove the invalid exponent bias + am.power2 -= invalid_am_bias; + + int32_t sci_exp = scientific_exponent(num); + size_t max_digits = binary_format::max_digits(); + size_t digits = 0; + bigint bigmant; + parse_mantissa(bigmant, num, max_digits, digits); + // can't underflow, since digits is at most max_digits. + int32_t exponent = sci_exp + 1 - int32_t(digits); + if (exponent >= 0) { + return positive_digit_comp(bigmant, exponent); + } else { + return negative_digit_comp(bigmant, am, exponent); + } +} + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_PARSE_NUMBER_H +#define FASTFLOAT_PARSE_NUMBER_H + + +#include +#include +#include +#include +namespace fast_float { + + +namespace detail { +/** + * Special case +inf, -inf, nan, infinity, -infinity. + * The case comparisons could be made much faster given that we know that the + * strings are null-free and fixed. + **/ +template +from_chars_result_t FASTFLOAT_CONSTEXPR14 +parse_infnan(UC const * first, UC const * last, T &value) noexcept { + from_chars_result_t answer{}; + answer.ptr = first; + answer.ec = std::errc(); // be optimistic + bool minusSign = false; + if (*first == UC('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here + minusSign = true; + ++first; + } +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if (*first == UC('+')) { + ++first; + } +#endif + if (last - first >= 3) { + if (fastfloat_strncasecmp(first, str_const_nan(), 3)) { + answer.ptr = (first += 3); + value = minusSign ? -std::numeric_limits::quiet_NaN() : std::numeric_limits::quiet_NaN(); + // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). 
+ if(first != last && *first == UC('(')) { + for(UC const * ptr = first + 1; ptr != last; ++ptr) { + if (*ptr == UC(')')) { + answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) + break; + } + else if(!((UC('a') <= *ptr && *ptr <= UC('z')) || (UC('A') <= *ptr && *ptr <= UC('Z')) || (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_'))) + break; // forbidden char, not nan(n-char-seq-opt) + } + } + return answer; + } + if (fastfloat_strncasecmp(first, str_const_inf(), 3)) { + if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, str_const_inf() + 3, 5)) { + answer.ptr = first + 8; + } else { + answer.ptr = first + 3; + } + value = minusSign ? -std::numeric_limits::infinity() : std::numeric_limits::infinity(); + return answer; + } + } + answer.ec = std::errc::invalid_argument; + return answer; +} + +/** + * Returns true if the floating-point rounding mode is to 'nearest'. + * It is the default on most systems. This function is meant to be inexpensive. + * Credit : @mwalcott3 + */ +fastfloat_really_inline bool rounds_to_nearest() noexcept { + // https://lemire.me/blog/2020/06/26/gcc-not-nearest/ +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return false; +#endif + // See + // A fast function to check your floating-point rounding mode + // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/ + // + // This function is meant to be equivalent to : + // prior: #include + // return fegetround() == FE_TONEAREST; + // However, it is expected to be much faster than the fegetround() + // function call. + // + // The volatile keyword prevents the compiler from computing the function + // at compile-time. + // There might be other ways to prevent compile-time optimizations (e.g., asm). + // The value does not need to be std::numeric_limits::min(), any small + // value so that 1 + x should round to 1 would do (after accounting for excess + // precision, as in 387 instructions). 
+ static volatile float fmin = std::numeric_limits::min(); + float fmini = fmin; // we copy it so that it gets loaded at most once. + // + // Explanation: + // Only when fegetround() == FE_TONEAREST do we have that + // fmin + 1.0f == 1.0f - fmin. + // + // FE_UPWARD: + // fmin + 1.0f > 1 + // 1.0f - fmin == 1 + // + // FE_DOWNWARD or FE_TOWARDZERO: + // fmin + 1.0f == 1 + // 1.0f - fmin < 1 + // + // Note: This may fail to be accurate if fast-math has been + // enabled, as rounding conventions may not apply. + #ifdef FASTFLOAT_VISUAL_STUDIO + # pragma warning(push) + // todo: is there a VS warning? + // see https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013 + #elif defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wfloat-equal" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wfloat-equal" + #endif + return (fmini + 1.0f == 1.0f - fmini); + #ifdef FASTFLOAT_VISUAL_STUDIO + # pragma warning(pop) + #elif defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif +} + +} // namespace detail + +template +struct from_chars_caller +{ + template + FASTFLOAT_CONSTEXPR20 + static from_chars_result_t call(UC const * first, UC const * last, + T &value, parse_options_t options) noexcept { + return from_chars_advanced(first, last, value, options); + } +}; + +#if __STDCPP_FLOAT32_T__ == 1 +template <> +struct from_chars_caller +{ + template + FASTFLOAT_CONSTEXPR20 + static from_chars_result_t call(UC const * first, UC const * last, + std::float32_t &value, parse_options_t options) noexcept{ + // if std::float32_t is defined, and we are in C++23 mode; macro set for float32; + // set value to float due to equivalence between float and float32_t + float val; + auto ret = from_chars_advanced(first, last, val, options); + value = val; + return ret; + } +}; +#endif + +#if 
__STDCPP_FLOAT64_T__ == 1 +template <> +struct from_chars_caller +{ + template + FASTFLOAT_CONSTEXPR20 + static from_chars_result_t call(UC const * first, UC const * last, + std::float64_t &value, parse_options_t options) noexcept{ + // if std::float64_t is defined, and we are in C++23 mode; macro set for float64; + // set value as double due to equivalence between double and float64_t + double val; + auto ret = from_chars_advanced(first, last, val, options); + value = val; + return ret; + } +}; +#endif + + +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars(UC const * first, UC const * last, + T &value, chars_format fmt /*= chars_format::general*/) noexcept { + return from_chars_caller::call(first, last, value, parse_options_t(fmt)); +} + +/** + * This function overload takes parsed_number_string_t structure that is created and populated + * either by from_chars_advanced function taking chars range and parsing options + * or other parsing custom function implemented by user. + */ +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars_advanced(parsed_number_string_t& pns, + T &value) noexcept { + + static_assert (is_supported_float_type(), "only some floating-point types are supported"); + static_assert (is_supported_char_type(), "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; + + answer.ec = std::errc(); // be optimistic + answer.ptr = pns.lastmatch; + // The implementation of the Clinger's fast path is convoluted because + // we want round-to-nearest in all cases, irrespective of the rounding mode + // selected on the thread. + // We proceed optimistically, assuming that detail::rounds_to_nearest() returns + // true. + if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && !pns.too_many_digits) { + // Unfortunately, the conventional Clinger's fast path is only possible + // when the system rounds to the nearest float. 
+ // + // We expect the next branch to almost always be selected. + // We could check it first (before the previous branch), but + // there might be performance advantages at having the check + // be last. + if(!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) { + // We have that fegetround() == FE_TONEAREST. + // Next is Clinger's fast path. + if (pns.mantissa <=binary_format::max_mantissa_fast_path()) { + value = T(pns.mantissa); + if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } + else { value = value * binary_format::exact_power_of_ten(pns.exponent); } + if (pns.negative) { value = -value; } + return answer; + } + } else { + // We do not have that fegetround() == FE_TONEAREST. + // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal + if (pns.exponent >= 0 && pns.mantissa <=binary_format::max_mantissa_fast_path(pns.exponent)) { +#if defined(__clang__) || defined(FASTFLOAT_32BIT) + // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD + if(pns.mantissa == 0) { + value = pns.negative ? T(-0.) : T(0.); + return answer; + } +#endif + value = T(pns.mantissa) * binary_format::exact_power_of_ten(pns.exponent); + if (pns.negative) { value = -value; } + return answer; + } + } + } + adjusted_mantissa am = compute_float>(pns.exponent, pns.mantissa); + if(pns.too_many_digits && am.power2 >= 0) { + if(am != compute_float>(pns.exponent, pns.mantissa + 1)) { + am = compute_error>(pns.exponent, pns.mantissa); + } + } + // If we called compute_float>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0), + // then we need to go the long way around again. This is very uncommon. + if(am.power2 < 0) { am = digit_comp(pns, am); } + to_float(pns.negative, am, value); + // Test for over/underflow. 
+ if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || am.power2 == binary_format::infinite_power()) { + answer.ec = std::errc::result_out_of_range; + } + return answer; +} + +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars_advanced(UC const * first, UC const * last, + T &value, parse_options_t options) noexcept { + + static_assert (is_supported_float_type(), "only some floating-point types are supported"); + static_assert (is_supported_char_type(), "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default + while ((first != last) && fast_float::is_space(uint8_t(*first))) { + first++; + } +#endif + if (first == last) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + parsed_number_string_t pns = parse_number_string(first, last, options); + if (!pns.valid) { + if (options.format & chars_format::no_infnan) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } else { + return detail::parse_infnan(first, last, value); + } + } + + // call overload that takes parsed_number_string_t directly. 
+ return from_chars_advanced(pns, value); +} + + +template +FASTFLOAT_CONSTEXPR20 +from_chars_result_t from_chars(UC const* first, UC const* last, T& value, int base) noexcept { + static_assert (is_supported_char_type(), "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default + while ((first != last) && fast_float::is_space(uint8_t(*first))) { + first++; + } +#endif + if (first == last || base < 2 || base > 36) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + return parse_int_string(first, last, value, base); +} + +} // namespace fast_float + +#endif diff --git a/3rdparty/rapidyaml/include/c4/format.hpp b/3rdparty/rapidyaml/include/c4/format.hpp index 3035b115cb7c50..519c2a40df6b76 100644 --- a/3rdparty/rapidyaml/include/c4/format.hpp +++ b/3rdparty/rapidyaml/include/c4/format.hpp @@ -323,7 +323,7 @@ to_chars(substr buf, fmt::integral_padded_ fmt) return utoa(buf, fmt.val, fmt.radix, fmt.num_digits); } -/** read an format an integer unsigned type +/** read an integer type, detecting overflow (returns false on overflow) * @ingroup doc_from_chars */ template C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_ wrapper) @@ -332,6 +332,15 @@ C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_ wrapper) return atox(s, wrapper.val); return false; } +/** read an integer type, detecting overflow (returns false on overflow) + * @ingroup doc_from_chars */ +template +C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_ *wrapper) +{ + if(C4_LIKELY(!overflows(s))) + return atox(s, wrapper->val); + return false; +} //----------------------------------------------------------------------------- diff --git a/3rdparty/rapidyaml/include/c4/szconv.hpp b/3rdparty/rapidyaml/include/c4/szconv.hpp index 9d0c4786ccdd70..e571f9f3d0f9fd 100644 --- a/3rdparty/rapidyaml/include/c4/szconv.hpp +++ 
b/3rdparty/rapidyaml/include/c4/szconv.hpp @@ -53,7 +53,7 @@ szconv(SizeIn sz) noexcept template C4_ALWAYS_INLINE typename std::enable_if::value, SizeOut>::type -szconv(SizeIn sz) C4_NOEXCEPT_X +szconv(SizeIn sz) { C4_XASSERT(sz >= 0); C4_XASSERT_MSG((SizeIn)sz <= (SizeIn)std::numeric_limits::max(), "size conversion overflow: in=%zu", (size_t)sz); diff --git a/3rdparty/rapidyaml/include/c4/yml/common.hpp b/3rdparty/rapidyaml/include/c4/yml/common.hpp index f9c7b9abbe1439..3f9ef677405d4f 100644 --- a/3rdparty/rapidyaml/include/c4/yml/common.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/common.hpp @@ -5,8 +5,49 @@ #include #include +#include #include +#if defined(C4_MSVC) || defined(C4_MINGW) +#include +#else +#include +#endif + + + +//----------------------------------------------------------------------------- + +#ifndef RYML_ERRMSG_SIZE +/// size for the error message buffer +#define RYML_ERRMSG_SIZE (1024) +#endif + +#ifndef RYML_LOGBUF_SIZE +/// size for the buffer used to format individual values to string +/// while preparing an error message. This is only used for formatting +/// individual values in the message; final messages will be larger +/// than this value (see @ref RYML_ERRMSG_SIZE). This is also used for +/// the detailed debug log messages when RYML_DBG is defined. +#define RYML_LOGBUF_SIZE (256) +#endif + +#ifndef RYML_LOGBUF_SIZE_MAX +/// size for the fallback larger log buffer. When @ref +/// RYML_LOGBUF_SIZE is not large enough to convert a value to string, +/// then temporary stack memory is allocated up to +/// RYML_LOGBUF_SIZE_MAX. This limit is in place to prevent a stack +/// overflow. If the printed value requires more than +/// RYML_LOGBUF_SIZE_MAX, the value is silently skipped. +#define RYML_LOGBUF_SIZE_MAX (1024) +#endif + +#ifndef RYML_LOCATIONS_SMALL_THRESHOLD +/// threshold at which a location search will revert from linear to +/// binary search. 
+#define RYML_LOCATIONS_SMALL_THRESHOLD (30) +#endif + //----------------------------------------------------------------------------- // Specify groups to have a predefined topic order in doxygen: @@ -83,6 +124,11 @@ * */ +/** @defgroup doc_ref_utils Anchor/Reference utilities + * + * @see sample::sample_anchors_and_aliases + * */ + /** @defgroup doc_tag_utils Tag utilities * @see sample::sample_tags */ @@ -134,11 +180,13 @@ # define RYML_ASSERT(cond) RYML_CHECK(cond) # define RYML_ASSERT_MSG(cond, msg) RYML_CHECK_MSG(cond, msg) # define _RYML_CB_ASSERT(cb, cond) _RYML_CB_CHECK((cb), (cond)) +# define _RYML_CB_ASSERT_(cb, cond, loc) _RYML_CB_CHECK_((cb), (cond), (loc)) # define RYML_NOEXCEPT #else # define RYML_ASSERT(cond) # define RYML_ASSERT_MSG(cond, msg) # define _RYML_CB_ASSERT(cb, cond) +# define _RYML_CB_ASSERT_(cb, cond, loc) # define RYML_NOEXCEPT noexcept #endif @@ -148,7 +196,7 @@ do { \ if(C4_UNLIKELY(!(cond))) \ { \ - RYML_DEBUG_BREAK() \ + RYML_DEBUG_BREAK(); \ c4::yml::error("check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ C4_UNREACHABLE_AFTER_ERR(); \ } \ @@ -159,7 +207,7 @@ { \ if(C4_UNLIKELY(!(cond))) \ { \ - RYML_DEBUG_BREAK() \ + RYML_DEBUG_BREAK(); \ c4::yml::error(msg ": check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ C4_UNREACHABLE_AFTER_ERR(); \ } \ @@ -167,17 +215,16 @@ #if defined(RYML_DBG) && !defined(NDEBUG) && !defined(C4_NO_DEBUG_BREAK) # define RYML_DEBUG_BREAK() \ - { \ + do { \ if(c4::get_error_flags() & c4::ON_ERROR_DEBUGBREAK) \ { \ C4_DEBUG_BREAK(); \ } \ - } + } while(0) #else # define RYML_DEBUG_BREAK() #endif - /** @endcond */ @@ -190,11 +237,33 @@ namespace yml { C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") -enum : size_t { - /** a null position */ - npos = size_t(-1), + +#ifndef RYML_ID_TYPE +/** The type of a node id in the YAML tree. In the future, the default + * will likely change to int32_t, which was observed to be faster. 
+ * @see id_type */ +#define RYML_ID_TYPE size_t +#endif + + +/** The type of a node id in the YAML tree; to override the default + * type, define the macro @ref RYML_ID_TYPE to a suitable integer + * type. */ +using id_type = RYML_ID_TYPE; +static_assert(std::is_integral::value, "id_type must be an integer type"); + + +C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wuseless-cast") +enum : id_type { /** an index to none */ - NONE = size_t(-1) + NONE = id_type(-1), +}; +C4_SUPPRESS_WARNING_GCC_CLANG_POP + + +enum : size_t { + /** a null string position */ + npos = size_t(-1) }; @@ -212,28 +281,40 @@ struct RYML_EXPORT LineCol //! column size_t col; - LineCol() : offset(), line(), col() {} + LineCol() = default; //! construct from line and column LineCol(size_t l, size_t c) : offset(0), line(l), col(c) {} //! construct from offset, line and column LineCol(size_t o, size_t l, size_t c) : offset(o), line(l), col(c) {} }; +static_assert(std::is_trivial::value, "LineCol not trivial"); +static_assert(std::is_standard_layout::value, "Location not trivial"); //! a source file position -struct RYML_EXPORT Location : public LineCol +struct RYML_EXPORT Location { + //! number of bytes from the beginning of the source buffer + size_t offset; + //! line + size_t line; + //! column + size_t col; + //! 
file name csubstr name; - operator bool () const { return !name.empty() || line != 0 || offset != 0; } + operator bool () const { return !name.empty() || line != 0 || offset != 0 || col != 0; } + operator LineCol const& () const { return reinterpret_cast(*this); } - Location() : LineCol(), name() {} - Location( size_t l, size_t c) : LineCol{ l, c}, name( ) {} - Location( csubstr n, size_t l, size_t c) : LineCol{ l, c}, name(n) {} - Location( csubstr n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(n) {} - Location(const char *n, size_t l, size_t c) : LineCol{ l, c}, name(to_csubstr(n)) {} - Location(const char *n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(to_csubstr(n)) {} + Location() = default; + Location( size_t l, size_t c) : offset( ), line(l), col(c), name( ) {} + Location( size_t b, size_t l, size_t c) : offset(b), line(l), col(c), name( ) {} + Location( csubstr n, size_t l, size_t c) : offset( ), line(l), col(c), name(n) {} + Location( csubstr n, size_t b, size_t l, size_t c) : offset(b), line(l), col(c), name(n) {} + Location(const char *n, size_t l, size_t c) : offset( ), line(l), col(c), name(to_csubstr(n)) {} + Location(const char *n, size_t b, size_t l, size_t c) : offset(b), line(l), col(c), name(to_csubstr(n)) {} }; +static_assert(std::is_standard_layout::value, "Location not trivial"); //----------------------------------------------------------------------------- @@ -364,30 +445,30 @@ template } #define _RYML_CB_ERR(cb, msg_literal) \ + _RYML_CB_ERR_(cb, msg_literal, c4::yml::Location(__FILE__, 0, __LINE__, 0)) +#define _RYML_CB_CHECK(cb, cond) \ + _RYML_CB_CHECK_(cb, cond, c4::yml::Location(__FILE__, 0, __LINE__, 0)) +#define _RYML_CB_ERR_(cb, msg_literal, loc) \ do \ { \ const char msg[] = msg_literal; \ - RYML_DEBUG_BREAK() \ - c4::yml::error((cb), \ - msg, sizeof(msg), \ - c4::yml::Location(__FILE__, 0, __LINE__, 0)); \ + RYML_DEBUG_BREAK(); \ + c4::yml::error((cb), msg, sizeof(msg)-1, loc); \ C4_UNREACHABLE_AFTER_ERR(); \ 
} while(0) -#define _RYML_CB_CHECK(cb, cond) \ +#define _RYML_CB_CHECK_(cb, cond, loc) \ do \ { \ - if(!(cond)) \ + if(C4_UNLIKELY(!(cond))) \ { \ const char msg[] = "check failed: " #cond; \ - RYML_DEBUG_BREAK() \ - c4::yml::error((cb), \ - msg, sizeof(msg), \ - c4::yml::Location(__FILE__, 0, __LINE__, 0)); \ + RYML_DEBUG_BREAK(); \ + c4::yml::error((cb), msg, sizeof(msg)-1, loc); \ C4_UNREACHABLE_AFTER_ERR(); \ } \ } while(0) #define _RYML_CB_ALLOC_HINT(cb, T, num, hint) (T*) (cb).m_allocate((num) * sizeof(T), (hint), (cb).m_user_data) -#define _RYML_CB_ALLOC(cb, T, num) _RYML_CB_ALLOC_HINT((cb), (T), (num), nullptr) +#define _RYML_CB_ALLOC(cb, T, num) _RYML_CB_ALLOC_HINT((cb), T, (num), nullptr) #define _RYML_CB_FREE(cb, buf, T, num) \ do { \ (cb).m_free((buf), (num) * sizeof(T), (cb).m_user_data); \ @@ -395,7 +476,51 @@ do \ } while(0) + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +typedef enum { + BLOCK_LITERAL, //!< keep newlines (|) + BLOCK_FOLD //!< replace newline with single space (>) +} BlockStyle_e; + +typedef enum { + CHOMP_CLIP, //!< single newline at end (default) + CHOMP_STRIP, //!< no newline at end (-) + CHOMP_KEEP //!< all newlines from end (+) +} BlockChomp_e; + + +/** Abstracts the fact that a scalar filter result may not fit in the + * intended memory. */ +struct FilterResult +{ + C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; } + C4_ALWAYS_INLINE size_t required_len() const noexcept { return str.len; } + C4_ALWAYS_INLINE csubstr get() { RYML_ASSERT(valid()); return str; } + csubstr str; +}; +/** Abstracts the fact that a scalar filter result may not fit in the + * intended memory. 
*/ +struct FilterResultExtending +{ + C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; } + C4_ALWAYS_INLINE size_t required_len() const noexcept { return reqlen; } + C4_ALWAYS_INLINE csubstr get() { RYML_ASSERT(valid()); return str; } + csubstr str; + size_t reqlen; +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + + namespace detail { +// is there a better way to do this? template struct _charconstant_t : public std::conditional::value, @@ -411,10 +536,11 @@ struct _SubstrWriter { substr buf; size_t pos; - _SubstrWriter(substr buf_, size_t pos_=0) : buf(buf_), pos(pos_) {} + _SubstrWriter(substr buf_, size_t pos_=0) : buf(buf_), pos(pos_) { C4_ASSERT(buf.str); } void append(csubstr s) { C4_ASSERT(!s.overlaps(buf)); + C4_ASSERT(s.str || !s.len); if(s.len && pos + s.len <= buf.len) { C4_ASSERT(s.str); @@ -424,12 +550,14 @@ struct _SubstrWriter } void append(char c) { + C4_ASSERT(buf.str); if(pos < buf.len) buf.str[pos] = c; ++pos; } void append_n(char c, size_t numtimes) { + C4_ASSERT(buf.str); if(numtimes && pos + numtimes < buf.len) memset(buf.str + pos, c, numtimes); pos += numtimes; @@ -445,9 +573,71 @@ struct _SubstrWriter }; } // namespace detail + +namespace detail { +// dumpfn is a function abstracting prints to terminal (or to string). +template +C4_NO_INLINE void _dump(DumpFn &&dumpfn, csubstr fmt, Args&& ...args) +{ + DumpResults results; + // try writing everything: + { + // buffer for converting individual arguments. it is defined + // in a child scope to free it in case the buffer is too small + // for any of the arguments. 
+ char writebuf[RYML_LOGBUF_SIZE]; + results = format_dump_resume(std::forward(dumpfn), writebuf, fmt, std::forward(args)...); + } + // if any of the arguments failed to fit the buffer, allocate a + // larger buffer (up to a limit) and resume writing. + // + // results.bufsize is set to the size of the largest element + // serialized. Eg int(1) will require 1 byte. + if(C4_UNLIKELY(results.bufsize > RYML_LOGBUF_SIZE)) + { + const size_t bufsize = results.bufsize <= RYML_LOGBUF_SIZE_MAX ? results.bufsize : RYML_LOGBUF_SIZE_MAX; + #ifdef C4_MSVC + substr largerbuf = {static_cast(_alloca(bufsize)), bufsize}; + #else + substr largerbuf = {static_cast(alloca(bufsize)), bufsize}; + #endif + results = format_dump_resume(std::forward(dumpfn), results, largerbuf, fmt, std::forward(args)...); + } +} +template +C4_NORETURN C4_NO_INLINE void _report_err(Callbacks const& C4_RESTRICT callbacks, csubstr fmt, Args const& C4_RESTRICT ...args) +{ + char errmsg[RYML_ERRMSG_SIZE] = {0}; + detail::_SubstrWriter writer(errmsg); + auto dumpfn = [&writer](csubstr s){ writer.append(s); }; + _dump(dumpfn, fmt, args...); + writer.append('\n'); + const size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE; + callbacks.m_error(errmsg, len, {}, callbacks.m_user_data); + C4_UNREACHABLE_AFTER_ERR(); +} +} // namespace detail + + +inline csubstr _c4prc(const char &C4_RESTRICT c) // pass by reference! 
+{ + switch(c) + { + case '\n': return csubstr("\\n"); + case '\t': return csubstr("\\t"); + case '\0': return csubstr("\\0"); + case '\r': return csubstr("\\r"); + case '\f': return csubstr("\\f"); + case '\b': return csubstr("\\b"); + case '\v': return csubstr("\\v"); + case '\a': return csubstr("\\a"); + default: return csubstr(&c, 1); + } +} + /// @endcond -C4_SUPPRESS_WARNING_GCC_CLANG_POP +C4_SUPPRESS_WARNING_GCC_POP } // namespace yml } // namespace c4 diff --git a/3rdparty/rapidyaml/include/c4/yml/detail/checks.hpp b/3rdparty/rapidyaml/include/c4/yml/detail/checks.hpp index 39b49e856b1ed5..14f3a1db043cc5 100644 --- a/3rdparty/rapidyaml/include/c4/yml/detail/checks.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/detail/checks.hpp @@ -17,7 +17,7 @@ namespace c4 { namespace yml { -void check_invariants(Tree const& t, size_t node=NONE); +void check_invariants(Tree const& t, id_type node=NONE); void check_free_list(Tree const& t); void check_arena(Tree const& t); @@ -26,7 +26,7 @@ void check_arena(Tree const& t); //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -inline void check_invariants(Tree const& t, size_t node) +inline void check_invariants(Tree const& t, id_type node) { if(node == NONE) { @@ -34,8 +34,8 @@ inline void check_invariants(Tree const& t, size_t node) node = t.root_id(); } - auto const& n = *t._p(node); -#ifdef RYML_DBG + NodeData const& n = *t._p(node); +#if defined(RYML_DBG) && 0 if(n.m_first_child != NONE || n.m_last_child != NONE) { printf("check(%zu): fc=%zu lc=%zu\n", node, n.m_first_child, n.m_last_child); @@ -100,10 +100,10 @@ inline void check_invariants(Tree const& t, size_t node) C4_CHECK(t._p(n.m_next_sibling)->m_next_sibling != node); } - size_t count = 0; - for(size_t i = n.m_first_child; i != NONE; i = t.next_sibling(i)) + id_type count = 0; + for(id_type i = n.m_first_child; i != NONE; i = t.next_sibling(i)) { -#ifdef 
RYML_DBG +#if defined(RYML_DBG) && 0 printf("check(%zu): descend to child[%zu]=%zu\n", node, count, i); #endif auto const& ch = *t._p(i); @@ -131,7 +131,7 @@ inline void check_invariants(Tree const& t, size_t node) check_arena(t); } - for(size_t i = t.first_child(node); i != NONE; i = t.next_sibling(i)) + for(id_type i = t.first_child(node); i != NONE; i = t.next_sibling(i)) { check_invariants(t, i); } @@ -159,8 +159,8 @@ inline void check_free_list(Tree const& t) //C4_CHECK(head.m_prev_sibling == NONE); //C4_CHECK(tail.m_next_sibling == NONE); - size_t count = 0; - for(size_t i = t.m_free_head, prev = NONE; i != NONE; i = t._p(i)->m_next_sibling) + id_type count = 0; + for(id_type i = t.m_free_head, prev = NONE; i != NONE; i = t._p(i)->m_next_sibling) { auto const& elm = *t._p(i); if(&elm != &head) diff --git a/3rdparty/rapidyaml/include/c4/yml/detail/parser_dbg.hpp b/3rdparty/rapidyaml/include/c4/yml/detail/parser_dbg.hpp index 0f5e69361d7571..393bce2b4e889e 100644 --- a/3rdparty/rapidyaml/include/c4/yml/detail/parser_dbg.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/detail/parser_dbg.hpp @@ -4,7 +4,11 @@ #ifndef _C4_YML_COMMON_HPP_ #include "../common.hpp" #endif + +#ifdef RYML_DBG #include +#endif + //----------------------------------------------------------------------------- // some debugging scaffolds @@ -23,109 +27,123 @@ #pragma clang diagnostic ignored "-Werror" #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" -// some debugging scaffolds -#ifdef RYML_DBG -#include -namespace c4 { -inline void _dbg_dumper(csubstr s) { fwrite(s.str, 1, s.len, stdout); }; -template -void _dbg_printf(c4::csubstr fmt, Args&& ...args) -{ - static char writebuf[256]; - auto results = c4::format_dump_resume<&_dbg_dumper>(writebuf, fmt, std::forward(args)...); - // resume writing if the results failed to fit the buffer - if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. 
Eg int(1), will require 1 byte. - { - results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward(args)...); - if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) - { - results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward(args)...); - } - } -} -} // namespace c4 -# define _c4dbgt(fmt, ...) this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, ## __VA_ARGS__) -# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__) -# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ ) -# define _c4dbgq(msg) _dbg_printf(msg "\n") +#ifndef RYML_DBG # define _c4err(fmt, ...) \ - do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ - this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0) -#else + this->_err("ERROR: " fmt, ## __VA_ARGS__) # define _c4dbgt(fmt, ...) # define _c4dbgpf(fmt, ...) +# define _c4dbgpf_(fmt, ...) # define _c4dbgp(msg) +# define _c4dbgp_(msg) # define _c4dbgq(msg) +# define _c4presc(...) +# define _c4prscalar(msg, scalar, keep_newlines) +#else # define _c4err(fmt, ...) \ - do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ - this->_err("ERROR: " fmt, ## __VA_ARGS__); } while(0) -#endif + do { RYML_DEBUG_BREAK(); this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0) +# define _c4dbgt(fmt, ...) do { if(_dbg_enabled()) { \ + this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, ## __VA_ARGS__); } } while(0) +# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgpf_(fmt, ...) _dbg_printf("{}:{}: " fmt , __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ ) +# define _c4dbgp_(msg) _dbg_printf("{}:{}: " msg , __FILE__, __LINE__ ) +# define _c4dbgq(msg) _dbg_printf(msg "\n") +# define _c4presc(...) 
do { if(_dbg_enabled()) __c4presc(__VA_ARGS__); } while(0) +# define _c4prscalar(msg, scalar, keep_newlines) \ + do { \ + _c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \ + if(_dbg_enabled()) { \ + __c4presc((scalar).str, (scalar).len, (keep_newlines)); \ + } \ + _c4dbgq("~~~"); \ + } while(0) +#endif // RYML_DBG + -#define _c4prsp(sp) sp -#define _c4presc(s) __c4presc(s.str, s.len) -inline c4::csubstr _c4prc(const char &C4_RESTRICT c) +//----------------------------------------------------------------------------- + +#ifdef RYML_DBG + +#include +namespace c4 { +inline bool& _dbg_enabled() { static bool enabled = true; return enabled; } +inline void _dbg_set_enabled(bool yes) { _dbg_enabled() = yes; } +inline void _dbg_dumper(csubstr s) { - switch(c) + if(s.str) + fwrite(s.str, 1, s.len, stdout); +} +inline substr _dbg_buf() noexcept +{ + static char writebuf[2048]; + return writebuf; +} +template +C4_NO_INLINE void _dbg_printf(c4::csubstr fmt, Args const& ...args) +{ + if(_dbg_enabled()) { - case '\n': return c4::csubstr("\\n"); - case '\t': return c4::csubstr("\\t"); - case '\0': return c4::csubstr("\\0"); - case '\r': return c4::csubstr("\\r"); - case '\f': return c4::csubstr("\\f"); - case '\b': return c4::csubstr("\\b"); - case '\v': return c4::csubstr("\\v"); - case '\a': return c4::csubstr("\\a"); - default: return c4::csubstr(&c, 1); + substr buf = _dbg_buf(); + const size_t needed_size = c4::format_dump(&_dbg_dumper, buf, fmt, args...); + C4_CHECK(needed_size <= buf.len); } } -inline void __c4presc(const char *s, size_t len) +inline void __c4presc(const char *s, size_t len, bool keep_newlines=false) { + RYML_ASSERT(s || !len); size_t prev = 0; for(size_t i = 0; i < len; ++i) { switch(s[i]) { - case '\n' : if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('n'); putchar('\n'); prev = i+1; break; - case '\t' : if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('t'); prev = i+1; break; - case '\0' : if(i > prev) { 
fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('0'); prev = i+1; break; - case '\r' : if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('r'); prev = i+1; break; - case '\f' : if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('f'); prev = i+1; break; - case '\b' : if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('b'); prev = i+1; break; - case '\v' : if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('v'); prev = i+1; break; - case '\a' : if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('a'); prev = i+1; break; - case '\x1b': if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('e'); prev = i+1; break; + case '\n' : _dbg_printf("{}{}{}", csubstr(s+prev, i-prev), csubstr("\\n"), csubstr(keep_newlines ? "\n":"")); prev = i+1; break; + case '\t' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\t")); prev = i+1; break; + case '\0' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\0")); prev = i+1; break; + case '\r' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\r")); prev = i+1; break; + case '\f' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\f")); prev = i+1; break; + case '\b' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\b")); prev = i+1; break; + case '\v' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\v")); prev = i+1; break; + case '\a' : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\a")); prev = i+1; break; + case '\x1b': _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\x1b")); prev = i+1; break; case -0x3e/*0xc2u*/: if(i+1 < len) { if(s[i+1] == -0x60/*0xa0u*/) { - if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('_'); prev = i+2; ++i; + _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\_")); prev = i+1; } else if(s[i+1] == -0x7b/*0x85u*/) { - if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } 
putchar('\\'); putchar('N'); prev = i+2; ++i; + _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\N")); prev = i+1; } - break; } + break; case -0x1e/*0xe2u*/: if(i+2 < len && s[i+1] == -0x80/*0x80u*/) { if(s[i+2] == -0x58/*0xa8u*/) { - if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('L'); prev = i+3; i += 2; + _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\L")); prev = i+1; } else if(s[i+2] == -0x57/*0xa9u*/) { - if(i > prev) { fwrite(s+prev, 1, i-prev, stdout); } putchar('\\'); putchar('P'); prev = i+3; i += 2; + _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\P")); prev = i+1; } - break; } + break; } } if(len > prev) - fwrite(s + prev, 1, len - prev, stdout); + _dbg_printf("{}", csubstr(s+prev, len-prev)); } +inline void __c4presc(csubstr s, bool keep_newlines=false) +{ + __c4presc(s.str, s.len, keep_newlines); +} +} // namespace c4 + +#endif // RYML_DBG #pragma clang diagnostic pop #pragma GCC diagnostic pop @@ -134,5 +152,4 @@ inline void __c4presc(const char *s, size_t len) # pragma warning(pop) #endif - #endif /* _C4_YML_DETAIL_PARSER_DBG_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/detail/print.hpp b/3rdparty/rapidyaml/include/c4/yml/detail/print.hpp index 3a1de7ae4c56b2..dd10964ae2baf0 100644 --- a/3rdparty/rapidyaml/include/c4/yml/detail/print.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/detail/print.hpp @@ -4,20 +4,76 @@ #include "c4/yml/tree.hpp" #include "c4/yml/node.hpp" +#ifdef RYML_DBG +#define _c4dbg_tree(...) print_tree(__VA_ARGS__) +#define _c4dbg_node(...) print_tree(__VA_ARGS__) +#else +#define _c4dbg_tree(...) +#define _c4dbg_node(...) 
+#endif namespace c4 { namespace yml { C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") +C4_SUPPRESS_WARNING_GCC("-Wuseless-cast") -inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bool print_children) +inline const char* _container_style_code(Tree const& p, id_type node) { - printf("[%zd]%*s[%zd] %p", count, (2*level), "", node, (void const*)p.get(node)); + if(p.is_container(node)) + { + if(p._p(node)->m_type & (FLOW_SL|FLOW_ML)) + { + return "[FLOW]"; + } + if(p._p(node)->m_type & (BLOCK)) + { + return "[BLCK]"; + } + } + return ""; +} +inline char _scalar_code(NodeType masked) +{ + if(masked & (KEY_LITERAL|VAL_LITERAL)) + return '|'; + if(masked & (KEY_FOLDED|VAL_FOLDED)) + return '>'; + if(masked & (KEY_SQUO|VAL_SQUO)) + return '\''; + if(masked & (KEY_DQUO|VAL_DQUO)) + return '"'; + if(masked & (KEY_PLAIN|VAL_PLAIN)) + return '~'; + return '@'; +} +inline char _scalar_code_key(NodeType t) +{ + return _scalar_code(t & KEY_STYLE); +} +inline char _scalar_code_val(NodeType t) +{ + return _scalar_code(t & VAL_STYLE); +} +inline char _scalar_code_key(Tree const& p, id_type node) +{ + return _scalar_code_key(p._p(node)->m_type); +} +inline char _scalar_code_val(Tree const& p, id_type node) +{ + return _scalar_code_key(p._p(node)->m_type); +} +inline id_type print_node(Tree const& p, id_type node, int level, id_type count, bool print_children) +{ + printf("[%zu]%*s[%zu] %p", (size_t)count, (2*level), "", (size_t)node, (void const*)p.get(node)); if(p.is_root(node)) { printf(" [ROOT]"); } - printf(" %s:", p.type_str(node)); + char typebuf[128]; + csubstr typestr = p.type(node).type_str(typebuf); + RYML_CHECK(typestr.str); + printf(" %.*s", (int)typestr.len, typestr.str); if(p.has_key(node)) { if(p.has_key_anchor(node)) @@ -28,65 +84,47 @@ inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bo if(p.has_key_tag(node)) { csubstr kt = p.key_tag(node); - csubstr k = p.key(node); - printf(" %.*s '%.*s'", 
(int)kt.len, kt.str, (int)k.len, k.str); + printf(" <%.*s>", (int)kt.len, kt.str); } - else - { - csubstr k = p.key(node); - printf(" '%.*s'", (int)k.len, k.str); - } - } - else - { - RYML_ASSERT( ! p.has_key_tag(node)); + const char code = _scalar_code_key(p, node); + csubstr k = p.key(node); + printf(" %c%.*s%c :", code, (int)k.len, k.str, code); } - if(p.has_val(node)) + if(p.has_val_anchor(node)) { - if(p.has_val_tag(node)) - { - csubstr vt = p.val_tag(node); - csubstr v = p.val(node); - printf(" %.*s '%.*s'", (int)vt.len, vt.str, (int)v.len, v.str); - } - else - { - csubstr v = p.val(node); - printf(" '%.*s'", (int)v.len, v.str); - } + csubstr a = p.val_anchor(node); + printf(" &%.*s'", (int)a.len, a.str); } - else + if(p.has_val_tag(node)) { - if(p.has_val_tag(node)) - { - csubstr vt = p.val_tag(node); - printf(" %.*s", (int)vt.len, vt.str); - } + csubstr vt = p.val_tag(node); + printf(" <%.*s>", (int)vt.len, vt.str); } - if(p.has_val_anchor(node)) + if(p.has_val(node)) { - auto &a = p.val_anchor(node); - printf(" valanchor='&%.*s'", (int)a.len, a.str); + const char code = _scalar_code_val(p, node); + csubstr v = p.val(node); + printf(" %c%.*s%c", code, (int)v.len, v.str, code); } - printf(" (%zd sibs)", p.num_siblings(node)); + printf(" (%zu sibs)", (size_t)p.num_siblings(node)); ++count; - if(p.is_container(node)) + if(!p.is_container(node)) + { + printf("\n"); + } + else { - printf(" %zd children:\n", p.num_children(node)); + printf(" (%zu children)\n", (size_t)p.num_children(node)); if(print_children) { - for(size_t i = p.first_child(node); i != NONE; i = p.next_sibling(i)) + for(id_type i = p.first_child(node); i != NONE; i = p.next_sibling(i)) { count = print_node(p, i, level+1, count, print_children); } } } - else - { - printf("\n"); - } return count; } @@ -106,21 +144,37 @@ inline void print_node(ConstNodeRef const& p, int level=0) //----------------------------------------------------------------------------- 
//----------------------------------------------------------------------------- -inline size_t print_tree(Tree const& p, size_t node=NONE) +inline id_type print_tree(const char *message, Tree const& p, id_type node=NONE) { printf("--------------------------------------\n"); - size_t ret = 0; + if(message != nullptr) + printf("%s:\n", message); + id_type ret = 0; if(!p.empty()) { if(node == NONE) node = p.root_id(); ret = print_node(p, node, 0, 0, true); } - printf("#nodes=%zd vs #printed=%zd\n", p.size(), ret); + printf("#nodes=%zu vs #printed=%zu\n", (size_t)p.size(), (size_t)ret); printf("--------------------------------------\n"); return ret; } +inline id_type print_tree(Tree const& p, id_type node=NONE) +{ + return print_tree(nullptr, p, node); +} + +inline void print_tree(ConstNodeRef const& p, int level) +{ + print_node(p, level); + for(ConstNodeRef ch : p.children()) + { + print_tree(ch, level+1); + } +} + C4_SUPPRESS_WARNING_GCC_CLANG_POP } /* namespace yml */ diff --git a/3rdparty/rapidyaml/include/c4/yml/detail/stack.hpp b/3rdparty/rapidyaml/include/c4/yml/detail/stack.hpp index c12b03b99d5322..8a5d3783c00444 100644 --- a/3rdparty/rapidyaml/include/c4/yml/detail/stack.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/detail/stack.hpp @@ -18,22 +18,27 @@ C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") namespace detail { -/** A lightweight contiguous stack with SSO. This avoids a dependency on std. */ -template +/** A lightweight contiguous stack with Small Storage + * Optimization. This is required because std::vector can throw + * exceptions, and we don't want to enforce any particular error + * mechanism. 
*/ +template class stack { static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); static_assert(std::is_trivially_destructible::value, "T must be trivially destructible"); - enum : size_t { sso_size = N }; +public: + + enum : id_type { sso_size = N }; public: - T m_buf[N]; - T * m_stack; - size_t m_size; - size_t m_capacity; - Callbacks m_callbacks; + T m_buf[size_t(N)]; + T *C4_RESTRICT m_stack; + id_type m_size; + id_type m_capacity; + Callbacks m_callbacks; public: @@ -79,29 +84,29 @@ class stack public: - size_t size() const { return m_size; } - size_t empty() const { return m_size == 0; } - size_t capacity() const { return m_capacity; } + id_type size() const { return m_size; } + id_type empty() const { return m_size == 0; } + id_type capacity() const { return m_capacity; } void clear() { m_size = 0; } - void resize(size_t sz) + void resize(id_type sz) { reserve(sz); m_size = sz; } - void reserve(size_t sz); + void reserve(id_type sz); void push(T const& C4_RESTRICT n) { - RYML_ASSERT((const char*)&n + sizeof(T) < (const char*)m_stack || &n > m_stack + m_capacity); + _RYML_CB_ASSERT(m_callbacks, (const char*)&n + sizeof(T) < (const char*)m_stack || &n > m_stack + m_capacity); if(m_size == m_capacity) { - size_t cap = m_capacity == 0 ? N : 2 * m_capacity; + id_type cap = m_capacity == 0 ? N : 2 * m_capacity; reserve(cap); } m_stack[m_size] = n; @@ -110,10 +115,10 @@ class stack void push_top() { - RYML_ASSERT(m_size > 0); + _RYML_CB_ASSERT(m_callbacks, m_size > 0); if(m_size == m_capacity) { - size_t cap = m_capacity == 0 ? N : 2 * m_capacity; + id_type cap = m_capacity == 0 ? 
N : 2 * m_capacity; reserve(cap); } m_stack[m_size] = m_stack[m_size - 1]; @@ -122,25 +127,25 @@ class stack T const& C4_RESTRICT pop() { - RYML_ASSERT(m_size > 0); + _RYML_CB_ASSERT(m_callbacks, m_size > 0); --m_size; return m_stack[m_size]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT top() const { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; } - C4_ALWAYS_INLINE T & C4_RESTRICT top() { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT top() const { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[m_size - 1]; } + C4_ALWAYS_INLINE T & C4_RESTRICT top() { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[m_size - 1]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT bottom() const { RYML_ASSERT(m_size > 0); return m_stack[0]; } - C4_ALWAYS_INLINE T & C4_RESTRICT bottom() { RYML_ASSERT(m_size > 0); return m_stack[0]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT bottom() const { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[0]; } + C4_ALWAYS_INLINE T & C4_RESTRICT bottom() { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[0]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT top(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; } - C4_ALWAYS_INLINE T & C4_RESTRICT top(size_t i) { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT top(id_type i) const { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[m_size - 1 - i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT top(id_type i) { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[m_size - 1 - i]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT bottom(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; } - C4_ALWAYS_INLINE T & C4_RESTRICT bottom(size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT bottom(id_type i) const { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT 
bottom(id_type i) { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; } - C4_ALWAYS_INLINE T const& C4_RESTRICT operator[](size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; } - C4_ALWAYS_INLINE T & C4_RESTRICT operator[](size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T const& C4_RESTRICT operator[](id_type i) const { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT operator[](id_type i) { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; } public: @@ -154,10 +159,12 @@ class stack const_iterator end () const { return (const_iterator)m_stack + m_size; } public: + void _free(); void _cp(stack const* C4_RESTRICT that); void _mv(stack * that); void _cb(Callbacks const& cb); + }; @@ -165,8 +172,8 @@ class stack //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -template -void stack::reserve(size_t sz) +template +void stack::reserve(id_type sz) { if(sz <= m_size) return; @@ -176,11 +183,12 @@ void stack::reserve(size_t sz) m_capacity = N; return; } - T *buf = (T*) m_callbacks.m_allocate(sz * sizeof(T), m_stack, m_callbacks.m_user_data); - memcpy(buf, m_stack, m_size * sizeof(T)); + T *buf = (T*) m_callbacks.m_allocate((size_t)sz * sizeof(T), m_stack, m_callbacks.m_user_data); + _RYML_CB_ASSERT(m_callbacks, ((uintptr_t)buf % alignof(T)) == 0u); + memcpy(buf, m_stack, (size_t)m_size * sizeof(T)); if(m_stack != m_buf) { - m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data); + m_callbacks.m_free(m_stack, (size_t)m_capacity * sizeof(T), m_callbacks.m_user_data); } m_stack = buf; m_capacity = sz; @@ -189,38 +197,38 @@ void stack::reserve(size_t sz) //----------------------------------------------------------------------------- -template +template void stack::_free() { - RYML_ASSERT(m_stack != nullptr); // this structure cannot be 
memset() to zero + _RYML_CB_ASSERT(m_callbacks, m_stack != nullptr); // this structure cannot be memset() to zero if(m_stack != m_buf) { - m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data); + m_callbacks.m_free(m_stack, (size_t)m_capacity * sizeof(T), m_callbacks.m_user_data); m_stack = m_buf; m_size = N; m_capacity = N; } else { - RYML_ASSERT(m_capacity == N); + _RYML_CB_ASSERT(m_callbacks, m_capacity == N); } } //----------------------------------------------------------------------------- -template +template void stack::_cp(stack const* C4_RESTRICT that) { if(that->m_stack != that->m_buf) { - RYML_ASSERT(that->m_capacity > N); - RYML_ASSERT(that->m_size <= that->m_capacity); + _RYML_CB_ASSERT(m_callbacks, that->m_capacity > N); + _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity); } else { - RYML_ASSERT(that->m_capacity <= N); - RYML_ASSERT(that->m_size <= that->m_capacity); + _RYML_CB_ASSERT(m_callbacks, that->m_capacity <= N); + _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity); } memcpy(m_stack, that->m_stack, that->m_size * sizeof(T)); m_size = that->m_size; @@ -231,19 +239,19 @@ void stack::_cp(stack const* C4_RESTRICT that) //----------------------------------------------------------------------------- -template +template void stack::_mv(stack * that) { if(that->m_stack != that->m_buf) { - RYML_ASSERT(that->m_capacity > N); - RYML_ASSERT(that->m_size <= that->m_capacity); + _RYML_CB_ASSERT(m_callbacks, that->m_capacity > N); + _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity); m_stack = that->m_stack; } else { - RYML_ASSERT(that->m_capacity <= N); - RYML_ASSERT(that->m_size <= that->m_capacity); + _RYML_CB_ASSERT(m_callbacks, that->m_capacity <= N); + _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity); memcpy(m_buf, that->m_buf, that->m_size * sizeof(T)); m_stack = m_buf; } @@ -251,7 +259,7 @@ void stack::_mv(stack * that) m_capacity = that->m_capacity; m_callbacks = 
that->m_callbacks; // make sure no deallocation happens on destruction - RYML_ASSERT(that->m_stack != m_buf); + _RYML_CB_ASSERT(m_callbacks, that->m_stack != m_buf); that->m_stack = that->m_buf; that->m_capacity = N; that->m_size = 0; @@ -260,7 +268,7 @@ void stack::_mv(stack * that) //----------------------------------------------------------------------------- -template +template void stack::_cb(Callbacks const& cb) { if(cb != m_callbacks) diff --git a/3rdparty/rapidyaml/include/c4/yml/emit.def.hpp b/3rdparty/rapidyaml/include/c4/yml/emit.def.hpp index efec0c42759409..3b8896e20cf8a8 100644 --- a/3rdparty/rapidyaml/include/c4/yml/emit.def.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/emit.def.hpp @@ -6,12 +6,15 @@ #endif /** @file emit.def.hpp Definitions for emit functions. */ +#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_ +#include "c4/yml/detail/parser_dbg.hpp" +#endif namespace c4 { namespace yml { template -substr Emitter::emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess) +substr Emitter::emit_as(EmitType_e type, Tree const& t, id_type id, bool error_on_excess) { if(t.empty()) { @@ -22,45 +25,31 @@ substr Emitter::emit_as(EmitType_e type, Tree const& t, size_t id, bool id = t.root_id(); _RYML_CB_CHECK(t.callbacks(), id < t.capacity()); m_tree = &t; + m_flow = false; if(type == EMIT_YAML) _emit_yaml(id); else if(type == EMIT_JSON) - _do_visit_json(id); + _do_visit_json(id, 0); else _RYML_CB_ERR(m_tree->callbacks(), "unknown emit type"); m_tree = nullptr; return this->Writer::_get(error_on_excess); } -template -substr Emitter::emit_as(EmitType_e type, Tree const& t, bool error_on_excess) -{ - if(t.empty()) - return {}; - return this->emit_as(type, t, t.root_id(), error_on_excess); -} - -template -substr Emitter::emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess) -{ - _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); - return this->emit_as(type, *n.tree(), n.id(), error_on_excess); -} - 
//----------------------------------------------------------------------------- template -void Emitter::_emit_yaml(size_t id) +void Emitter::_emit_yaml(id_type id) { // save branches in the visitor by doing the initial stream/doc // logic here, sparing the need to check stream/val/keyval inside // the visitor functions - auto dispatch = [this](size_t node){ + auto dispatch = [this](id_type node){ NodeType ty = m_tree->type(node); - if(ty.marked_flow_sl()) + if(ty.is_flow_sl()) _do_visit_flow_sl(node, 0); - else if(ty.marked_flow_ml()) + else if(ty.is_flow_ml()) _do_visit_flow_ml(node, 0); else { @@ -69,48 +58,51 @@ void Emitter::_emit_yaml(size_t id) }; if(!m_tree->is_root(id)) { - if(m_tree->is_container(id) && !m_tree->type(id).marked_flow()) + if(m_tree->is_container(id) && !m_tree->type(id).is_flow()) { - size_t ilevel = 0; + id_type ilevel = 0; if(m_tree->has_key(id)) { this->Writer::_do_write(m_tree->key(id)); this->Writer::_do_write(":\n"); ++ilevel; } - _do_visit_block_container(id, ilevel, ilevel); + _do_visit_block_container(id, 0, ilevel, ilevel); return; } } - auto *btd = m_tree->tag_directives().b; - auto *etd = m_tree->tag_directives().e; - auto write_tag_directives = [&btd, etd, this](size_t next_node){ - auto end = btd; - while(end < etd) + TagDirectiveRange tagds = m_tree->tag_directives(); + auto write_tag_directives = [&tagds, this](const id_type next_node){ + TagDirective const* C4_RESTRICT end = tagds.b; + while(end < tagds.e) { if(end->next_node_id > next_node) break; ++end; } - for( ; btd != end; ++btd) + const id_type parent = m_tree->parent(next_node); + for( ; tagds.b != end; ++tagds.b) { - if(next_node != m_tree->first_child(m_tree->parent(next_node))) + if(next_node != m_tree->first_child(parent)) this->Writer::_do_write("...\n"); this->Writer::_do_write("%TAG "); - this->Writer::_do_write(btd->handle); + this->Writer::_do_write(tagds.b->handle); this->Writer::_do_write(' '); - this->Writer::_do_write(btd->prefix); + 
this->Writer::_do_write(tagds.b->prefix); this->Writer::_do_write('\n'); } }; if(m_tree->is_stream(id)) { - if(m_tree->first_child(id) != NONE) - write_tag_directives(m_tree->first_child(id)); - for(size_t child = m_tree->first_child(id); child != NONE; child = m_tree->next_sibling(child)) + const id_type first_child = m_tree->first_child(id); + if(first_child != NONE) + write_tag_directives(first_child); + for(id_type child = first_child; child != NONE; child = m_tree->next_sibling(child)) { dispatch(child); + if(m_tree->is_doc(child) && m_tree->type(child).is_flow_sl()) + this->Writer::_do_write('\n'); if(m_tree->next_sibling(child) != NONE) write_tag_directives(m_tree->next_sibling(child)); } @@ -130,14 +122,14 @@ void Emitter::_emit_yaml(size_t id) _writek(id, 0); this->Writer::_do_write(": "); _writev(id, 0); - if(!m_tree->type(id).marked_flow()) + if(!m_tree->type(id).is_flow()) this->Writer::_do_write('\n'); } else if(m_tree->is_val(id)) { //this->Writer::_do_write("- "); _writev(id, 0); - if(!m_tree->type(id).marked_flow()) + if(!m_tree->type(id).is_flow()) this->Writer::_do_write('\n'); } else if(m_tree->type(id) == NOTYPE) @@ -150,58 +142,138 @@ void Emitter::_emit_yaml(size_t id) } } +#define _rymlindent_nextline() this->_indent(ilevel + 1); + template -void Emitter::_write_doc(size_t id) +void Emitter::_write_doc(id_type id) { RYML_ASSERT(m_tree->is_doc(id)); + RYML_ASSERT(!m_tree->has_key(id)); if(!m_tree->is_root(id)) { RYML_ASSERT(m_tree->is_stream(m_tree->parent(id))); this->Writer::_do_write("---"); } + // if(!m_tree->has_val(id)) // this is more frequent { - if(m_tree->has_val_tag(id)) + const bool tag = m_tree->has_val_tag(id); + const bool anchor = m_tree->has_val_anchor(id); + if(!tag && !anchor) + { + ; + } + else if(!tag && anchor) + { + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(id)); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + 
if(m_tree->has_children(id) && m_tree->is_root(id)) + this->Writer::_do_write('\n'); + #endif + } + else if(tag && !anchor) { if(!m_tree->is_root(id)) this->Writer::_do_write(' '); _write_tag(m_tree->val_tag(id)); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(m_tree->has_children(id) && m_tree->is_root(id)) + this->Writer::_do_write('\n'); + #endif } - if(m_tree->has_val_anchor(id)) + else // tag && anchor { if(!m_tree->is_root(id)) this->Writer::_do_write(' '); - this->Writer::_do_write('&'); + _write_tag(m_tree->val_tag(id)); + this->Writer::_do_write(" &"); this->Writer::_do_write(m_tree->val_anchor(id)); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(m_tree->has_children(id) && m_tree->is_root(id)) + this->Writer::_do_write('\n'); + #endif } } else // docval { - RYML_ASSERT(m_tree->has_val(id)); - RYML_ASSERT(!m_tree->has_key(id)); - if(!m_tree->is_root(id)) - this->Writer::_do_write(' '); - _writev(id, 0); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_val(id)); + // some plain scalars such as '...' 
and '---' must not + // appear at 0-indentation + const csubstr val = m_tree->val(id); + const bool preceded_by_3_dashes = !m_tree->is_root(id); + const type_bits style_marks = m_tree->type(id) & (KEY_STYLE|VAL_STYLE); + const bool is_plain = m_tree->type(id).is_val_plain(); + const bool is_ambiguous = (is_plain || !style_marks) + && ((val.begins_with("...") || val.begins_with("---")) + || + (val.find('\n') != npos)); + if(preceded_by_3_dashes) + { + if(val.len == 0 && !m_tree->has_val_anchor(id) && !m_tree->has_val_tag(id)) + { + this->Writer::_do_write('\n'); + return; + } + else if(val.len && is_ambiguous) + { + this->Writer::_do_write('\n'); + } + else + { + this->Writer::_do_write(' '); + } + } + id_type ilevel = 0u; + if(is_ambiguous) + { + _rymlindent_nextline(); + ++ilevel; + } + _writev(id, ilevel); + if(val.len && m_tree->is_root(id)) + this->Writer::_do_write('\n'); } - this->Writer::_do_write('\n'); + if(!m_tree->is_root(id)) + this->Writer::_do_write('\n'); } template -void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) +void Emitter::_do_visit_flow_sl(id_type node, id_type depth, id_type ilevel) { - RYML_ASSERT(!m_tree->is_stream(node)); - RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); - RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + const bool prev_flow = m_flow; + m_flow = true; + _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_stream(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + if(C4_UNLIKELY(depth > m_opts.max_depth())) + _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded"); if(m_tree->is_doc(node)) { _write_doc(node); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED if(!m_tree->has_children(node)) return; + else + #endif + { + if(m_tree->is_map(node)) + { + this->Writer::_do_write('{'); + } 
+ else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_seq(node)); + this->Writer::_do_write('['); + } + } } else if(m_tree->is_container(node)) { - RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node)); bool spc = false; // write a space @@ -243,7 +315,7 @@ void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) } } // container - for(size_t child = m_tree->first_child(node), count = 0; child != NONE; child = m_tree->next_sibling(child)) + for(id_type child = m_tree->first_child(node), count = 0; child != NONE; child = m_tree->next_sibling(child)) { if(count++) this->Writer::_do_write(','); @@ -260,7 +332,7 @@ void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) else { // with single-line flow, we can never go back to block - _do_visit_flow_sl(child, ilevel + 1); + _do_visit_flow_sl(child, depth + 1, ilevel + 1); } } @@ -272,112 +344,118 @@ void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) { this->Writer::_do_write(']'); } + m_flow = prev_flow; } C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4702) // unreachable error, triggered by flow_ml not implemented template -void Emitter::_do_visit_flow_ml(size_t id, size_t ilevel, size_t do_indent) +void Emitter::_do_visit_flow_ml(id_type id, id_type depth, id_type ilevel, id_type do_indent) { C4_UNUSED(id); + C4_UNUSED(depth); C4_UNUSED(ilevel); C4_UNUSED(do_indent); c4::yml::error("not implemented"); + #ifdef THIS_IS_A_WORK_IN_PROGRESS + if(C4_UNLIKELY(depth > m_opts.max_depth())) + _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded"); + const bool prev_flow = m_flow; + m_flow = true; + // do it... 
+ m_flow = prev_flow; + #endif } template -void Emitter::_do_visit_block_container(size_t node, size_t next_level, size_t do_indent) +void Emitter::_do_visit_block_container(id_type node, id_type depth, id_type level, bool do_indent) { - RepC ind = indent_to(do_indent * next_level); - if(m_tree->is_seq(node)) { - for(size_t child = m_tree->first_child(node); child != NONE; child = m_tree->next_sibling(child)) + for(id_type child = m_tree->first_child(node); child != NONE; child = m_tree->next_sibling(child)) { _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->has_key(child)); if(m_tree->is_val(child)) { - this->Writer::_do_write(ind); + _indent(level, do_indent); this->Writer::_do_write("- "); - _writev(child, next_level); + _writev(child, level); this->Writer::_do_write('\n'); } else { _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(child)); NodeType ty = m_tree->type(child); - if(ty.marked_flow_sl()) + if(ty.is_flow_sl()) { - this->Writer::_do_write(ind); + _indent(level, do_indent); this->Writer::_do_write("- "); - _do_visit_flow_sl(child, 0u); + _do_visit_flow_sl(child, depth+1, 0u); this->Writer::_do_write('\n'); } - else if(ty.marked_flow_ml()) + else if(ty.is_flow_ml()) { - this->Writer::_do_write(ind); + _indent(level, do_indent); this->Writer::_do_write("- "); - _do_visit_flow_ml(child, next_level, do_indent); + _do_visit_flow_ml(child, depth+1, 0u, do_indent); this->Writer::_do_write('\n'); } else { - _do_visit_block(child, next_level, do_indent); + _do_visit_block(child, depth+1, level, do_indent); // same indentation level } } do_indent = true; - ind = indent_to(do_indent * next_level); } } else // map { _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node)); - for(size_t ich = m_tree->first_child(node); ich != NONE; ich = m_tree->next_sibling(ich)) + for(id_type ich = m_tree->first_child(node); ich != NONE; ich = m_tree->next_sibling(ich)) { _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_key(ich)); if(m_tree->is_keyval(ich)) { - 
this->Writer::_do_write(ind); - _writek(ich, next_level); + _indent(level, do_indent); + _writek(ich, level); this->Writer::_do_write(": "); - _writev(ich, next_level); + _writev(ich, level); this->Writer::_do_write('\n'); } else { _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(ich)); NodeType ty = m_tree->type(ich); - if(ty.marked_flow_sl()) + if(ty.is_flow_sl()) { - this->Writer::_do_write(ind); - _do_visit_flow_sl(ich, 0u); + _indent(level, do_indent); + _do_visit_flow_sl(ich, depth+1, 0u); this->Writer::_do_write('\n'); } - else if(ty.marked_flow_ml()) + else if(ty.is_flow_ml()) { - this->Writer::_do_write(ind); - _do_visit_flow_ml(ich, 0u); + _indent(level, do_indent); + _do_visit_flow_ml(ich, depth+1, 0u); this->Writer::_do_write('\n'); } else { - _do_visit_block(ich, next_level, do_indent); + _do_visit_block(ich, depth+1, level, do_indent); // same level! } - } + } // keyval vs container do_indent = true; - ind = indent_to(do_indent * next_level); - } - } + } // for children + } // seq vs map } template -void Emitter::_do_visit_block(size_t node, size_t ilevel, size_t do_indent) +void Emitter::_do_visit_block(id_type node, id_type depth, id_type ilevel, id_type do_indent) { - RYML_ASSERT(!m_tree->is_stream(node)); - RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); - RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); - RepC ind = indent_to(do_indent * ilevel); - + _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_stream(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node)); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + if(C4_UNLIKELY(depth > m_opts.max_depth())) + _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded"); if(m_tree->is_doc(node)) { _write_doc(node); @@ -386,21 +464,19 @@ void Emitter::_do_visit_block(size_t node, size_t ilevel, size_t do_inde } 
else if(m_tree->is_container(node)) { - RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); - + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node)); bool spc = false; // write a space bool nl = false; // write a newline - if(m_tree->has_key(node)) { - this->Writer::_do_write(ind); + _indent(ilevel, do_indent); _writek(node, ilevel); this->Writer::_do_write(':'); spc = true; } else if(!m_tree->is_root(node)) { - this->Writer::_do_write(ind); + _indent(ilevel, do_indent); this->Writer::_do_write('-'); spc = true; } @@ -452,20 +528,22 @@ void Emitter::_do_visit_block(size_t node, size_t ilevel, size_t do_inde } } // container - size_t next_level = ilevel + 1; + id_type next_level = ilevel + 1; if(m_tree->is_root(node) || m_tree->is_doc(node)) next_level = ilevel; // do not indent at top level - _do_visit_block_container(node, next_level, do_indent); + _do_visit_block_container(node, depth, next_level, do_indent); } C4_SUPPRESS_WARNING_MSVC_POP template -void Emitter::_do_visit_json(size_t id) +void Emitter::_do_visit_json(id_type id, id_type depth) { _RYML_CB_CHECK(m_tree->callbacks(), !m_tree->is_stream(id)); // JSON does not have streams + if(C4_UNLIKELY(depth > m_opts.max_depth())) + _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded"); if(m_tree->is_keyval(id)) { _writek_json(id); @@ -489,11 +567,11 @@ void Emitter::_do_visit_json(size_t id) this->Writer::_do_write('{'); } // container - for(size_t ich = m_tree->first_child(id); ich != NONE; ich = m_tree->next_sibling(ich)) + for(id_type ich = m_tree->first_child(id); ich != NONE; ich = m_tree->next_sibling(ich)) { if(ich != m_tree->first_child(id)) this->Writer::_do_write(','); - _do_visit_json(ich); + _do_visit_json(ich, depth+1); } if(m_tree->is_seq(id)) @@ -503,7 +581,7 @@ void Emitter::_do_visit_json(size_t id) } template -void Emitter::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t ilevel) +void Emitter::_write(NodeScalar const& C4_RESTRICT sc, 
NodeType flags, id_type ilevel) { if( ! sc.tag.empty()) { @@ -523,98 +601,162 @@ void Emitter::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, s if(sc.anchor != "<<") this->Writer::_do_write('*'); this->Writer::_do_write(sc.anchor); + if(flags.is_key_ref()) + this->Writer::_do_write(' '); return; } // ensure the style flags only have one of KEY or VAL - _RYML_CB_ASSERT(m_tree->callbacks(), ((flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE)) == 0) || (((flags&_WIP_KEY_STYLE) == 0) != ((flags&_WIP_VAL_STYLE) == 0))); - - auto style_marks = flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE); - if(style_marks & (_WIP_KEY_LITERAL|_WIP_VAL_LITERAL)) + _RYML_CB_ASSERT(m_tree->callbacks(), ((flags & SCALAR_STYLE) == 0) || (((flags&KEY_STYLE) == 0) != ((flags&VAL_STYLE) == 0))); + type_bits style_marks = flags & SCALAR_STYLE; + if(!style_marks) + style_marks = scalar_style_choose(sc.scalar); + if(style_marks & (KEY_LITERAL|VAL_LITERAL)) { _write_scalar_literal(sc.scalar, ilevel, flags.has_key()); } - else if(style_marks & (_WIP_KEY_FOLDED|_WIP_VAL_FOLDED)) + else if(style_marks & (KEY_FOLDED|VAL_FOLDED)) { _write_scalar_folded(sc.scalar, ilevel, flags.has_key()); } - else if(style_marks & (_WIP_KEY_SQUO|_WIP_VAL_SQUO)) + else if(style_marks & (KEY_SQUO|VAL_SQUO)) { _write_scalar_squo(sc.scalar, ilevel); } - else if(style_marks & (_WIP_KEY_DQUO|_WIP_VAL_DQUO)) + else if(style_marks & (KEY_DQUO|VAL_DQUO)) { _write_scalar_dquo(sc.scalar, ilevel); } - else if(style_marks & (_WIP_KEY_PLAIN|_WIP_VAL_PLAIN)) - { - _write_scalar_plain(sc.scalar, ilevel); - } - else if(!style_marks) + else if(style_marks & (KEY_PLAIN|VAL_PLAIN)) { - size_t first_non_nl = sc.scalar.first_not_of('\n'); - bool all_newlines = first_non_nl == npos; - bool has_leading_ws = (!all_newlines) && sc.scalar.sub(first_non_nl).begins_with_any(" \t"); - bool do_literal = ((!sc.scalar.empty() && all_newlines) || (has_leading_ws && !sc.scalar.trim(' ').empty())); - if(do_literal) - { - _write_scalar_literal(sc.scalar, ilevel, 
flags.has_key(), /*explicit_indentation*/has_leading_ws); - } + if(C4_LIKELY(!(sc.scalar.begins_with(": ") || sc.scalar.begins_with(":\t")))) + _write_scalar_plain(sc.scalar, ilevel); else - { - for(size_t i = 0; i < sc.scalar.len; ++i) - { - if(sc.scalar.str[i] == '\n') - { - _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws); - goto wrote_special; - } - // todo: check for escaped characters requiring double quotes - } - _write_scalar(sc.scalar, flags.is_quoted()); - wrote_special: - ; - } + _write_scalar_squo(sc.scalar, ilevel); } else { _RYML_CB_ERR(m_tree->callbacks(), "not implemented"); } } + template void Emitter::_write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags) { - if(C4_UNLIKELY( ! sc.tag.empty())) - _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have tags"); + if(flags & (KEYTAG|VALTAG)) + if(m_opts.json_error_flags() & EmitOptions::JSON_ERR_ON_TAG) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have tags"); if(C4_UNLIKELY(flags.has_anchor())) - _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have anchors"); - _write_scalar_json(sc.scalar, flags.has_key(), flags.is_quoted()); + if(m_opts.json_error_flags() & EmitOptions::JSON_ERR_ON_ANCHOR) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have anchors"); + if(sc.scalar.len) + { + // use double quoted style... 
+ // if it is a key (mandatory in JSON) + // if the style is marked quoted + bool dquoted = ((flags & (KEY|VALQUO)) + || (scalar_style_json_choose(sc.scalar) & SCALAR_DQUO)); // choose the style + if(dquoted) + _write_scalar_json_dquo(sc.scalar); + else + this->Writer::_do_write(sc.scalar); + } + else + { + if(sc.scalar.str || (flags & (KEY|VALQUO|KEYTAG|VALTAG))) + this->Writer::_do_write("\"\""); + else + this->Writer::_do_write("null"); + } +} + +template +size_t Emitter::_write_escaped_newlines(csubstr s, size_t i) +{ + RYML_ASSERT(s.len > i); + RYML_ASSERT(s.str[i] == '\n'); + //_c4dbgpf("nl@i={} rem=[{}]~~~{}~~~", i, s.sub(i).len, s.sub(i)); + // add an extra newline for each sequence of consecutive + // newline/whitespace + this->Writer::_do_write('\n'); + do + { + this->Writer::_do_write('\n'); // write the newline again + ++i; // increase the outer loop counter! + } while(i < s.len && s.str[i] == '\n'); + _RYML_CB_ASSERT(m_tree->callbacks(), i > 0); + --i; + _RYML_CB_ASSERT(m_tree->callbacks(), s.str[i] == '\n'); + return i; } -#define _rymlindent_nextline() for(size_t lv = 0; lv < ilevel+1; ++lv) { this->Writer::_do_write(' '); this->Writer::_do_write(' '); } +inline bool _is_indented_block(csubstr s, size_t prev, size_t i) noexcept +{ + if(prev == 0 && s.begins_with_any(" \t")) + return true; + const size_t pos = s.first_not_of('\n', i); + return (pos != npos) && (s.str[pos] == ' ' || s.str[pos] == '\t'); +} template -void Emitter::_write_scalar_literal(csubstr s, size_t ilevel, bool explicit_key, bool explicit_indentation) +size_t Emitter::_write_indented_block(csubstr s, size_t i, id_type ilevel) { + //_c4dbgpf("indblock@i={} rem=[{}]~~~\n{}~~~", i, s.sub(i).len, s.sub(i)); + _RYML_CB_ASSERT(m_tree->callbacks(), i > 0); + _RYML_CB_ASSERT(m_tree->callbacks(), s.str[i-1] == '\n'); + _RYML_CB_ASSERT(m_tree->callbacks(), i < s.len); + _RYML_CB_ASSERT(m_tree->callbacks(), s.str[i] == ' ' || s.str[i] == '\t' || s.str[i] == '\n'); +again: + size_t pos = 
s.find("\n ", i); + if(pos == npos) + pos = s.find("\n\t", i); + if(pos != npos) + { + ++pos; + //_c4dbgpf("indblock line@i={} rem=[{}]~~~\n{}~~~", i, s.range(i, pos).len, s.range(i, pos)); + _rymlindent_nextline(); + this->Writer::_do_write(s.range(i, pos)); + i = pos; + goto again; + } + // consume the newlines after the indented block + // to prevent them from being escaped + pos = s.find('\n', i); + if(pos != npos) + { + const size_t pos2 = s.first_not_of('\n', pos); + pos = (pos2 != npos) ? pos2 : pos; + //_c4dbgpf("indblock line@i={} rem=[{}]~~~\n{}~~~", i, s.range(i, pos).len, s.range(i, pos)); + _rymlindent_nextline(); + this->Writer::_do_write(s.range(i, pos)); + i = pos; + } + return i; +} + +template +void Emitter::_write_scalar_literal(csubstr s, id_type ilevel, bool explicit_key) +{ + _RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos); if(explicit_key) this->Writer::_do_write("? "); - csubstr trimmed = s.trimr("\n\r"); - size_t numnewlines_at_end = s.len - trimmed.len - s.sub(trimmed.len).count('\r'); + csubstr trimmed = s.trimr('\n'); + const size_t numnewlines_at_end = s.len - trimmed.len; + const bool is_newline_only = (trimmed.len == 0 && (s.len > 0)); + const bool explicit_indentation = s.triml("\n\r").begins_with_any(" \t"); // - if(!explicit_indentation) - this->Writer::_do_write('|'); - else - this->Writer::_do_write("|2"); + this->Writer::_do_write('|'); + if(explicit_indentation) + this->Writer::_do_write('2'); // - if(numnewlines_at_end > 1 || (trimmed.len == 0 && s.len > 0)/*only newlines*/) - this->Writer::_do_write("+\n"); - else if(numnewlines_at_end == 1) - this->Writer::_do_write('\n'); - else - this->Writer::_do_write("-\n"); + if(numnewlines_at_end > 1 || is_newline_only) + this->Writer::_do_write('+'); + else if(numnewlines_at_end == 0) + this->Writer::_do_write('-'); // if(trimmed.len) { + this->Writer::_do_write('\n'); size_t pos = 0; // tracks the last character that was already written for(size_t i = 0; i < 
trimmed.len; ++i) { @@ -631,81 +773,93 @@ void Emitter::_write_scalar_literal(csubstr s, size_t ilevel, bool expli _rymlindent_nextline() this->Writer::_do_write(trimmed.sub(pos)); } - if(numnewlines_at_end) - { - this->Writer::_do_write('\n'); - --numnewlines_at_end; - } - } - for(size_t i = 0; i < numnewlines_at_end; ++i) - { - _rymlindent_nextline() - if(i+1 < numnewlines_at_end || explicit_key) - this->Writer::_do_write('\n'); } - if(explicit_key && !numnewlines_at_end) + for(size_t i = !is_newline_only; i < numnewlines_at_end; ++i) + this->Writer::_do_write('\n'); + if(explicit_key) this->Writer::_do_write('\n'); } template -void Emitter::_write_scalar_folded(csubstr s, size_t ilevel, bool explicit_key) +void Emitter::_write_scalar_folded(csubstr s, id_type ilevel, bool explicit_key) { if(explicit_key) - { this->Writer::_do_write("? "); - } - RYML_ASSERT(s.find("\r") == csubstr::npos); + _RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos); csubstr trimmed = s.trimr('\n'); - size_t numnewlines_at_end = s.len - trimmed.len; + const size_t numnewlines_at_end = s.len - trimmed.len; + const bool is_newline_only = (trimmed.len == 0 && (s.len > 0)); + const bool explicit_indentation = s.triml("\n\r").begins_with_any(" \t"); + // + this->Writer::_do_write('>'); + if(explicit_indentation) + this->Writer::_do_write('2'); + // if(numnewlines_at_end == 0) - { - this->Writer::_do_write(">-\n"); - } - else if(numnewlines_at_end == 1) - { - this->Writer::_do_write(">\n"); - } - else if(numnewlines_at_end > 1) - { - this->Writer::_do_write(">+\n"); - } + this->Writer::_do_write('-'); + else if(numnewlines_at_end > 1 || is_newline_only) + this->Writer::_do_write('+'); + // if(trimmed.len) { + this->Writer::_do_write('\n'); size_t pos = 0; // tracks the last character that was already written for(size_t i = 0; i < trimmed.len; ++i) { if(trimmed[i] != '\n') continue; - // write everything up to this point - csubstr since_pos = trimmed.range(pos, i+1); // include 
the newline - pos = i+1; // because of the newline - _rymlindent_nextline() - this->Writer::_do_write(since_pos); - this->Writer::_do_write('\n'); // write the newline twice + // escape newline sequences + if( ! _is_indented_block(s, pos, i)) + { + if(pos < i) + { + _rymlindent_nextline() + this->Writer::_do_write(s.range(pos, i)); + i = _write_escaped_newlines(s, i); + pos = i+1; + } + else + { + if(i+1 < s.len) + { + if(s.str[i+1] == '\n') + { + ++i; + i = _write_escaped_newlines(s, i); + pos = i+1; + } + else + { + this->Writer::_do_write('\n'); + pos = i+1; + } + } + } + } + else // do not escape newlines in indented blocks + { + ++i; + _rymlindent_nextline() + this->Writer::_do_write(s.range(pos, i)); + if(pos > 0 || !s.begins_with_any(" \t")) + i = _write_indented_block(s, i, ilevel); + pos = i; + } } if(pos < trimmed.len) { _rymlindent_nextline() this->Writer::_do_write(trimmed.sub(pos)); } - if(numnewlines_at_end) - { - this->Writer::_do_write('\n'); - --numnewlines_at_end; - } - } - for(size_t i = 0; i < numnewlines_at_end; ++i) - { - _rymlindent_nextline() - if(i+1 < numnewlines_at_end || explicit_key) - this->Writer::_do_write('\n'); } - if(explicit_key && !numnewlines_at_end) + for(size_t i = !is_newline_only; i < numnewlines_at_end; ++i) + this->Writer::_do_write('\n'); + if(explicit_key) this->Writer::_do_write('\n'); } template -void Emitter::_write_scalar_squo(csubstr s, size_t ilevel) +void Emitter::_write_scalar_squo(csubstr s, id_type ilevel) { size_t pos = 0; // tracks the last character that was already written this->Writer::_do_write('\''); @@ -713,18 +867,20 @@ void Emitter::_write_scalar_squo(csubstr s, size_t ilevel) { if(s[i] == '\n') { - csubstr sub = s.range(pos, i+1); - this->Writer::_do_write(sub); // write everything up to (including) this char - this->Writer::_do_write('\n'); // write the character again - if(i + 1 < s.len) - _rymlindent_nextline() // indent the next line + this->Writer::_do_write(s.range(pos, i)); // write 
everything up to (excluding) this char + //_c4dbgpf("newline at {}. writing ~~~{}~~~", i, s.range(pos, i)); + i = _write_escaped_newlines(s, i); + //_c4dbgpf("newline --> {}", i); + if(i < s.len) + _rymlindent_nextline() pos = i+1; } else if(s[i] == '\'') { csubstr sub = s.range(pos, i+1); - this->Writer::_do_write(sub); // write everything up to (including) this char - this->Writer::_do_write('\''); // write the character again + //_c4dbgpf("squote at {}. writing ~~~{}~~~", i, sub); + this->Writer::_do_write(sub); // write everything up to (including) this squote + this->Writer::_do_write('\''); // write the squote again pos = i+1; } } @@ -735,233 +891,184 @@ void Emitter::_write_scalar_squo(csubstr s, size_t ilevel) } template -void Emitter::_write_scalar_dquo(csubstr s, size_t ilevel) +void Emitter::_write_scalar_dquo(csubstr s, id_type ilevel) { size_t pos = 0; // tracks the last character that was already written this->Writer::_do_write('"'); for(size_t i = 0; i < s.len; ++i) { const char curr = s.str[i]; - if(curr == '"' || curr == '\\') + switch(curr) + { + case '"': + case '\\': { csubstr sub = s.range(pos, i); this->Writer::_do_write(sub); // write everything up to (excluding) this char this->Writer::_do_write('\\'); // write the escape this->Writer::_do_write(curr); // write the char pos = i+1; + break; } - else if(s[i] == '\n') +#ifndef prefer_writing_newlines_as_double_newlines + case '\n': { - csubstr sub = s.range(pos, i+1); - this->Writer::_do_write(sub); // write everything up to (including) this newline - this->Writer::_do_write('\n'); // write the newline again - if(i + 1 < s.len) - _rymlindent_nextline() // indent the next line + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write("\\n"); // write the escape pos = i+1; - if(i+1 < s.len) // escape leading whitespace after the newline + (void)ilevel; + break; + } +#else + case '\n': + { + // write everything up to 
(excluding) this newline + //_c4dbgpf("nl@i={} rem=[{}]~~~{}~~~", i, s.sub(i).len, s.sub(i)); + this->Writer::_do_write(s.range(pos, i)); + i = _write_escaped_newlines(s, i); + ++i; + pos = i; + // as for the next line... + if(i < s.len) { - const char next = s.str[i+1]; - if(next == ' ' || next == '\t') + _rymlindent_nextline() // indent the next line + // escape leading whitespace, and flush it + size_t first = s.first_not_of(" \t", i); + _c4dbgpf("@i={} first={} rem=[{}]~~~{}~~~", i, first, s.sub(i).len, s.sub(i)); + if(first > i) + { + if(first == npos) + first = s.len; + this->Writer::_do_write('\\'); + this->Writer::_do_write(s.range(i, first)); this->Writer::_do_write('\\'); + i = first-1; + pos = first; + } } + break; } - else if(curr == ' ' || curr == '\t') + // escape trailing whitespace before a newline + case ' ': + case '\t': { - // escape trailing whitespace before a newline - size_t next = s.first_not_of(" \t\r", i); - if(next != npos && s[next] == '\n') + const size_t next = s.first_not_of(" \t\r", i); + if(next != npos && s.str[next] == '\n') { csubstr sub = s.range(pos, i); this->Writer::_do_write(sub); // write everything up to (excluding) this char this->Writer::_do_write('\\'); // escape the whitespace pos = i; } + break; } - else if(C4_UNLIKELY(curr == '\r')) +#endif + case '\r': { csubstr sub = s.range(pos, i); this->Writer::_do_write(sub); // write everything up to (excluding) this char this->Writer::_do_write("\\r"); // write the escaped char pos = i+1; + break; + } + case '\b': + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write("\\b"); // write the escaped char + pos = i+1; + break; + } } } // write missing characters at the end of the string if(pos < s.len) - { - csubstr sub = s.sub(pos); - this->Writer::_do_write(sub); - } + this->Writer::_do_write(s.sub(pos)); this->Writer::_do_write('"'); } template -void Emitter::_write_scalar_plain(csubstr s, 
size_t ilevel) +void Emitter::_write_scalar_plain(csubstr s, id_type ilevel) { + if(C4_UNLIKELY(ilevel == 0 && (s.begins_with("...") || s.begins_with("---")))) + { + _rymlindent_nextline() // indent the next line + ++ilevel; + } size_t pos = 0; // tracks the last character that was already written for(size_t i = 0; i < s.len; ++i) { const char curr = s.str[i]; if(curr == '\n') { - csubstr sub = s.range(pos, i+1); + csubstr sub = s.range(pos, i); this->Writer::_do_write(sub); // write everything up to (including) this newline - this->Writer::_do_write('\n'); // write the newline again - if(i + 1 < s.len) - _rymlindent_nextline() // indent the next line + i = _write_escaped_newlines(s, i); pos = i+1; + if(pos < s.len) + _rymlindent_nextline() // indent the next line } } // write missing characters at the end of the string if(pos < s.len) - { - csubstr sub = s.sub(pos); - this->Writer::_do_write(sub); - } + this->Writer::_do_write(s.sub(pos)); } #undef _rymlindent_nextline template -void Emitter::_write_scalar(csubstr s, bool was_quoted) +void Emitter::_write_scalar_json_dquo(csubstr s) { - // this block of code needed to be moved to before the needs_quotes - // assignment to work around a g++ optimizer bug where (s.str != nullptr) - // was evaluated as true even if s.str was actually a nullptr (!!!) - if(s.len == size_t(0)) - { - if(was_quoted || s.str != nullptr) - this->Writer::_do_write("''"); - return; - } - - const bool needs_quotes = ( - was_quoted - || - ( - ( ! s.is_number()) - && - ( - // has leading whitespace - // looks like reference or anchor - // would be treated as a directive - // see https://www.yaml.info/learn/quote.html#noplain - s.begins_with_any(" \n\t\r*&%@`") - || - s.begins_with("<<") - || - // has trailing whitespace - s.ends_with_any(" \n\t\r") - || - // has special chars - (s.first_of("#:-?,\n{}[]'\"") != npos) - ) - ) - ); - - if( ! 
needs_quotes) - { - this->Writer::_do_write(s); - } - else + size_t pos = 0; + this->Writer::_do_write('"'); + for(size_t i = 0; i < s.len; ++i) { - const bool has_dquotes = s.first_of( '"') != npos; - const bool has_squotes = s.first_of('\'') != npos; - if(!has_squotes && has_dquotes) + switch(s.str[i]) { - this->Writer::_do_write('\''); - this->Writer::_do_write(s); - this->Writer::_do_write('\''); - } - else if(has_squotes && !has_dquotes) - { - RYML_ASSERT(s.count('\n') == 0); - this->Writer::_do_write('"'); - this->Writer::_do_write(s); - this->Writer::_do_write('"'); - } - else - { - _write_scalar_squo(s, /*FIXME FIXME FIXME*/0); + case '"': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\""); + pos = i + 1; + break; + case '\n': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\n"); + pos = i + 1; + break; + case '\t': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\t"); + pos = i + 1; + break; + case '\\': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\\"); + pos = i + 1; + break; + case '\r': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\r"); + pos = i + 1; + break; + case '\b': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\b"); + pos = i + 1; + break; + case '\f': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\f"); + pos = i + 1; + break; } } -} -template -void Emitter::_write_scalar_json(csubstr s, bool as_key, bool use_quotes) -{ - if((!use_quotes) - // json keys require quotes - && (!as_key) - && ( - // do not quote special cases - (s == "true" || s == "false" || s == "null") - || ( - // do not quote numbers - (s.is_number() - && ( - // quote integral numbers if they have a leading 0 - // https://github.com/biojppm/rapidyaml/issues/291 - (!(s.len > 1 && s.begins_with('0'))) - // do not quote reals with leading 0 - // 
https://github.com/biojppm/rapidyaml/issues/313 - || (s.find('.') != csubstr::npos) )) - ) - ) - ) - { - this->Writer::_do_write(s); - } - else + if(pos < s.len) { - size_t pos = 0; - this->Writer::_do_write('"'); - for(size_t i = 0; i < s.len; ++i) - { - switch(s.str[i]) - { - case '"': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\\""); - pos = i + 1; - break; - case '\n': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\n"); - pos = i + 1; - break; - case '\t': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\t"); - pos = i + 1; - break; - case '\\': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\\\"); - pos = i + 1; - break; - case '\r': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\r"); - pos = i + 1; - break; - case '\b': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\b"); - pos = i + 1; - break; - case '\f': - this->Writer ::_do_write(s.range(pos, i)); - this->Writer ::_do_write("\\f"); - pos = i + 1; - break; - } - } - if(pos < s.len) - { - csubstr sub = s.sub(pos); - this->Writer::_do_write(sub); - } - this->Writer::_do_write('"'); + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); } + this->Writer::_do_write('"'); } } // namespace yml diff --git a/3rdparty/rapidyaml/include/c4/yml/emit.hpp b/3rdparty/rapidyaml/include/c4/yml/emit.hpp index 85c4a098619698..879f5f5cf04a81 100644 --- a/3rdparty/rapidyaml/include/c4/yml/emit.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/emit.hpp @@ -15,13 +15,21 @@ #include "./node.hpp" #endif -#ifdef emit -#error "emit is defined, likely from a Qt include. This will cause a compilation error. See https://github.com/biojppm/rapidyaml/issues/120" -#endif #define RYML_DEPRECATE_EMIT \ - RYML_DEPRECATED("use emit_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120") + RYML_DEPRECATED("use emit_yaml() instead. 
" \ + "See https://github.com/biojppm/rapidyaml/issues/120") #define RYML_DEPRECATE_EMITRS \ - RYML_DEPRECATED("use emitrs_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120") + RYML_DEPRECATED("use emitrs_yaml() instead. " \ + "See https://github.com/biojppm/rapidyaml/issues/120") + +#ifdef emit +#error "emit is defined, likely from a Qt include. " \ + "This will cause a compilation error. " \ + "See https://github.com/biojppm/rapidyaml/issues/120" +#endif + + +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") //----------------------------------------------------------------------------- @@ -43,6 +51,15 @@ using EmitterOStream = Emitter>; using EmitterFile = Emitter; using EmitterBuf = Emitter; +namespace detail { +inline bool is_set_(ConstNodeRef n) { return n.tree() && (n.id() != NONE); } +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + /** Specifies the type of content to emit */ typedef enum { EMIT_YAML = 0, ///< emit YAML @@ -50,6 +67,59 @@ typedef enum { } EmitType_e; +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** A lightweight object containing options to be used when emitting. 
*/ +struct EmitOptions +{ + typedef enum : uint32_t { + DEFAULT_FLAGS = 0, + JSON_ERR_ON_TAG = 1 << 0, + JSON_ERR_ON_ANCHOR = 1 << 1, + _JSON_ERR_MASK = JSON_ERR_ON_TAG|JSON_ERR_ON_ANCHOR, + } EmitOptionFlags_e; + +public: + + /** @name option flags + * + * @{ */ + C4_ALWAYS_INLINE EmitOptionFlags_e json_error_flags() const noexcept { return m_option_flags; } + EmitOptions& json_error_flags(EmitOptionFlags_e d) noexcept { m_option_flags = (EmitOptionFlags_e)(d & _JSON_ERR_MASK); return *this; } + /** @} */ + +public: + + /** @name max depth for the emitted tree + * + * This makes the emitter fail when emitting trees exceeding the + * max_depth. + * + * @{ */ + C4_ALWAYS_INLINE id_type max_depth() const noexcept { return m_max_depth; } + EmitOptions& max_depth(id_type d) noexcept { m_max_depth = d; return *this; } + static constexpr const id_type max_depth_default = 64; + /** @} */ + +public: + + bool operator== (const EmitOptions& that) const noexcept + { + return m_max_depth == that.m_max_depth && + m_option_flags == that.m_option_flags; + } + +private: + + /** @cond dev */ + id_type m_max_depth{max_depth_default}; + EmitOptionFlags_e m_option_flags{DEFAULT_FLAGS}; + /** @endcond */ +}; + + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -61,12 +131,21 @@ class Emitter : public Writer { public: - /** Construct the emitter and its internal Writer state. Every - * parameter is forwarded to the constructor of the writer. */ + /** Construct the emitter and its internal Writer state, using default emit options. + * @param args arguments to be forwarded to the constructor of the writer. + * */ template - Emitter(Args &&...args) : Writer(std::forward(args)...), m_tree() {} - /** emit! 
+ Emitter(Args &&...args) : Writer(std::forward(args)...), m_tree(), m_opts(), m_flow(false) {} + /** Construct the emitter and its internal Writer state. + * + * @param opts EmitOptions + * @param args arguments to be forwarded to the constructor of the writer. + * */ + template + Emitter(EmitOptions const& opts, Args &&...args) : Writer(std::forward(args)...), m_tree(), m_opts(opts), m_flow(false) {} + + /** emit! * * When writing to a buffer, returns a substr of the emitted YAML. * If the given buffer has insufficient space, the returned substr @@ -83,36 +162,63 @@ class Emitter : public Writer * @param error_on_excess when true, an error is raised when the * output buffer is too small for the emitted YAML/JSON * */ - substr emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess); + substr emit_as(EmitType_e type, Tree const& t, id_type id, bool error_on_excess); /** emit starting at the root node */ - substr emit_as(EmitType_e type, Tree const& t, bool error_on_excess=true); - /** emit the given node */ - substr emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess=true); + substr emit_as(EmitType_e type, Tree const& t, bool error_on_excess=true) + { + if(t.empty()) + return {}; + return this->emit_as(type, t, t.root_id(), error_on_excess); + } + /** emit starting at the given node */ + substr emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess=true) + { + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); + return this->emit_as(type, *n.tree(), n.id(), error_on_excess); + } + +public: + + /** get the emit options for this object */ + EmitOptions const& options() const noexcept { return m_opts; } + + /** set the max depth for emitted trees (to prevent a stack overflow) */ + void max_depth(id_type max_depth) noexcept { m_opts.max_depth(max_depth); } + /** get the max depth for emitted trees (to prevent a stack overflow) */ + id_type max_depth() const noexcept { return 
m_opts.max_depth(); } private: Tree const* C4_RESTRICT m_tree; + EmitOptions m_opts; + bool m_flow; + +private: - void _emit_yaml(size_t id); - void _do_visit_flow_sl(size_t id, size_t ilevel=0); - void _do_visit_flow_ml(size_t id, size_t ilevel=0, size_t do_indent=1); - void _do_visit_block(size_t id, size_t ilevel=0, size_t do_indent=1); - void _do_visit_block_container(size_t id, size_t next_level, size_t do_indent); - void _do_visit_json(size_t id); + void _emit_yaml(id_type id); + void _do_visit_flow_sl(id_type id, id_type depth, id_type ilevel=0); + void _do_visit_flow_ml(id_type id, id_type depth, id_type ilevel=0, id_type do_indent=1); + void _do_visit_block(id_type id, id_type depth, id_type ilevel=0, id_type do_indent=1); + void _do_visit_block_container(id_type id, id_type depth, id_type next_level, bool do_indent); + void _do_visit_json(id_type id, id_type depth); private: - void _write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t level); + void _write(NodeScalar const& C4_RESTRICT sc, NodeType flags, id_type level); void _write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags); - void _write_doc(size_t id); - void _write_scalar(csubstr s, bool was_quoted); - void _write_scalar_json(csubstr s, bool as_key, bool was_quoted); - void _write_scalar_literal(csubstr s, size_t level, bool as_key, bool explicit_indentation=false); - void _write_scalar_folded(csubstr s, size_t level, bool as_key); - void _write_scalar_squo(csubstr s, size_t level); - void _write_scalar_dquo(csubstr s, size_t level); - void _write_scalar_plain(csubstr s, size_t level); + void _write_doc(id_type id); + void _write_scalar_json_dquo(csubstr s); + void _write_scalar_literal(csubstr s, id_type level, bool as_key); + void _write_scalar_folded(csubstr s, id_type level, bool as_key); + void _write_scalar_squo(csubstr s, id_type level); + void _write_scalar_dquo(csubstr s, id_type level); + void _write_scalar_plain(csubstr s, id_type level); + + size_t 
_write_escaped_newlines(csubstr s, size_t i); + size_t _write_indented_block(csubstr s, size_t i, id_type level); void _write_tag(csubstr tag) { @@ -122,18 +228,28 @@ class Emitter : public Writer } enum : type_bits { - _keysc = (KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | ~(VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), - _valsc = ~(KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | (VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), + _keysc = (KEY|KEYREF|KEYANCH|KEYQUO|KEY_STYLE) | ~(VAL|VALREF|VALANCH|VALQUO|VAL_STYLE) | CONTAINER_STYLE, + _valsc = ~(KEY|KEYREF|KEYANCH|KEYQUO|KEY_STYLE) | (VAL|VALREF|VALANCH|VALQUO|VAL_STYLE) | CONTAINER_STYLE, _keysc_json = (KEY) | ~(VAL), _valsc_json = ~(KEY) | (VAL), }; - C4_ALWAYS_INLINE void _writek(size_t id, size_t level) { _write(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~_valsc, level); } - C4_ALWAYS_INLINE void _writev(size_t id, size_t level) { _write(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~_keysc, level); } + C4_ALWAYS_INLINE void _writek(id_type id, id_type level) { _write(m_tree->keysc(id), (m_tree->_p(id)->m_type.type & ~_valsc), level); } + C4_ALWAYS_INLINE void _writev(id_type id, id_type level) { _write(m_tree->valsc(id), (m_tree->_p(id)->m_type.type & ~_keysc), level); } - C4_ALWAYS_INLINE void _writek_json(size_t id) { _write_json(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~(VAL)); } - C4_ALWAYS_INLINE void _writev_json(size_t id) { _write_json(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~(KEY)); } + C4_ALWAYS_INLINE void _writek_json(id_type id) { _write_json(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~(VAL)); } + C4_ALWAYS_INLINE void _writev_json(id_type id) { _write_json(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~(KEY)); } + void _indent(id_type level, bool enabled) + { + if(enabled) + this->Writer::_do_write(' ', 2u * (size_t)level); + } + void _indent(id_type level) + { + if(!m_flow) + this->Writer::_do_write(' ', 2u * (size_t)level); + } }; @@ -147,33 +263,59 @@ class Emitter : 
public Writer */ -/** emit YAML to the given file. A null file defaults to stdout. - * Return the number of bytes written. */ -inline size_t emit_yaml(Tree const& t, size_t id, FILE *f) +// emit from tree and node id ----------------------- + +/** (1) emit YAML to the given file, starting at the given node. A null + * file defaults to stdout. Return the number of bytes written. */ +inline size_t emit_yaml(Tree const& t, id_type id, EmitOptions const& opts, FILE *f) +{ + EmitterFile em(opts, f); + return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len; +} +/** (2) like (1), but use default emit options */ +inline size_t emit_yaml(Tree const& t, id_type id, FILE *f) { EmitterFile em(f); return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len; } -/** emit JSON to the given file. A null file defaults to stdout. - * Return the number of bytes written. */ -inline size_t emit_json(Tree const& t, size_t id, FILE *f) +/** (1) emit JSON to the given file, starting at the given node. A null + * file defaults to stdout. Return the number of bytes written. */ +inline size_t emit_json(Tree const& t, id_type id, EmitOptions const& opts, FILE *f) +{ + EmitterFile em(opts, f); + return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len; +} +/** (2) like (1), but use default emit options */ +inline size_t emit_json(Tree const& t, id_type id, FILE *f) { EmitterFile em(f); return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len; } -/** emit YAML to the given file. A null file defaults to stdout. - * Return the number of bytes written. - * @overload */ +// emit from root ------------------------- + +/** (1) emit YAML to the given file, starting at the root node. A null file defaults to stdout. + * Return the number of bytes written. 
*/ +inline size_t emit_yaml(Tree const& t, EmitOptions const& opts, FILE *f=nullptr) +{ + EmitterFile em(opts, f); + return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len; +} +/** (2) like (1), but use default emit options */ inline size_t emit_yaml(Tree const& t, FILE *f=nullptr) { EmitterFile em(f); return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len; } -/** emit JSON to the given file. A null file defaults to stdout. - * Return the number of bytes written. - * @overload */ +/** (1) emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_json(Tree const& t, EmitOptions const& opts, FILE *f=nullptr) +{ + EmitterFile em(opts, f); + return em.emit_as(EMIT_JSON, t, /*error_on_excess*/true).len; +} +/** (2) like (1), but use default emit options */ inline size_t emit_json(Tree const& t, FILE *f=nullptr) { EmitterFile em(f); @@ -181,19 +323,39 @@ inline size_t emit_json(Tree const& t, FILE *f=nullptr) } -/** emit YAML to the given file. A null file defaults to stdout. - * Return the number of bytes written. - * @overload */ +// emit from ConstNodeRef ------------------------ + +/** (1) emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_yaml(ConstNodeRef const& r, EmitOptions const& opts, FILE *f=nullptr) +{ + if(!detail::is_set_(r)) + return {}; + EmitterFile em(opts, f); + return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len; +} +/** (2) like (1), but use default emit options */ inline size_t emit_yaml(ConstNodeRef const& r, FILE *f=nullptr) { + if(!detail::is_set_(r)) + return {}; EmitterFile em(f); return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len; } -/** emit JSON to the given file. A null file defaults to stdout. - * Return the number of bytes written. - * @overload */ +/** (1) emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. 
*/ +inline size_t emit_json(ConstNodeRef const& r, EmitOptions const& opts, FILE *f=nullptr) +{ + if(!detail::is_set_(r)) + return {}; + EmitterFile em(opts, f); + return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len; +} +/** (2) like (1), but use default emit options */ inline size_t emit_json(ConstNodeRef const& r, FILE *f=nullptr) { + if(!detail::is_set_(r)) + return {}; EmitterFile em(f); return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len; } @@ -208,24 +370,6 @@ inline size_t emit_json(ConstNodeRef const& r, FILE *f=nullptr) * @{ */ -/** mark a tree or node to be emitted as json when using @ref operator<< . For example: - * - * ```cpp - * Tree t = parse_in_arena("{foo: bar}"); - * std::cout << t; // emits YAML - * std::cout << as_json(t); // emits JSON - * ``` - * - * @see @ref operator<< */ -struct as_json -{ - Tree const* tree; - size_t node; - as_json(Tree const& t) : tree(&t), node(t.empty() ? NONE : t.root_id()) {} - as_json(Tree const& t, size_t id) : tree(&t), node(id) {} - as_json(ConstNodeRef const& n) : tree(n.tree()), node(n.id()) {} -}; - /** emit YAML to an STL-like ostream */ template inline OStream& operator<< (OStream& s, Tree const& t) @@ -240,20 +384,77 @@ inline OStream& operator<< (OStream& s, Tree const& t) template inline OStream& operator<< (OStream& s, ConstNodeRef const& n) { + if(!detail::is_set_(n)) + return s; EmitterOStream em(s); em.emit_as(EMIT_YAML, n); return s; } +/** mark a tree or node to be emitted as json when using @ref + * operator<<, with options. For example: + * + * ```cpp + * Tree t = parse_in_arena("{foo: bar}"); + * std::cout << t; // emits YAML + * std::cout << as_json(t); // emits JSON + * std::cout << as_json(t, EmitOptions().max_depth(10)); // emits JSON with a max tree depth + * ``` + * + * @see @ref operator<< */ +struct as_json +{ + Tree const* tree; + size_t node; + EmitOptions options; + as_json(Tree const& t, EmitOptions const& opts={}) : tree(&t), node(t.empty() ? 
NONE : t.root_id()), options(opts) {} + as_json(Tree const& t, size_t id, EmitOptions const& opts={}) : tree(&t), node(id), options(opts) {} + as_json(ConstNodeRef const& n, EmitOptions const& opts={}) : tree(n.tree()), node(n.id()), options(opts) {} +}; + +/** mark a tree or node to be emitted as yaml when using @ref + * operator<< . For example: + * + * ```cpp + * Tree t = parse_in_arena("{foo: bar}"); + * std::cout << t; // emits YAML + * std::cout << as_yaml(t); // emits YAML, same as above + * std::cout << as_yaml(t, EmitOptions().max_depth(10)); // emits YAML with a max tree depth + * ``` + * + * @see @ref operator<< */ +struct as_yaml +{ + Tree const* tree; + size_t node; + EmitOptions options; + as_yaml(Tree const& t, EmitOptions const& opts={}) : tree(&t), node(t.empty() ? NONE : t.root_id()), options(opts) {} + as_yaml(Tree const& t, size_t id, EmitOptions const& opts={}) : tree(&t), node(id), options(opts) {} + as_yaml(ConstNodeRef const& n, EmitOptions const& opts={}) : tree(n.tree()), node(n.id()), options(opts) {} +}; + /** emit json to an STL-like stream */ template inline OStream& operator<< (OStream& s, as_json const& j) { - EmitterOStream em(s); + if(!j.tree || j.node == NONE) + return s; + EmitterOStream em(j.options, s); em.emit_as(EMIT_JSON, *j.tree, j.node, true); return s; } +/** emit yaml to an STL-like stream */ +template +inline OStream& operator<< (OStream& s, as_yaml const& y) +{ + if(!y.tree || y.node == NONE) + return s; + EmitterOStream em(y.options, s); + em.emit_as(EMIT_YAML, *y.tree, y.node, true); + return s; +} + /** @} */ @@ -264,35 +465,85 @@ inline OStream& operator<< (OStream& s, as_json const& j) * @{ */ -/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. +// emit from tree and node id ----------------------- + +/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param t the tree to emit. + * @param id the node where to start emitting. + * @param opts emit options. 
+ * @param buf the output buffer. + * @param opts emit options. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload */ -inline substr emit_yaml(Tree const& t, size_t id, substr buf, bool error_on_excess=true) + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. */ +inline substr emit_yaml(Tree const& t, id_type id, EmitOptions const& opts, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_YAML, t, id, error_on_excess); +} +/** (2) like (1), but use default emit options */ +inline substr emit_yaml(Tree const& t, id_type id, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); return em.emit_as(EMIT_YAML, t, id, error_on_excess); } -/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. +/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param t the tree to emit. + * @param id the node where to start emitting. + * @param opts emit options. + * @param buf the output buffer. + * @param opts emit options. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload */ -inline substr emit_json(Tree const& t, size_t id, substr buf, bool error_on_excess=true) + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. 
*/ +inline substr emit_json(Tree const& t, id_type id, EmitOptions const& opts, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_JSON, t, id, error_on_excess); +} +/** (2) like (1), but use default emit options */ +inline substr emit_json(Tree const& t, id_type id, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); return em.emit_as(EMIT_JSON, t, id, error_on_excess); } -/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. +// emit from root ------------------------- + +/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param t the tree; will be emitted from the root node. + * @param buf the output buffer. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload */ + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. */ +inline substr emit_yaml(Tree const& t, EmitOptions const& opts, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_YAML, t, error_on_excess); +} +/** (2) like (1), but use default emit options */ inline substr emit_yaml(Tree const& t, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); return em.emit_as(EMIT_YAML, t, error_on_excess); } -/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. +/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param t the tree; will be emitted from the root node. + * @param buf the output buffer. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload */ + * @return a substr trimmed to the result in the output buffer. 
If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. */ +inline substr emit_json(Tree const& t, EmitOptions const& opts, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_JSON, t, error_on_excess); +} +/** (2) like (1), but use default emit options */ inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); @@ -300,21 +551,51 @@ inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true) } -/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. +// emit from ConstNodeRef ------------------------ + +/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param r the starting node. + * @param buf the output buffer. + * @param opts emit options. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload - */ + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. */ +inline substr emit_yaml(ConstNodeRef const& r, EmitOptions const& opts, substr buf, bool error_on_excess=true) +{ + if(!detail::is_set_(r)) + return {}; + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_YAML, r, error_on_excess); +} +/** (2) like (1), but use default emit options */ inline substr emit_yaml(ConstNodeRef const& r, substr buf, bool error_on_excess=true) { + if(!detail::is_set_(r)) + return {}; EmitterBuf em(buf); return em.emit_as(EMIT_YAML, r, error_on_excess); } -/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. +/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param r the starting node. 
+ * @param buf the output buffer. + * @param opts emit options. * @param error_on_excess Raise an error if the space in the buffer is insufficient. - * @overload - */ + * @return a substr trimmed to the result in the output buffer. If the buffer is + * insufficient (when error_on_excess is false), the string pointer of the + * result will be set to null, and the length will report the required buffer size. */ +inline substr emit_json(ConstNodeRef const& r, EmitOptions const& opts, substr buf, bool error_on_excess=true) +{ + if(!detail::is_set_(r)) + return {}; + EmitterBuf em(opts, buf); + return em.emit_as(EMIT_JSON, r, error_on_excess); +} +/** (2) like (1), but use default emit options */ inline substr emit_json(ConstNodeRef const& r, substr buf, bool error_on_excess=true) { + if(!detail::is_set_(r)) + return {}; EmitterBuf em(buf); return em.emit_as(EMIT_JSON, r, error_on_excess); } @@ -322,141 +603,225 @@ inline substr emit_json(ConstNodeRef const& r, substr buf, bool error_on_excess= //----------------------------------------------------------------------------- -/** emit+resize: emit YAML to the given `std::string`/`std::vector`-like - * container, resizing it as needed to fit the emitted YAML. */ +/** @defgroup doc_emit_to_container Emit to resizeable container + * + * @{ + */ + +// emit from tree and node id --------------------------- + +/** (1) emit+resize: emit YAML to the given `std::string`/`std::vector`-like + * container, resizing it as needed to fit the emitted YAML. If @p append is + * set to true, the emitted YAML is appended at the end of the container. 
+ * + * @return a substr trimmed to the emitted YAML (excluding the initial contents, when appending) */ template -substr emitrs_yaml(Tree const& t, size_t id, CharOwningContainer * cont) +substr emitrs_yaml(Tree const& t, id_type id, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) { - substr buf = to_substr(*cont); - substr ret = emit_yaml(t, id, buf, /*error_on_excess*/false); + size_t startpos = append ? cont->size() : 0u; + cont->resize(cont->capacity()); // otherwise the first emit would be certain to fail + substr buf = to_substr(*cont).sub(startpos); + substr ret = emit_yaml(t, id, opts, buf, /*error_on_excess*/false); if(ret.str == nullptr && ret.len > 0) { - cont->resize(ret.len); - buf = to_substr(*cont); - ret = emit_yaml(t, id, buf, /*error_on_excess*/true); + cont->resize(startpos + ret.len); + buf = to_substr(*cont).sub(startpos); + ret = emit_yaml(t, id, opts, buf, /*error_on_excess*/true); + } + else + { + cont->resize(startpos + ret.len); } return ret; } -/** emit+resize: emit JSON to the given `std::string`/`std::vector`-like - * container, resizing it as needed to fit the emitted JSON. */ +/** (2) like (1), but use default emit options */ template -substr emitrs_json(Tree const& t, size_t id, CharOwningContainer * cont) +substr emitrs_yaml(Tree const& t, id_type id, CharOwningContainer * cont, bool append=false) { - substr buf = to_substr(*cont); - substr ret = emit_json(t, id, buf, /*error_on_excess*/false); + return emitrs_yaml(t, id, EmitOptions{}, cont, append); +} +/** (1) emit+resize: emit JSON to the given `std::string`/`std::vector`-like + * container, resizing it as needed to fit the emitted JSON. If @p append is + * set to true, the emitted JSON is appended at the end of the container. 
+ * + * @return a substr trimmed to the emitted JSON (excluding the initial contents, when appending) */ +template +substr emitrs_json(Tree const& t, id_type id, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) +{ + const size_t startpos = append ? cont->size() : 0u; + cont->resize(cont->capacity()); // otherwise the first emit would be certain to fail + substr buf = to_substr(*cont).sub(startpos); + EmitterBuf em(opts, buf); + substr ret = emit_json(t, id, opts, buf, /*error_on_excess*/false); if(ret.str == nullptr && ret.len > 0) { - cont->resize(ret.len); - buf = to_substr(*cont); - ret = emit_json(t, id, buf, /*error_on_excess*/true); + cont->resize(startpos + ret.len); + buf = to_substr(*cont).sub(startpos); + ret = emit_json(t, id, opts, buf, /*error_on_excess*/true); + } + else + { + cont->resize(startpos + ret.len); } return ret; } +/** (2) like (1), but use default emit options */ +template +substr emitrs_json(Tree const& t, id_type id, CharOwningContainer * cont, bool append=false) +{ + return emitrs_json(t, id, EmitOptions{}, cont, append); +} -/** emit+resize: emit YAML to the given `std::string`/`std::vector`-like - * container, resizing it as needed to fit the emitted YAML. */ +/** (3) emit+resize: YAML to a newly-created `std::string`/`std::vector`-like container. */ template -CharOwningContainer emitrs_yaml(Tree const& t, size_t id) +CharOwningContainer emitrs_yaml(Tree const& t, id_type id, EmitOptions const& opts={}) { CharOwningContainer c; - emitrs_yaml(t, id, &c); + emitrs_yaml(t, id, opts, &c); return c; } -/** emit+resize: emit JSON to the given `std::string`/`std::vector`-like - * container, resizing it as needed to fit the emitted JSON. */ +/** (3) emit+resize: JSON to a newly-created `std::string`/`std::vector`-like container. 
*/ template -CharOwningContainer emitrs_json(Tree const& t, size_t id) +CharOwningContainer emitrs_json(Tree const& t, id_type id, EmitOptions const& opts={}) { CharOwningContainer c; - emitrs_json(t, id, &c); + emitrs_json(t, id, opts, &c); return c; } -/** emit+resize: YAML to the given `std::string`/`std::vector`-like - * container, resizing it as needed to fit the emitted YAML. */ +// emit from root ------------------------- + +/** (1) emit+resize: YAML to the given `std::string`/`std::vector`-like + * container, resizing it as needed to fit the emitted YAML. + * @return a substr trimmed to the new emitted contents. */ +template +substr emitrs_yaml(Tree const& t, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) +{ + if(t.empty()) + return {}; + return emitrs_yaml(t, t.root_id(), opts, cont, append); +} +/** (2) like (1), but use default emit options */ +template +substr emitrs_yaml(Tree const& t, CharOwningContainer * cont, bool append=false) +{ + if(t.empty()) + return {}; + return emitrs_yaml(t, t.root_id(), EmitOptions{}, cont, append); +} +/** (1) emit+resize: JSON to the given `std::string`/`std::vector`-like + * container, resizing it as needed to fit the emitted JSON. + * @return a substr trimmed to the new emitted contents. */ template -substr emitrs_yaml(Tree const& t, CharOwningContainer * cont) +substr emitrs_json(Tree const& t, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) { if(t.empty()) return {}; - return emitrs_yaml(t, t.root_id(), cont); + return emitrs_json(t, t.root_id(), opts, cont, append); } -/** emit+resize: JSON to the given `std::string`/`std::vector`-like - * container, resizing it as needed to fit the emitted JSON. 
*/ +/** (2) like (1), but use default emit options */ template -substr emitrs_json(Tree const& t, CharOwningContainer * cont) +substr emitrs_json(Tree const& t, CharOwningContainer * cont, bool append=false) { if(t.empty()) return {}; - return emitrs_json(t, t.root_id(), cont); + return emitrs_json(t, t.root_id(), EmitOptions{}, cont, append); } -/** emit+resize: YAML to the given `std::string`/`std::vector`-like container, - * resizing it as needed to fit the emitted YAML. */ +/** (3) emit+resize: YAML to a newly-created `std::string`/`std::vector`-like container. */ template -CharOwningContainer emitrs_yaml(Tree const& t) +CharOwningContainer emitrs_yaml(Tree const& t, EmitOptions const& opts={}) { CharOwningContainer c; if(t.empty()) return c; - emitrs_yaml(t, t.root_id(), &c); + emitrs_yaml(t, t.root_id(), opts, &c); return c; } -/** emit+resize: JSON to the given `std::string`/`std::vector`-like container, - * resizing it as needed to fit the emitted JSON. */ +/** (3) emit+resize: JSON to a newly-created `std::string`/`std::vector`-like container. */ template -CharOwningContainer emitrs_json(Tree const& t) +CharOwningContainer emitrs_json(Tree const& t, EmitOptions const& opts={}) { CharOwningContainer c; if(t.empty()) return c; - emitrs_json(t, t.root_id(), &c); + emitrs_json(t, t.root_id(), opts, &c); return c; } -/** emit+resize: YAML to the given `std::string`/`std::vector`-like container, - * resizing it as needed to fit the emitted YAML. */ +// emit from ConstNodeRef ------------------------ + + +/** (1) emit+resize: YAML to the given `std::string`/`std::vector`-like container, + * resizing it as needed to fit the emitted YAML. 
+ * @return a substr trimmed to the new emitted contents */ template -substr emitrs_yaml(ConstNodeRef const& n, CharOwningContainer * cont) +substr emitrs_yaml(ConstNodeRef const& n, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) { + if(!detail::is_set_(n)) + return {}; _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); - return emitrs_yaml(*n.tree(), n.id(), cont); + return emitrs_yaml(*n.tree(), n.id(), opts, cont, append); } -/** emit+resize: JSON to the given `std::string`/`std::vector`-like container, - * resizing it as needed to fit the emitted JSON. */ +/** (2) like (1), but use default emit options */ template -substr emitrs_json(ConstNodeRef const& n, CharOwningContainer * cont) +substr emitrs_yaml(ConstNodeRef const& n, CharOwningContainer * cont, bool append=false) { + if(!detail::is_set_(n)) + return {}; _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); - return emitrs_json(*n.tree(), n.id(), cont); + return emitrs_yaml(*n.tree(), n.id(), EmitOptions{}, cont, append); +} +/** (1) emit+resize: JSON to the given `std::string`/`std::vector`-like container, + * resizing it as needed to fit the emitted JSON. + * @return a substr trimmed to the new emitted contents */ +template +substr emitrs_json(ConstNodeRef const& n, EmitOptions const& opts, CharOwningContainer * cont, bool append=false) +{ + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); + return emitrs_json(*n.tree(), n.id(), opts, cont, append); +} +/** (2) like (1), but use default emit options */ +template +substr emitrs_json(ConstNodeRef const& n, CharOwningContainer * cont, bool append=false) +{ + if(!detail::is_set_(n)) + return {}; + _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); + return emitrs_json(*n.tree(), n.id(), EmitOptions{}, cont, append); } -/** emit+resize: YAML to the given `std::string`/`std::vector`-like container, - * resizing it as needed to fit the emitted YAML. 
*/ +/** (3) emit+resize: YAML to a newly-created `std::string`/`std::vector`-like container. */ template -CharOwningContainer emitrs_yaml(ConstNodeRef const& n) +CharOwningContainer emitrs_yaml(ConstNodeRef const& n, EmitOptions const& opts={}) { + if(!detail::is_set_(n)) + return {}; _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); CharOwningContainer c; - emitrs_yaml(*n.tree(), n.id(), &c); + emitrs_yaml(*n.tree(), n.id(), opts, &c); return c; } -/** emit+resize: JSON to the given `std::string`/`std::vector`-like container, - * resizing it as needed to fit the emitted JSON. */ +/** (3) emit+resize: JSON to a newly-created `std::string`/`std::vector`-like container. */ template -CharOwningContainer emitrs_json(ConstNodeRef const& n) +CharOwningContainer emitrs_json(ConstNodeRef const& n, EmitOptions const& opts={}) { + if(!detail::is_set_(n)) + return {}; _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); CharOwningContainer c; - emitrs_json(*n.tree(), n.id(), &c); + emitrs_json(*n.tree(), n.id(), opts, &c); return c; } + /** @} */ @@ -464,7 +829,7 @@ CharOwningContainer emitrs_json(ConstNodeRef const& n) /** @cond dev */ -RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, size_t id, FILE *f) +RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, id_type id, FILE *f) { return emit_yaml(t, id, f); } @@ -477,7 +842,7 @@ RYML_DEPRECATE_EMIT inline size_t emit(ConstNodeRef const& r, FILE *f=nullptr) return emit_yaml(r, f); } -RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, id_type id, substr buf, bool error_on_excess=true) { return emit_yaml(t, id, buf, error_on_excess); } @@ -491,12 +856,12 @@ RYML_DEPRECATE_EMIT inline substr emit(ConstNodeRef const& r, substr buf, bool e } template -RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont) +RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, id_type id, 
CharOwningContainer * cont) { return emitrs_yaml(t, id, cont); } template -RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t, size_t id) +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t, id_type id) { return emitrs_yaml(t, id); } @@ -526,6 +891,8 @@ RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(ConstNodeRef const& n) } // namespace yml } // namespace c4 +C4_SUPPRESS_WARNING_GCC_CLANG_POP + #undef RYML_DEPRECATE_EMIT #undef RYML_DEPRECATE_EMITRS diff --git a/3rdparty/rapidyaml/include/c4/yml/event_handler_stack.hpp b/3rdparty/rapidyaml/include/c4/yml/event_handler_stack.hpp new file mode 100644 index 00000000000000..30e96953003e00 --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/event_handler_stack.hpp @@ -0,0 +1,186 @@ +#ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_ +#define _C4_YML_EVENT_HANDLER_STACK_HPP_ + +#ifndef _C4_YML_DETAIL_STACK_HPP_ +#include "c4/yml/detail/stack.hpp" +#endif + +#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_ +#include "c4/yml/detail/parser_dbg.hpp" +#endif + +#ifndef _C4_YML_PARSER_STATE_HPP_ +#include "c4/yml/parser_state.hpp" +#endif + +#ifdef RYML_DBG +#ifndef _C4_YML_DETAIL_PRINT_HPP_ +#include "c4/yml/detail/print.hpp" +#endif +#endif + +namespace c4 { +namespace yml { + +/** @addtogroup doc_event_handlers + * @{ */ + +namespace detail { +using pfn_relocate_arena = void (*)(void*, csubstr prev_arena, substr next_arena); +} // detail + +/** Use this class a base of implementations of event handler to + * simplify the stack logic. */ +template +struct EventHandlerStack +{ + static_assert(std::is_base_of::value, + "ParserState must be a base of HandlerState"); + + using state = HandlerState; + using pfn_relocate_arena = detail::pfn_relocate_arena; + +public: + + detail::stack m_stack; + state *C4_RESTRICT m_curr; ///< current stack level: top of the stack. cached here for easier access. + state *C4_RESTRICT m_parent; ///< parent of the current stack level. 
+ pfn_relocate_arena m_relocate_arena; ///< callback when the arena gets relocated + void * m_relocate_arena_data; + +protected: + + EventHandlerStack() : m_stack(), m_curr(), m_parent(), m_relocate_arena(), m_relocate_arena_data() {} + EventHandlerStack(Callbacks const& cb) : m_stack(cb), m_curr(), m_parent(), m_relocate_arena(), m_relocate_arena_data() {} + +protected: + + void _stack_start_parse(const char *filename, pfn_relocate_arena relocate_arena, void *relocate_arena_data) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_curr != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, relocate_arena != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, relocate_arena_data != nullptr); + m_curr->start_parse(filename, m_curr->node_id); + m_relocate_arena = relocate_arena; + m_relocate_arena_data = relocate_arena_data; + } + + void _stack_finish_parse() + { + } + +protected: + + void _stack_reset_root() + { + m_stack.clear(); + m_stack.push({}); + m_parent = nullptr; + m_curr = &m_stack.top(); + } + + void _stack_reset_non_root() + { + m_stack.clear(); + m_stack.push({}); // parent + m_stack.push({}); // node + m_parent = &m_stack.top(1); + m_curr = &m_stack.top(); + } + + void _stack_push() + { + m_stack.push_top(); + m_parent = &m_stack.top(1); // don't use m_curr. watch out for relocations inside the prev push + m_curr = &m_stack.top(); + m_curr->reset_after_push(); + } + + void _stack_pop() + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1); + m_parent->reset_before_pop(*m_curr); + m_stack.pop(); + m_parent = m_stack.size() > 1 ? &m_stack.top(1) : nullptr; + m_curr = &m_stack.top(); + #ifdef RYML_DBG + if(m_parent) + _c4dbgpf("popped! top is now node={} (parent={})", m_curr->node_id, m_parent->node_id); + else + _c4dbgpf("popped! 
top is now node={} @ ROOT", m_curr->node_id); + #endif + } + +protected: + + // undefined at the end + #define _has_any_(bits) (static_cast(this)->template _has_any__()) + + bool _stack_should_push_on_begin_doc() const + { + const bool is_root = (m_stack.size() == 1u); + return is_root && (_has_any_(DOC|VAL|MAP|SEQ) || m_curr->has_children); + } + + bool _stack_should_pop_on_end_doc() const + { + const bool is_root = (m_stack.size() == 1u); + return !is_root && _has_any_(DOC); + } + +protected: + + void _stack_relocate_to_new_arena(csubstr prev, substr curr) + { + for(state &st : m_stack) + { + if(st.line_contents.rem.is_sub(prev)) + st.line_contents.rem = _stack_relocate_to_new_arena(st.line_contents.rem, prev, curr); + if(st.line_contents.full.is_sub(prev)) + st.line_contents.full = _stack_relocate_to_new_arena(st.line_contents.full, prev, curr); + if(st.line_contents.stripped.is_sub(prev)) + st.line_contents.stripped = _stack_relocate_to_new_arena(st.line_contents.stripped, prev, curr); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_relocate_arena != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_relocate_arena_data != nullptr); + m_relocate_arena(m_relocate_arena_data, prev, curr); + } + + substr _stack_relocate_to_new_arena(csubstr s, csubstr prev, substr curr) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, prev.is_super(s)); + auto pos = s.str - prev.str; + substr out = {curr.str + pos, s.len}; + _RYML_CB_ASSERT(m_stack.m_callbacks, curr.is_super(out)); + return out; + } + +public: + + /** Check whether the current parse tokens are trailing on the + * previous doc, and raise an error if they are. This function is + * called by the parse engine (not the event handler) before a doc + * is started. 
*/ + void check_trailing_doc_token() const + { + const bool is_root = (m_stack.size() == 1u); + const bool isndoc = (m_curr->flags & NDOC) != 0; + const bool suspicious = _has_any_(MAP|SEQ|VAL); + _c4dbgpf("target={} isroot={} suspicious={} ndoc={}", m_curr->node_id, is_root, suspicious, isndoc); + if((is_root || _has_any_(DOC)) && suspicious && !isndoc) + _RYML_CB_ERR_(m_stack.m_callbacks, "parse error", m_curr->pos); + } + +protected: + + #undef _has_any_ + +}; + +/** @} */ + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_EVENT_HANDLER_STACK_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/event_handler_tree.hpp b/3rdparty/rapidyaml/include/c4/yml/event_handler_tree.hpp new file mode 100644 index 00000000000000..25369df8ef30b3 --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/event_handler_tree.hpp @@ -0,0 +1,754 @@ +#ifndef _C4_YML_EVENT_HANDLER_TREE_HPP_ +#define _C4_YML_EVENT_HANDLER_TREE_HPP_ + +#ifndef _C4_YML_TREE_HPP_ +#include "c4/yml/tree.hpp" +#endif + +#ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_ +#include "c4/yml/event_handler_stack.hpp" +#endif + +C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4702) // unreachable code + +namespace c4 { +namespace yml { + +/** @addtogroup doc_event_handlers + * @{ */ + + +/** The stack state needed specifically by @ref EventHandlerTree */ +struct EventHandlerTreeState : public ParserState +{ + NodeData *tr_data; +}; + + +/** The event handler to create a ryml @ref Tree. See the + * documentation for @ref doc_event_handlers, which has important + * notes about the event model used by rapidyaml. 
*/ +struct EventHandlerTree : public EventHandlerStack +{ + + /** @name types + * @{ */ + + using state = EventHandlerTreeState; + + /** @} */ + +public: + + /** @cond dev */ + Tree *C4_RESTRICT m_tree; + id_type m_id; + size_t m_num_directives; + bool m_yaml_directive; + + #if RYML_DBG + #define _enable_(bits) _enable__(); _c4dbgpf("node[{}]: enable {}", m_curr->node_id, #bits) + #define _disable_(bits) _disable__(); _c4dbgpf("node[{}]: disable {}", m_curr->node_id, #bits) + #else + #define _enable_(bits) _enable__() + #define _disable_(bits) _disable__() + #endif + #define _has_any_(bits) _has_any__() + /** @endcond */ + +public: + + /** @name construction and resetting + * @{ */ + + EventHandlerTree() : EventHandlerStack(), m_tree(), m_id(NONE), m_num_directives(), m_yaml_directive() {} + EventHandlerTree(Callbacks const& cb) : EventHandlerStack(cb), m_tree(), m_id(NONE), m_num_directives(), m_yaml_directive() {} + EventHandlerTree(Tree *tree, id_type id) : EventHandlerStack(tree->callbacks()), m_tree(tree), m_id(id), m_num_directives(), m_yaml_directive() + { + reset(tree, id); + } + + void reset(Tree *tree, id_type id) + { + if(C4_UNLIKELY(!tree)) + _RYML_CB_ERR(m_stack.m_callbacks, "null tree"); + if(C4_UNLIKELY(id >= tree->capacity())) + _RYML_CB_ERR(tree->callbacks(), "invalid node"); + if(C4_UNLIKELY(!tree->is_root(id))) + if(C4_UNLIKELY(tree->is_map(tree->parent(id)))) + if(C4_UNLIKELY(!tree->has_key(id))) + _RYML_CB_ERR(tree->callbacks(), "destination node belongs to a map and has no key"); + m_tree = tree; + m_id = id; + if(m_tree->is_root(id)) + { + _stack_reset_root(); + _reset_parser_state(m_curr, id, m_tree->root_id()); + } + else + { + _stack_reset_non_root(); + _reset_parser_state(m_parent, id, m_tree->parent(id)); + _reset_parser_state(m_curr, id, id); + } + m_num_directives = 0; + m_yaml_directive = false; + } + + /** @} */ + +public: + + /** @name parse events + * @{ */ + + void start_parse(const char* filename, detail::pfn_relocate_arena 
relocate_arena, void *relocate_arena_data) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr); + this->_stack_start_parse(filename, relocate_arena, relocate_arena_data); + } + + void finish_parse() + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr); + if(m_num_directives && !m_tree->is_stream(m_tree->root_id())) + _RYML_CB_ERR_(m_stack.m_callbacks, "directives cannot be used without a document", {}); + this->_stack_finish_parse(); + /* This pointer is temporary. Remember that: + * + * - this handler object may be held by the user + * - it may be used with a temporary tree inside the parse function + * - when the parse function returns the temporary tree, its address + * will change + * + * As a result, the user could try to read the tree from m_tree, and + * end up reading the stale temporary object. + * + * So it is better to clear it here; then the user will get an obvious + * segfault if reading from m_tree. */ + m_tree = nullptr; + } + + void cancel_parse() + { + m_tree = nullptr; + } + + /** @} */ + +public: + + /** @name YAML stream events */ + /** @{ */ + + C4_ALWAYS_INLINE void begin_stream() const noexcept { /*nothing to do*/ } + + C4_ALWAYS_INLINE void end_stream() const noexcept { /*nothing to do*/ } + + /** @} */ + +public: + + /** @name YAML document events */ + /** @{ */ + + /** implicit doc start (without ---) */ + void begin_doc() + { + _c4dbgp("begin_doc"); + if(_stack_should_push_on_begin_doc()) + { + _c4dbgp("push!"); + _set_root_as_stream(); + _push(); + _enable_(DOC); + } + } + /** implicit doc end (without ...) 
*/ + void end_doc() + { + _c4dbgp("end_doc"); + if(_stack_should_pop_on_end_doc()) + { + _remove_speculative(); + _c4dbgp("pop!"); + _pop(); + } + } + + /** explicit doc start, with --- */ + void begin_doc_expl() + { + _c4dbgp("begin_doc_expl"); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->root_id() == m_curr->node_id); + if(!m_tree->is_stream(m_tree->root_id())) //if(_should_push_on_begin_doc()) + { + _c4dbgp("ensure stream"); + _set_root_as_stream(); + id_type first = m_tree->first_child(m_tree->root_id()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_stream(m_tree->root_id())); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->num_children(m_tree->root_id()) == 1u); + if(m_tree->has_children(first) || m_tree->is_val(first)) + { + _c4dbgp("push!"); + _push(); + } + else + { + _c4dbgp("tweak"); + _push(); + _remove_speculative(); + m_curr->node_id = m_tree->last_child(m_tree->root_id()); + m_curr->tr_data = m_tree->_p(m_curr->node_id); + } + } + else + { + _c4dbgp("push!"); + _push(); + } + _enable_(DOC); + } + /** explicit doc end, with ... 
*/ + void end_doc_expl() + { + _c4dbgp("end_doc_expl"); + _remove_speculative(); + if(_stack_should_pop_on_end_doc()) + { + _c4dbgp("pop!"); + _pop(); + } + m_yaml_directive = false; + } + + /** @} */ + +public: + + /** @name YAML map events */ + /** @{ */ + + void begin_map_key_flow() + { + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + } + void begin_map_key_block() + { + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + } + + void begin_map_val_flow() + { + _c4dbgpf("node[{}]: begin_map_val_flow", m_curr->node_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); + _enable_(MAP|FLOW_SL); + _save_loc(); + _push(); + } + void begin_map_val_block() + { + _c4dbgpf("node[{}]: begin_map_val_block", m_curr->node_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); + _enable_(MAP|BLOCK); + _save_loc(); + _push(); + } + + void end_map() + { + _pop(); + _c4dbgpf("node[{}]: end_map_val", m_curr->node_id); + } + + /** @} */ + +public: + + /** @name YAML seq events */ + /** @{ */ + + void begin_seq_key_flow() + { + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + } + void begin_seq_key_block() + { + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + } + + void begin_seq_val_flow() + { + _c4dbgpf("node[{}]: begin_seq_val_flow", m_curr->node_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); + _enable_(SEQ|FLOW_SL); + _save_loc(); + _push(); + } + void begin_seq_val_block() + { + _c4dbgpf("node[{}]: begin_seq_val_block", m_curr->node_id); + _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL)); + _enable_(SEQ|BLOCK); + _save_loc(); + _push(); + } + + void end_seq() + { + _pop(); + _c4dbgpf("node[{}]: end_seq_val", m_curr->node_id); + } + + /** @} */ + +public: + + /** @name YAML structure events */ + /** @{ */ + + void add_sibling() + { + 
_RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_children(m_parent->node_id)); + NodeData const* prev = m_tree->m_buf; // watchout against relocation of the tree nodes + _set_state_(m_curr, m_tree->_append_child__unprotected(m_parent->node_id)); + if(prev != m_tree->m_buf) + _refresh_after_relocation(); + _c4dbgpf("node[{}]: added sibling={} prev={}", m_parent->node_id, m_curr->node_id, m_tree->prev_sibling(m_curr->node_id)); + } + + /** set the previous val as the first key of a new map, with flow style. + * + * See the documentation for @ref doc_event_handlers, which has + * important notes about this event. + */ + void actually_val_is_first_key_of_new_map_flow() + { + if(C4_UNLIKELY(m_tree->is_container(m_curr->node_id))) + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_parent->node_id)); + _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_container(m_curr->node_id)); + _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->has_key(m_curr->node_id)); + const NodeData tmp = _val2key_(*m_curr->tr_data); + _disable_(_VALMASK|VAL_STYLE); + m_curr->tr_data->m_val = {}; + begin_map_val_flow(); + m_curr->tr_data->m_type = tmp.m_type; + m_curr->tr_data->m_key = tmp.m_key; + } + + /** like its flow counterpart, but this function can only be + * called after the end of a flow-val at root or doc level. + * + * See the documentation for @ref doc_event_handlers, which has + * important notes about this event. 
+ */ + void actually_val_is_first_key_of_new_map_block() + { + _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos); + } + + /** @} */ + +public: + + /** @name YAML scalar events */ + /** @{ */ + + + C4_ALWAYS_INLINE void set_key_scalar_plain(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set key scalar plain: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_PLAIN); + } + C4_ALWAYS_INLINE void set_val_scalar_plain(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set val scalar plain: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_PLAIN); + } + + + C4_ALWAYS_INLINE void set_key_scalar_dquoted(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set key scalar dquot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_DQUO); + } + C4_ALWAYS_INLINE void set_val_scalar_dquoted(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set val scalar dquot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_DQUO); + } + + + C4_ALWAYS_INLINE void set_key_scalar_squoted(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set key scalar squot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_SQUO); + } + C4_ALWAYS_INLINE void set_val_scalar_squoted(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set val scalar squot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_SQUO); + } + + + C4_ALWAYS_INLINE void set_key_scalar_literal(csubstr scalar) 
noexcept + { + _c4dbgpf("node[{}]: set key scalar literal: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_LITERAL); + } + C4_ALWAYS_INLINE void set_val_scalar_literal(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set val scalar literal: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_LITERAL); + } + + + C4_ALWAYS_INLINE void set_key_scalar_folded(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set key scalar folded: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_key.scalar = scalar; + _enable_(KEY|KEY_FOLDED); + } + C4_ALWAYS_INLINE void set_val_scalar_folded(csubstr scalar) noexcept + { + _c4dbgpf("node[{}]: set val scalar folded: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast(scalar.str)); + m_curr->tr_data->m_val.scalar = scalar; + _enable_(VAL|VAL_FOLDED); + } + + + C4_ALWAYS_INLINE void mark_key_scalar_unfiltered() noexcept + { + _enable_(KEY_UNFILT); + } + C4_ALWAYS_INLINE void mark_val_scalar_unfiltered() noexcept + { + _enable_(VAL_UNFILT); + } + + /** @} */ + +public: + + /** @name YAML anchor/reference events */ + /** @{ */ + + void set_key_anchor(csubstr anchor) + { + _c4dbgpf("node[{}]: set key anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + if(C4_UNLIKELY(_has_any_(KEYREF))) + _RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos); + _RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&')); + _enable_(KEYANCH); + m_curr->tr_data->m_key.anchor = anchor; + } + void set_val_anchor(csubstr anchor) + { + _c4dbgpf("node[{}]: set val anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + 
if(C4_UNLIKELY(_has_any_(VALREF))) + _RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos); + _RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&')); + _enable_(VALANCH); + m_curr->tr_data->m_val.anchor = anchor; + } + + void set_key_ref(csubstr ref) + { + _c4dbgpf("node[{}]: set key ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + if(C4_UNLIKELY(_has_any_(KEYANCH))) + _RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos); + _RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*')); + _enable_(KEY|KEYREF); + m_curr->tr_data->m_key.anchor = ref.sub(1); + m_curr->tr_data->m_key.scalar = ref; + } + void set_val_ref(csubstr ref) + { + _c4dbgpf("node[{}]: set val ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + if(C4_UNLIKELY(_has_any_(VALANCH))) + _RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos); + _RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*')); + _enable_(VAL|VALREF); + m_curr->tr_data->m_val.anchor = ref.sub(1); + m_curr->tr_data->m_val.scalar = ref; + } + + /** @} */ + +public: + + /** @name YAML tag events */ + /** @{ */ + + void set_key_tag(csubstr tag) noexcept + { + _c4dbgpf("node[{}]: set key tag: [{}]~~~{}~~~", m_curr->node_id, tag.len, tag); + _enable_(KEYTAG); + m_curr->tr_data->m_key.tag = tag; + } + void set_val_tag(csubstr tag) noexcept + { + _c4dbgpf("node[{}]: set val tag: [{}]~~~{}~~~", m_curr->node_id, tag.len, tag); + _enable_(VALTAG); + m_curr->tr_data->m_val.tag = tag; + } + + /** @} */ + +public: + + /** @name YAML directive events */ + /** @{ */ + + C4_NO_INLINE void add_directive(csubstr directive) + { + _c4dbgpf("% directive! 
{}", directive); + _RYML_CB_ASSERT(m_tree->callbacks(), directive.begins_with('%')); + if(directive.begins_with("%TAG")) + { + if(C4_UNLIKELY(!m_tree->add_tag_directive(directive))) + _RYML_CB_ERR_(m_stack.m_callbacks, "failed to add directive", m_curr->pos); + } + else if(directive.begins_with("%YAML")) + { + _c4dbgpf("%YAML directive! ignoring...: {}", directive); + if(C4_UNLIKELY(m_yaml_directive)) + _RYML_CB_ERR_(m_stack.m_callbacks, "multiple yaml directives", m_curr->pos); + m_yaml_directive = true; + } + else + { + _c4dbgpf("unknown directive! ignoring... {}", directive); + } + ++m_num_directives; + } + + /** @} */ + +public: + + /** @name arena functions */ + /** @{ */ + + substr alloc_arena(size_t len) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + csubstr prev = m_tree->arena(); + substr out = m_tree->alloc_arena(len); + substr curr = m_tree->arena(); + if(curr.str != prev.str) + _stack_relocate_to_new_arena(prev, curr); + return out; + } + + substr alloc_arena(size_t len, substr *relocated) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + csubstr prev = m_tree->arena(); + if(!prev.is_super(*relocated)) + return alloc_arena(len); + substr out = alloc_arena(len); + substr curr = m_tree->arena(); + if(curr.str != prev.str) + *relocated = _stack_relocate_to_new_arena(*relocated, prev, curr); + return out; + } + + /** @} */ + +public: + + /** @cond dev */ + void _reset_parser_state(state* st, id_type parse_root, id_type node) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + _set_state_(st, node); + const NodeType type = m_tree->type(node); + #ifdef RYML_DBG + char flagbuf[80]; + _c4dbgpf("resetting state: initial flags={}", detail::_parser_flags_to_str(flagbuf, st->flags)); + #endif + if(type == NOTYPE) + { + _c4dbgpf("node[{}] is notype", node); + if(m_tree->is_root(parse_root)) + { + _c4dbgpf("node[{}] is root", node); + st->flags |= RUNK|RTOP; + } + else + { + _c4dbgpf("node[{}] is not root. 
setting USTY", node); + st->flags |= USTY; + } + } + else if(type.is_map()) + { + _c4dbgpf("node[{}] is map", node); + st->flags |= RMAP|USTY; + } + else if(type.is_seq()) + { + _c4dbgpf("node[{}] is map", node); + st->flags |= RSEQ|USTY; + } + else if(type.has_key()) + { + _c4dbgpf("node[{}] has key. setting USTY", node); + st->flags |= USTY; + } + else + { + _RYML_CB_ERR(m_tree->callbacks(), "cannot append to node"); + } + if(type.is_doc()) + { + _c4dbgpf("node[{}] is doc", node); + st->flags |= RDOC; + } + #ifdef RYML_DBG + _c4dbgpf("resetting state: final flags={}", detail::_parser_flags_to_str(flagbuf, st->flags)); + #endif + } + + /** push a new parent, add a child to the new parent, and set the + * child as the current node */ + void _push() + { + _stack_push(); + NodeData const* prev = m_tree->m_buf; // watch out against relocation of the tree nodes + m_curr->node_id = m_tree->_append_child__unprotected(m_parent->node_id); + m_curr->tr_data = m_tree->_p(m_curr->node_id); + if(prev != m_tree->m_buf) + _refresh_after_relocation(); + _c4dbgpf("pushed! level={}. top is now node={} (parent={})", m_curr->level, m_curr->node_id, m_parent ? 
m_parent->node_id : NONE); + } + /** end the current scope */ + void _pop() + { + _remove_speculative_with_parent(); + _stack_pop(); + } + +public: + + template C4_HOT C4_ALWAYS_INLINE void _enable__() noexcept + { + m_curr->tr_data->m_type.type = static_cast(m_curr->tr_data->m_type.type | bits); + } + template C4_HOT C4_ALWAYS_INLINE void _disable__() noexcept + { + m_curr->tr_data->m_type.type = static_cast(m_curr->tr_data->m_type.type & (~bits)); + } + template C4_HOT C4_ALWAYS_INLINE bool _has_any__() const noexcept + { + return (m_curr->tr_data->m_type.type & bits) != 0; + } + +public: + + C4_ALWAYS_INLINE void _set_state_(state *C4_RESTRICT s, id_type id) noexcept + { + s->node_id = id; + s->tr_data = m_tree->_p(id); + } + void _refresh_after_relocation() + { + _c4dbgp("tree: refreshing stack data after tree data relocation"); + for(auto &st : m_stack) + st.tr_data = m_tree->_p(st.node_id); + } + + void _set_root_as_stream() + { + _c4dbgp("set root as stream"); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->root_id() == 0u); + _RYML_CB_ASSERT(m_tree->callbacks(), m_curr->node_id == 0u); + const bool hack = !m_tree->has_children(m_curr->node_id) && !m_tree->is_val(m_curr->node_id); + if(hack) + m_tree->_p(m_tree->root_id())->m_type.add(VAL); + m_tree->set_root_as_stream(); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_stream(m_tree->root_id())); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_children(m_tree->root_id())); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_doc(m_tree->first_child(m_tree->root_id()))); + if(hack) + m_tree->_p(m_tree->first_child(m_tree->root_id()))->m_type.rem(VAL); + _set_state_(m_curr, m_tree->root_id()); + } + + static NodeData _val2key_(NodeData const& C4_RESTRICT d) noexcept + { + NodeData r = d; + r.m_key = d.m_val; + r.m_val = {}; + r.m_type = d.m_type; + static_assert((_VALMASK >> 1u) == _KEYMASK, "required for this function to work"); + static_assert((VAL_STYLE >> 1u) == KEY_STYLE, "required for this function to 
work"); + r.m_type.type = ((d.m_type.type & (_VALMASK|VAL_STYLE)) >> 1u); + r.m_type.type = (r.m_type.type & ~(_VALMASK|VAL_STYLE)); + r.m_type.type = (r.m_type.type | KEY); + return r; + } + + void _remove_speculative() + { + _c4dbgp("remove speculative node"); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0); + const id_type last_added = m_tree->size() - 1; + if(m_tree->has_parent(last_added)) + if(m_tree->_p(last_added)->m_type == NOTYPE) + m_tree->remove(last_added); + } + + void _remove_speculative_with_parent() + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0); + const id_type last_added = m_tree->size() - 1; + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_parent(last_added)); + if(m_tree->_p(last_added)->m_type == NOTYPE) + { + _c4dbgpf("remove speculative node with parent. parent={} node={} parent(node)={}", m_parent->node_id, last_added, m_tree->parent(last_added)); + m_tree->remove(last_added); + } + } + + C4_ALWAYS_INLINE void _save_loc() + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree); + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->_p(m_curr->node_id)->m_val.scalar.len == 0); + m_tree->_p(m_curr->node_id)->m_val.scalar.str = m_curr->line_contents.rem.str; + } + +#undef _enable_ +#undef _disable_ +#undef _has_any_ + + /** @endcond */ +}; + +/** @} */ + +} // namespace yml +} // namespace c4 + +C4_SUPPRESS_WARNING_MSVC_POP + +#endif /* _C4_YML_EVENT_HANDLER_TREE_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/filter_processor.hpp b/3rdparty/rapidyaml/include/c4/yml/filter_processor.hpp new file mode 100644 index 00000000000000..8277d8fa6d5bb0 --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/filter_processor.hpp @@ -0,0 +1,512 @@ +#ifndef _C4_YML_FILTER_PROCESSOR_HPP_ +#define _C4_YML_FILTER_PROCESSOR_HPP_ + +#include "c4/yml/common.hpp" + +#ifdef RYML_DBG +#include "c4/charconv.hpp" +#include 
"c4/yml/detail/parser_dbg.hpp" +#endif + +namespace c4 { +namespace yml { + +/** @defgroup doc_filter_processors Scalar filter processors + * + * These are internal classes used by @ref ParseEngine to parse the + * scalars; normally there is no reason for a user to be manually + * using these classes. + * + * @ingroup doc_parse */ +/** @{ */ + +//----------------------------------------------------------------------------- + +/** Filters an input string into a different output string */ +struct FilterProcessorSrcDst +{ + csubstr src; + substr dst; + size_t rpos; ///< read position + size_t wpos; ///< write position + + C4_ALWAYS_INLINE FilterProcessorSrcDst(csubstr src_, substr dst_) noexcept + : src(src_) + , dst(dst_) + , rpos(0) + , wpos(0) + { + RYML_ASSERT(!dst.overlaps(src)); + } + + C4_ALWAYS_INLINE void setwpos(size_t wpos_) noexcept { wpos = wpos_; } + C4_ALWAYS_INLINE void setpos(size_t rpos_, size_t wpos_) noexcept { rpos = rpos_; wpos = wpos_; } + C4_ALWAYS_INLINE void set_at_end() noexcept { skip(src.len - rpos); } + + C4_ALWAYS_INLINE bool has_more_chars() const noexcept { return rpos < src.len; } + C4_ALWAYS_INLINE bool has_more_chars(size_t maxpos) const noexcept { RYML_ASSERT(maxpos <= src.len); return rpos < maxpos; } + + C4_ALWAYS_INLINE csubstr rem() const noexcept { return src.sub(rpos); } + C4_ALWAYS_INLINE csubstr sofar() const noexcept { return csubstr(dst.str, wpos <= dst.len ? wpos : dst.len); } + C4_ALWAYS_INLINE FilterResult result() const noexcept + { + FilterResult ret; + ret.str.str = wpos <= dst.len ? dst.str : nullptr; + ret.str.len = wpos; + return ret; + } + + C4_ALWAYS_INLINE char curr() const noexcept { RYML_ASSERT(rpos < src.len); return src[rpos]; } + C4_ALWAYS_INLINE char next() const noexcept { return rpos+1 < src.len ? 
src[rpos+1] : '\0'; } + C4_ALWAYS_INLINE bool skipped_chars() const noexcept { return wpos != rpos; } + + C4_ALWAYS_INLINE void skip() noexcept { ++rpos; } + C4_ALWAYS_INLINE void skip(size_t num) noexcept { rpos += num; } + + C4_ALWAYS_INLINE void set_at(size_t pos, char c) noexcept + { + RYML_ASSERT(pos < wpos); + dst.str[pos] = c; + } + C4_ALWAYS_INLINE void set(char c) noexcept + { + if(wpos < dst.len) + dst.str[wpos] = c; + ++wpos; + } + C4_ALWAYS_INLINE void set(char c, size_t num) noexcept + { + RYML_ASSERT(num > 0); + if(wpos + num <= dst.len) + memset(dst.str + wpos, c, num); + wpos += num; + } + + C4_ALWAYS_INLINE void copy() noexcept + { + RYML_ASSERT(rpos < src.len); + if(wpos < dst.len) + dst.str[wpos] = src.str[rpos]; + ++wpos; + ++rpos; + } + C4_ALWAYS_INLINE void copy(size_t num) noexcept + { + RYML_ASSERT(num); + RYML_ASSERT(rpos+num <= src.len); + if(wpos + num <= dst.len) + memcpy(dst.str + wpos, src.str + rpos, num); + wpos += num; + rpos += num; + } + + C4_ALWAYS_INLINE void translate_esc(char c) noexcept + { + if(wpos < dst.len) + dst.str[wpos] = c; + ++wpos; + rpos += 2; + } + C4_ALWAYS_INLINE void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept + { + RYML_ASSERT(nw > 0); + RYML_ASSERT(nr > 0); + RYML_ASSERT(rpos+nr <= src.len); + if(wpos+nw <= dst.len) + memcpy(dst.str + wpos, s, nw); + wpos += nw; + rpos += 1 + nr; + } + C4_ALWAYS_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept + { + translate_esc_bulk(s, nw, nr); + } +}; + + +//----------------------------------------------------------------------------- +// filter in place + +// debugging scaffold +/** @cond dev */ +#if defined(RYML_DBG) && 0 +#define _c4dbgip(...) _c4dbgpf(__VA_ARGS__) +#else +#define _c4dbgip(...) +#endif +/** @endcond */ + +/** Filters in place. While the result may be larger than the source, + * any extending happens only at the end of the string. 
Consequently, + * it's impossible for characters to be left unfiltered. + * + * @see FilterProcessorInplaceMidExtending */ +struct FilterProcessorInplaceEndExtending +{ + substr src; ///< the subject string + size_t wcap; ///< write capacity - the capacity of the subject string's buffer + size_t rpos; ///< read position + size_t wpos; ///< write position + + C4_ALWAYS_INLINE FilterProcessorInplaceEndExtending(substr src_, size_t wcap_) noexcept + : src(src_) + , wcap(wcap_) + , rpos(0) + , wpos(0) + { + RYML_ASSERT(wcap >= src.len); + } + + C4_ALWAYS_INLINE void setwpos(size_t wpos_) noexcept { wpos = wpos_; } + C4_ALWAYS_INLINE void setpos(size_t rpos_, size_t wpos_) noexcept { rpos = rpos_; wpos = wpos_; } + C4_ALWAYS_INLINE void set_at_end() noexcept { skip(src.len - rpos); } + + C4_ALWAYS_INLINE bool has_more_chars() const noexcept { return rpos < src.len; } + C4_ALWAYS_INLINE bool has_more_chars(size_t maxpos) const noexcept { RYML_ASSERT(maxpos <= src.len); return rpos < maxpos; } + + C4_ALWAYS_INLINE FilterResult result() const noexcept + { + _c4dbgip("inplace: wpos={} wcap={} small={}", wpos, wcap, wpos > rpos); + FilterResult ret; + ret.str.str = (wpos <= wcap) ? src.str : nullptr; + ret.str.len = wpos; + return ret; + } + C4_ALWAYS_INLINE csubstr sofar() const noexcept { return csubstr(src.str, wpos <= wcap ? wpos : wcap); } + C4_ALWAYS_INLINE csubstr rem() const noexcept { return src.sub(rpos); } + + C4_ALWAYS_INLINE char curr() const noexcept { RYML_ASSERT(rpos < src.len); return src[rpos]; } + C4_ALWAYS_INLINE char next() const noexcept { return rpos+1 < src.len ? 
src[rpos+1] : '\0'; } + + C4_ALWAYS_INLINE void skip() noexcept { ++rpos; } + C4_ALWAYS_INLINE void skip(size_t num) noexcept { rpos += num; } + + void set_at(size_t pos, char c) noexcept + { + RYML_ASSERT(pos < wpos); + const size_t save = wpos; + wpos = pos; + set(c); + wpos = save; + } + void set(char c) noexcept + { + if(wpos < wcap) // respect write-capacity + src.str[wpos] = c; + ++wpos; + } + void set(char c, size_t num) noexcept + { + RYML_ASSERT(num); + if(wpos + num <= wcap) // respect write-capacity + memset(src.str + wpos, c, num); + wpos += num; + } + + void copy() noexcept + { + RYML_ASSERT(wpos <= rpos); + RYML_ASSERT(rpos < src.len); + if(wpos < wcap) // respect write-capacity + src.str[wpos] = src.str[rpos]; + ++rpos; + ++wpos; + } + void copy(size_t num) noexcept + { + RYML_ASSERT(num); + RYML_ASSERT(rpos+num <= src.len); + RYML_ASSERT(wpos <= rpos); + if(wpos + num <= wcap) // respect write-capacity + { + if(wpos + num <= rpos) // there is no overlap + memcpy(src.str + wpos, src.str + rpos, num); + else // there is overlap + memmove(src.str + wpos, src.str + rpos, num); + } + rpos += num; + wpos += num; + } + + void translate_esc(char c) noexcept + { + RYML_ASSERT(rpos + 2 <= src.len); + RYML_ASSERT(wpos <= rpos); + if(wpos < wcap) // respect write-capacity + src.str[wpos] = c; + rpos += 2; // add 1u to account for the escape character + ++wpos; + } + + void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept + { + RYML_ASSERT(nw > 0); + RYML_ASSERT(nr > 0); + RYML_ASSERT(nw <= nr + 1u); + RYML_ASSERT(rpos+nr <= src.len); + RYML_ASSERT(wpos <= rpos); + const size_t wpos_next = wpos + nw; + const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character + RYML_ASSERT(wpos_next <= rpos_next); + if(wpos_next <= wcap) + memcpy(src.str + wpos, s, nw); + rpos = rpos_next; + wpos = wpos_next; + } + + C4_ALWAYS_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept + { 
+ translate_esc_bulk(s, nw, nr); + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** Filters in place. The result may be larger than the source, and + * extending may happen anywhere. As a result some characters may be + * left unfiltered when there is no slack in the buffer and the + * write-position would overlap the read-position. Consequently, it's + * possible for characters to be left unfiltered. In YAML, this + * happens only with double-quoted strings, and only with a small + * number of escape sequences such as `\L` which is substituted by three + * bytes. These escape sequences cause a call to translate_esc_extending() + * which is the only entry point to this unfiltered situation. + * + * @see FilterProcessorInplaceEndExtending */ +struct FilterProcessorInplaceMidExtending +{ + substr src; ///< the subject string + size_t wcap; ///< write capacity - the capacity of the subject string's buffer + size_t rpos; ///< read position + size_t wpos; ///< write position + size_t maxcap; ///< the max capacity needed for filtering the string. This may be larger than the final string size. 
+ bool unfiltered_chars; ///< true when some characters could not be written for lack of capacity + + C4_ALWAYS_INLINE FilterProcessorInplaceMidExtending(substr src_, size_t wcap_) noexcept + : src(src_) + , wcap(wcap_) + , rpos(0) + , wpos(0) + , maxcap(src.len) + , unfiltered_chars(false) + { + RYML_ASSERT(wcap >= src.len); + } + + C4_ALWAYS_INLINE void setwpos(size_t wpos_) noexcept { wpos = wpos_; } + C4_ALWAYS_INLINE void setpos(size_t rpos_, size_t wpos_) noexcept { rpos = rpos_; wpos = wpos_; } + C4_ALWAYS_INLINE void set_at_end() noexcept { skip(src.len - rpos); } + + C4_ALWAYS_INLINE bool has_more_chars() const noexcept { return rpos < src.len; } + C4_ALWAYS_INLINE bool has_more_chars(size_t maxpos) const noexcept { RYML_ASSERT(maxpos <= src.len); return rpos < maxpos; } + + C4_ALWAYS_INLINE FilterResultExtending result() const noexcept + { + _c4dbgip("inplace: wpos={} wcap={} unfiltered={} maxcap={}", this->wpos, this->wcap, this->unfiltered_chars, this->maxcap); + FilterResultExtending ret; + ret.str.str = (wpos <= wcap && !unfiltered_chars) ? src.str : nullptr; + ret.str.len = wpos; + ret.reqlen = maxcap; + return ret; + } + C4_ALWAYS_INLINE csubstr sofar() const noexcept { return csubstr(src.str, wpos <= wcap ? wpos : wcap); } + C4_ALWAYS_INLINE csubstr rem() const noexcept { return src.sub(rpos); } + + C4_ALWAYS_INLINE char curr() const noexcept { RYML_ASSERT(rpos < src.len); return src[rpos]; } + C4_ALWAYS_INLINE char next() const noexcept { return rpos+1 < src.len ? 
src[rpos+1] : '\0'; } + + C4_ALWAYS_INLINE void skip() noexcept { ++rpos; } + C4_ALWAYS_INLINE void skip(size_t num) noexcept { rpos += num; } + + void set_at(size_t pos, char c) noexcept + { + RYML_ASSERT(pos < wpos); + const size_t save = wpos; + wpos = pos; + set(c); + wpos = save; + } + void set(char c) noexcept + { + if(wpos < wcap) // respect write-capacity + { + if((wpos <= rpos) && !unfiltered_chars) + src.str[wpos] = c; + } + else + { + _c4dbgip("inplace: add unwritten {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap)); + unfiltered_chars = true; + } + ++wpos; + maxcap = wpos > maxcap ? wpos : maxcap; + } + void set(char c, size_t num) noexcept + { + RYML_ASSERT(num); + if(wpos + num <= wcap) // respect write-capacity + { + if((wpos <= rpos) && !unfiltered_chars) + memset(src.str + wpos, c, num); + } + else + { + _c4dbgip("inplace: add unwritten {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+num > maxcap ? wpos+num : maxcap)); + unfiltered_chars = true; + } + wpos += num; + maxcap = wpos > maxcap ? wpos : maxcap; + } + + void copy() noexcept + { + RYML_ASSERT(rpos < src.len); + if(wpos < wcap) // respect write-capacity + { + if((wpos < rpos) && !unfiltered_chars) // write only if wpos is behind rpos + src.str[wpos] = src.str[rpos]; + } + else + { + _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos maxcap ? wpos+1u : maxcap)); + unfiltered_chars = true; + } + ++rpos; + ++wpos; + maxcap = wpos > maxcap ? 
wpos : maxcap; + } + void copy(size_t num) noexcept + { + RYML_ASSERT(num); + RYML_ASSERT(rpos+num <= src.len); + if(wpos + num <= wcap) // respect write-capacity + { + if((wpos < rpos) && !unfiltered_chars) // write only if wpos is behind rpos + { + if(wpos + num <= rpos) // there is no overlap + memcpy(src.str + wpos, src.str + rpos, num); + else // there is overlap + memmove(src.str + wpos, src.str + rpos, num); + } + } + else + { + _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos maxcap ? wpos : maxcap; + } + + void translate_esc(char c) noexcept + { + RYML_ASSERT(rpos + 2 <= src.len); + if(wpos < wcap) // respect write-capacity + { + if((wpos <= rpos) && !unfiltered_chars) + src.str[wpos] = c; + } + else + { + _c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap)); + unfiltered_chars = true; + } + rpos += 2; + ++wpos; + maxcap = wpos > maxcap ? wpos : maxcap; + } + + C4_NO_INLINE void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept + { + RYML_ASSERT(nw > 0); + RYML_ASSERT(nr > 0); + RYML_ASSERT(nr+1u >= nw); + const size_t wpos_next = wpos + nw; + const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character + if(wpos_next <= wcap) // respect write-capacity + { + if((wpos <= rpos) && !unfiltered_chars) // write only if wpos is behind rpos + memcpy(src.str + wpos, s, nw); + } + else + { + _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={} (wpos={}{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos maxcap ? 
wpos : maxcap; + } + + C4_NO_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept + { + RYML_ASSERT(nw > 0); + RYML_ASSERT(nr > 0); + RYML_ASSERT(rpos+nr <= src.len); + const size_t wpos_next = wpos + nw; + const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character + if(wpos_next <= rpos_next) // read and write do not overlap. just do a vanilla copy. + { + if((wpos_next <= wcap) && !unfiltered_chars) + memcpy(src.str + wpos, s, nw); + rpos = rpos_next; + wpos = wpos_next; + maxcap = wpos > maxcap ? wpos : maxcap; + } + else // there is overlap. move the (to-be-read) string to the right. + { + const size_t excess = wpos_next - rpos_next; + RYML_ASSERT(wpos_next > rpos_next); + if(src.len + excess <= wcap) // ensure we do not go past the end + { + RYML_ASSERT(rpos+nr+excess <= src.len); + if(wpos_next <= wcap) + { + if(!unfiltered_chars) + { + memmove(src.str + wpos_next, src.str + rpos_next, src.len - rpos_next); + memcpy(src.str + wpos, s, nw); + } + rpos = wpos_next; // wpos, not rpos + } + else + { + rpos = rpos_next; + //const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0; + _c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true); + unfiltered_chars = true; + } + wpos = wpos_next; + // extend the string up to capacity + src.len += excess; + maxcap = wpos > maxcap ? wpos : maxcap; + } + else + { + //const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0; + RYML_ASSERT(rpos_next <= src.len); + const size_t required_size = wpos_next + (src.len - rpos_next); + _c4dbgip("inplace: add unfiltered {}->{} maxcap={}->{}!", unfiltered_chars, true, maxcap, required_size > maxcap ? required_size : maxcap); + RYML_ASSERT(required_size > wcap); + unfiltered_chars = true; + maxcap = required_size > maxcap ? 
required_size : maxcap; + wpos = wpos_next; + rpos = rpos_next; + } + } + } +}; + +#undef _c4dbgip + + +/** @} */ + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_FILTER_PROCESSOR_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/fwd.hpp b/3rdparty/rapidyaml/include/c4/yml/fwd.hpp new file mode 100644 index 00000000000000..7fa1f176999868 --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/fwd.hpp @@ -0,0 +1,24 @@ +#ifndef _C4_YML_FWD_HPP_ +#define _C4_YML_FWD_HPP_ + +/** @file fwd.hpp forward declarations */ + +namespace c4 { +namespace yml { + +struct NodeScalar; +struct NodeInit; +struct NodeData; +struct NodeType; +class NodeRef; +class ConstNodeRef; +class Tree; +struct ReferenceResolver; +template class ParseEngine; +struct EventHandlerTree; +using Parser = ParseEngine; + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_FWD_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/node.hpp b/3rdparty/rapidyaml/include/c4/yml/node.hpp index 8543a53bb19231..b3f707f2fd4142 100644 --- a/3rdparty/rapidyaml/include/c4/yml/node.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/node.hpp @@ -16,6 +16,7 @@ # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wtype-limits" # pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wuseless-cast" #elif defined(_MSC_VER) # pragma warning(push) # pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) @@ -79,9 +80,9 @@ struct child_iterator using tree_type = typename NodeRefType::tree_type; tree_type * C4_RESTRICT m_tree; - size_t m_child_id; + id_type m_child_id; - child_iterator(tree_type * t, size_t id) : m_tree(t), m_child_id(id) {} + child_iterator(tree_type * t, id_type id) : m_tree(t), m_child_id(id) {} child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; } child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = 
m_tree->prev_sibling(m_child_id); return *this; } @@ -108,9 +109,9 @@ struct children_view_ }; template -bool _visit(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +bool _visit(NodeRefType &node, Visitor fn, id_type indentation_level, bool skip_root=false) { - size_t increment = 0; + id_type increment = 0; if( ! (node.is_root() && skip_root)) { if(fn(node, indentation_level)) @@ -131,9 +132,9 @@ bool _visit(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_r } template -bool _visit_stacked(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +bool _visit_stacked(NodeRefType &node, Visitor fn, id_type indentation_level, bool skip_root=false) { - size_t increment = 0; + id_type increment = 0; if( ! (node.is_root() && skip_root)) { if(fn(node, indentation_level)) @@ -169,8 +170,9 @@ struct RoNodeMethods; /** a CRTP base providing read-only methods for @ref ConstNodeRef and @ref NodeRef */ +namespace detail { template -struct detail::RoNodeMethods +struct RoNodeMethods { C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wcast-align") /** @cond dev */ @@ -185,6 +187,8 @@ struct detail::RoNodeMethods RYML_ASSERT(tree_ != nullptr); \ _RYML_CB_ASSERT(tree_->m_callbacks, id_ != NONE); \ _RYML_CB_ASSERT(tree_->m_callbacks, (((Impl const* C4_RESTRICT)this)->readable())) + // a SFINAE beautifier to enable a function only if the + // implementation is mutable #define _C4_IF_MUTABLE(ty) typename std::enable_if::value, ty>::type /** @endcond */ @@ -199,59 +203,95 @@ struct detail::RoNodeMethods template C4_ALWAYS_INLINE auto get() RYML_NOEXCEPT -> _C4_IF_MUTABLE(NodeData*) { return ((Impl const*)this)->readable() ? 
tree__->get(id__) : nullptr; } - C4_ALWAYS_INLINE NodeType type() const RYML_NOEXCEPT { _C4RR(); return tree_->type(id_); } - C4_ALWAYS_INLINE const char* type_str() const RYML_NOEXCEPT { _C4RR(); return tree_->type_str(id_); } + C4_ALWAYS_INLINE NodeType type() const RYML_NOEXCEPT { _C4RR(); return tree_->type(id_); } /**< Forward to @ref Tree::type(). Node must be readable. */ + C4_ALWAYS_INLINE const char* type_str() const RYML_NOEXCEPT { _C4RR(); return tree_->type_str(id_); } /**< Forward to @ref Tree::type_str(). Node must be readable. */ + + C4_ALWAYS_INLINE csubstr key() const RYML_NOEXCEPT { _C4RR(); return tree_->key(id_); } /**< Forward to @ref Tree::key(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr key_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->key_tag(id_); } /**< Forward to @ref Tree::key_tag(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr key_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->key_ref(id_); } /**< Forward to @ref Tree::key_ref(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr key_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->key_anchor(id_); } /**< Forward to @ref Tree::key_anchor(). Node must be readable. */ + + C4_ALWAYS_INLINE csubstr val() const RYML_NOEXCEPT { _C4RR(); return tree_->val(id_); } /**< Forward to @ref Tree::val(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr val_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->val_tag(id_); } /**< Forward to @ref Tree::val_tag(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr val_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->val_ref(id_); } /**< Forward to @ref Tree::val_ref(). Node must be readable. */ + C4_ALWAYS_INLINE csubstr val_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->val_anchor(id_); } /**< Forward to @ref Tree::val_anchor(). Node must be readable. */ + + C4_ALWAYS_INLINE NodeScalar const& keysc() const RYML_NOEXCEPT { _C4RR(); return tree_->keysc(id_); } /**< Forward to @ref Tree::keysc(). 
Node must be readable. */ + C4_ALWAYS_INLINE NodeScalar const& valsc() const RYML_NOEXCEPT { _C4RR(); return tree_->valsc(id_); } /**< Forward to @ref Tree::valsc(). Node must be readable. */ - C4_ALWAYS_INLINE csubstr key() const RYML_NOEXCEPT { _C4RR(); return tree_->key(id_); } - C4_ALWAYS_INLINE csubstr key_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->key_tag(id_); } - C4_ALWAYS_INLINE csubstr key_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->key_ref(id_); } - C4_ALWAYS_INLINE csubstr key_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->key_anchor(id_); } + C4_ALWAYS_INLINE bool key_is_null() const RYML_NOEXCEPT { _C4RR(); return tree_->key_is_null(id_); } /**< Forward to @ref Tree::key_is_null(). Node must be readable. */ + C4_ALWAYS_INLINE bool val_is_null() const RYML_NOEXCEPT { _C4RR(); return tree_->val_is_null(id_); } /**< Forward to @ref Tree::val_is_null(). Node must be readable. */ - C4_ALWAYS_INLINE csubstr val() const RYML_NOEXCEPT { _C4RR(); return tree_->val(id_); } - C4_ALWAYS_INLINE csubstr val_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->val_tag(id_); } - C4_ALWAYS_INLINE csubstr val_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->val_ref(id_); } - C4_ALWAYS_INLINE csubstr val_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->val_anchor(id_); } + C4_ALWAYS_INLINE bool is_key_unfiltered() const noexcept { _C4RR(); return tree_->is_key_unfiltered(id_); } /**< Forward to @ref Tree::is_key_unfiltered(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_unfiltered() const noexcept { _C4RR(); return tree_->is_val_unfiltered(id_); } /**< Forward to @ref Tree::is_val_unfiltered(). Node must be readable. 
*/ + + /** @} */ + +public: - C4_ALWAYS_INLINE NodeScalar const& keysc() const RYML_NOEXCEPT { _C4RR(); return tree_->keysc(id_); } - C4_ALWAYS_INLINE NodeScalar const& valsc() const RYML_NOEXCEPT { _C4RR(); return tree_->valsc(id_); } + /** @name node type predicates */ + /** @{ */ - C4_ALWAYS_INLINE bool key_is_null() const RYML_NOEXCEPT { _C4RR(); return tree_->key_is_null(id_); } - C4_ALWAYS_INLINE bool val_is_null() const RYML_NOEXCEPT { _C4RR(); return tree_->val_is_null(id_); } + C4_ALWAYS_INLINE bool empty() const RYML_NOEXCEPT { _C4RR(); return tree_->empty(id_); } /**< Forward to @ref Tree::empty(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_stream() const RYML_NOEXCEPT { _C4RR(); return tree_->is_stream(id_); } /**< Forward to @ref Tree::is_stream(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_doc() const RYML_NOEXCEPT { _C4RR(); return tree_->is_doc(id_); } /**< Forward to @ref Tree::is_doc(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_container() const RYML_NOEXCEPT { _C4RR(); return tree_->is_container(id_); } /**< Forward to @ref Tree::is_container(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_map() const RYML_NOEXCEPT { _C4RR(); return tree_->is_map(id_); } /**< Forward to @ref Tree::is_map(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_seq() const RYML_NOEXCEPT { _C4RR(); return tree_->is_seq(id_); } /**< Forward to @ref Tree::is_seq(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_val() const RYML_NOEXCEPT { _C4RR(); return tree_->has_val(id_); } /**< Forward to @ref Tree::has_val(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_key() const RYML_NOEXCEPT { _C4RR(); return tree_->has_key(id_); } /**< Forward to @ref Tree::has_key(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val(id_); } /**< Forward to @ref Tree::is_val(). Node must be readable. 
*/ + C4_ALWAYS_INLINE bool is_keyval() const RYML_NOEXCEPT { _C4RR(); return tree_->is_keyval(id_); } /**< Forward to @ref Tree::is_keyval(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_key_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->has_key_tag(id_); } /**< Forward to @ref Tree::has_key_tag(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_val_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->has_val_tag(id_); } /**< Forward to @ref Tree::has_val_tag(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_key_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->has_key_anchor(id_); } /**< Forward to @ref Tree::has_key_anchor(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_val_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->has_val_anchor(id_); } /**< Forward to @ref Tree::has_val_anchor(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->has_anchor(id_); } /**< Forward to @ref Tree::has_anchor(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_ref(id_); } /**< Forward to @ref Tree::is_key_ref(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_ref(id_); } /**< Forward to @ref Tree::is_val_ref(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_ref(id_); } /**< Forward to @ref Tree::is_ref(). Node must be readable. */ + C4_ALWAYS_INLINE bool parent_is_seq() const RYML_NOEXCEPT { _C4RR(); return tree_->parent_is_seq(id_); } /**< Forward to @ref Tree::parent_is_seq(). Node must be readable. */ + C4_ALWAYS_INLINE bool parent_is_map() const RYML_NOEXCEPT { _C4RR(); return tree_->parent_is_map(id_); } /**< Forward to @ref Tree::parent_is_map(). Node must be readable. 
*/ + + RYML_DEPRECATED("use has_key_anchor()") bool is_key_anchor() const noexcept { _C4RR(); return tree_->has_key_anchor(id_); } + RYML_DEPRECATED("use has_val_anchor()") bool is_val_hanchor() const noexcept { _C4RR(); return tree_->has_val_anchor(id_); } + RYML_DEPRECATED("use has_anchor()") bool is_anchor() const noexcept { _C4RR(); return tree_->has_anchor(id_); } + RYML_DEPRECATED("use has_anchor() || is_ref()") bool is_anchor_or_ref() const noexcept { _C4RR(); return tree_->is_anchor_or_ref(id_); } /** @} */ public: - /** @name node property predicates */ + /** @name node container+scalar style predicates */ /** @{ */ - C4_ALWAYS_INLINE bool empty() const RYML_NOEXCEPT { _C4RR(); return tree_->empty(id_); } /**< Forward to Tree::empty(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_stream() const RYML_NOEXCEPT { _C4RR(); return tree_->is_stream(id_); } /**< Forward to Tree::is_stream(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_doc() const RYML_NOEXCEPT { _C4RR(); return tree_->is_doc(id_); } /**< Forward to Tree::is_doc(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_container() const RYML_NOEXCEPT { _C4RR(); return tree_->is_container(id_); } /**< Forward to Tree::is_container(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_map() const RYML_NOEXCEPT { _C4RR(); return tree_->is_map(id_); } /**< Forward to Tree::is_map(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_seq() const RYML_NOEXCEPT { _C4RR(); return tree_->is_seq(id_); } /**< Forward to Tree::is_seq(). Node must be readable. */ - C4_ALWAYS_INLINE bool has_val() const RYML_NOEXCEPT { _C4RR(); return tree_->has_val(id_); } /**< Forward to Tree::has_val(). Node must be readable. */ - C4_ALWAYS_INLINE bool has_key() const RYML_NOEXCEPT { _C4RR(); return tree_->has_key(id_); } /**< Forward to Tree::has_key(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_val() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val(id_); } /**< Forward to Tree::is_val(). 
Node must be readable. */ - C4_ALWAYS_INLINE bool is_keyval() const RYML_NOEXCEPT { _C4RR(); return tree_->is_keyval(id_); } /**< Forward to Tree::is_keyval(). Node must be readable. */ - C4_ALWAYS_INLINE bool has_key_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->has_key_tag(id_); } /**< Forward to Tree::has_key_tag(). Node must be readable. */ - C4_ALWAYS_INLINE bool has_val_tag() const RYML_NOEXCEPT { _C4RR(); return tree_->has_val_tag(id_); } /**< Forward to Tree::has_val_tag(). Node must be readable. */ - C4_ALWAYS_INLINE bool has_key_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->has_key_anchor(id_); } /**< Forward to Tree::has_key_anchor(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_key_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_anchor(id_); } /**< Forward to Tree::is_key_anchor(). Node must be readable. */ - C4_ALWAYS_INLINE bool has_val_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->has_val_anchor(id_); } /**< Forward to Tree::has_val_anchor(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_val_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_anchor(id_); } /**< Forward to Tree::is_val_anchor(). Node must be readable. */ - C4_ALWAYS_INLINE bool has_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->has_anchor(id_); } /**< Forward to Tree::has_anchor(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->is_anchor(id_); } /**< Forward to Tree::is_anchor(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_key_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_ref(id_); } /**< Forward to Tree::is_key_ref(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_val_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_ref(id_); } /**< Forward to Tree::is_val_ref(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_ref(id_); } /**< Forward to Tree::is_ref(). 
Node must be readable. */ - C4_ALWAYS_INLINE bool is_anchor_or_ref() const RYML_NOEXCEPT { _C4RR(); return tree_->is_anchor_or_ref(id_); } /**< Forward to Tree::is_anchor_or_ref(. Node must be readable. */ - C4_ALWAYS_INLINE bool is_key_quoted() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_quoted(id_); } /**< Forward to Tree::is_key_quoted(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_val_quoted() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_quoted(id_); } /**< Forward to Tree::is_val_quoted(). Node must be readable. */ - C4_ALWAYS_INLINE bool is_quoted() const RYML_NOEXCEPT { _C4RR(); return tree_->is_quoted(id_); } /**< Forward to Tree::is_quoted(). Node must be readable. */ - C4_ALWAYS_INLINE bool parent_is_seq() const RYML_NOEXCEPT { _C4RR(); return tree_->parent_is_seq(id_); } /**< Forward to Tree::parent_is_seq(). Node must be readable. */ - C4_ALWAYS_INLINE bool parent_is_map() const RYML_NOEXCEPT { _C4RR(); return tree_->parent_is_map(id_); } /**< Forward to Tree::parent_is_map(). Node must be readable. */ + // documentation to the right --> + + C4_ALWAYS_INLINE bool type_has_any(NodeType_e bits) const RYML_NOEXCEPT { _C4RR(); return tree_->type_has_any(id_, bits); } /**< Forward to @ref Tree::type_has_any(). Node must be readable. */ + C4_ALWAYS_INLINE bool type_has_all(NodeType_e bits) const RYML_NOEXCEPT { _C4RR(); return tree_->type_has_all(id_, bits); } /**< Forward to @ref Tree::type_has_all(). Node must be readable. */ + C4_ALWAYS_INLINE bool type_has_none(NodeType_e bits) const RYML_NOEXCEPT { _C4RR(); return tree_->type_has_none(id_, bits); } /**< Forward to @ref Tree::type_has_none(). Node must be readable. */ + + C4_ALWAYS_INLINE bool is_container_styled() const RYML_NOEXCEPT { _C4RR(); return tree_->is_container_styled(id_); } /**< Forward to @ref Tree::is_container_styled(). Node must be readable. 
*/ + C4_ALWAYS_INLINE bool is_block() const RYML_NOEXCEPT { _C4RR(); return tree_->is_block(id_); } /**< Forward to @ref Tree::is_block(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_flow_sl() const RYML_NOEXCEPT { _C4RR(); return tree_->is_flow_sl(id_); } /**< Forward to @ref Tree::is_flow_sl(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_flow_ml() const RYML_NOEXCEPT { _C4RR(); return tree_->is_flow_ml(id_); } /**< Forward to @ref Tree::is_flow_ml(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_flow() const RYML_NOEXCEPT { _C4RR(); return tree_->is_flow(id_); } /**< Forward to @ref Tree::is_flow(). Node must be readable. */ + + C4_ALWAYS_INLINE bool is_key_styled() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_styled(id_); } /**< Forward to @ref Tree::is_key_styled(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_styled() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_styled(id_); } /**< Forward to @ref Tree::is_val_styled(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_literal() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_literal(id_); } /**< Forward to @ref Tree::is_key_literal(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_literal() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_literal(id_); } /**< Forward to @ref Tree::is_val_literal(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_folded() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_folded(id_); } /**< Forward to @ref Tree::is_key_folded(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_folded() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_folded(id_); } /**< Forward to @ref Tree::is_val_folded(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_squo() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_squo(id_); } /**< Forward to @ref Tree::is_key_squo(). Node must be readable. 
*/ + C4_ALWAYS_INLINE bool is_val_squo() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_squo(id_); } /**< Forward to @ref Tree::is_val_squo(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_dquo() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_dquo(id_); } /**< Forward to @ref Tree::is_key_dquo(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_dquo() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_dquo(id_); } /**< Forward to @ref Tree::is_val_dquo(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_plain() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_plain(id_); } /**< Forward to @ref Tree::is_key_plain(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_plain() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_plain(id_); } /**< Forward to @ref Tree::is_val_plain(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_key_quoted() const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_quoted(id_); } /**< Forward to @ref Tree::is_key_quoted(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_val_quoted() const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_quoted(id_); } /**< Forward to @ref Tree::is_val_quoted(). Node must be readable. */ + C4_ALWAYS_INLINE bool is_quoted() const RYML_NOEXCEPT { _C4RR(); return tree_->is_quoted(id_); } /**< Forward to @ref Tree::is_quoted(). Node must be readable. */ /** @} */ @@ -260,21 +300,21 @@ struct detail::RoNodeMethods /** @name hierarchy predicates */ /** @{ */ - C4_ALWAYS_INLINE bool is_root() const RYML_NOEXCEPT { _C4RR(); return tree_->is_root(id_); } /**< Forward to Tree::is_root(). Node must be readable. */ - C4_ALWAYS_INLINE bool has_parent() const RYML_NOEXCEPT { _C4RR(); return tree_->has_parent(id_); } /**< Forward to Tree::has_parent() Node must be readable. */ + // documentation to the right --> + + C4_ALWAYS_INLINE bool is_root() const RYML_NOEXCEPT { _C4RR(); return tree_->is_root(id_); } /**< Forward to @ref Tree::is_root(). Node must be readable. 
*/ + C4_ALWAYS_INLINE bool has_parent() const RYML_NOEXCEPT { _C4RR(); return tree_->has_parent(id_); } /**< Forward to @ref Tree::has_parent() Node must be readable. */ - C4_ALWAYS_INLINE bool has_child(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); return n.readable() ? tree_->has_child(id_, n.m_id) : false; } /**< Node must be readable. */ - C4_ALWAYS_INLINE bool has_child(size_t node) const RYML_NOEXCEPT { _C4RR(); return tree_->has_child(id_, node); } /**< Node must be readable. */ - C4_ALWAYS_INLINE bool has_child(csubstr name) const RYML_NOEXCEPT { _C4RR(); return tree_->has_child(id_, name); } /**< Node must be readable. */ - C4_ALWAYS_INLINE bool has_children() const RYML_NOEXCEPT { _C4RR(); return tree_->has_children(id_); } /**< Node must be readable. */ + C4_ALWAYS_INLINE bool has_child(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); return n.readable() ? tree_->has_child(id_, n.m_id) : false; } /**< Forward to @ref Tree::has_child(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_child(id_type node) const RYML_NOEXCEPT { _C4RR(); return tree_->has_child(id_, node); } /**< Forward to @ref Tree::has_child(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_child(csubstr name) const RYML_NOEXCEPT { _C4RR(); return tree_->has_child(id_, name); } /**< Forward to @ref Tree::has_child(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_children() const RYML_NOEXCEPT { _C4RR(); return tree_->has_children(id_); } /**< Forward to @ref Tree::has_child(). Node must be readable. */ - C4_ALWAYS_INLINE bool has_sibling(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); return n.readable() ? tree_->has_sibling(id_, n.m_id) : false; } /**< Node must be readable. */ - C4_ALWAYS_INLINE bool has_sibling(size_t node) const RYML_NOEXCEPT { _C4RR(); return tree_->has_sibling(id_, node); } /**< Node must be readable. 
*/ - C4_ALWAYS_INLINE bool has_sibling(csubstr name) const RYML_NOEXCEPT { _C4RR(); return tree_->has_sibling(id_, name); } /**< Node must be readable. */ - /** does not count with this */ - C4_ALWAYS_INLINE bool has_other_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->has_other_siblings(id_); } + C4_ALWAYS_INLINE bool has_sibling(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); return n.readable() ? tree_->has_sibling(id_, n.m_id) : false; } /**< Forward to @ref Tree::has_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_sibling(id_type node) const RYML_NOEXCEPT { _C4RR(); return tree_->has_sibling(id_, node); } /**< Forward to @ref Tree::has_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_sibling(csubstr name) const RYML_NOEXCEPT { _C4RR(); return tree_->has_sibling(id_, name); } /**< Forward to @ref Tree::has_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE bool has_other_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->has_other_siblings(id_); } /**< Forward to @ref Tree::has_sibling(). Node must be readable. */ - /** counts with this */ RYML_DEPRECATED("use has_other_siblings()") bool has_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->has_siblings(id_); } /** @} */ @@ -284,67 +324,65 @@ struct detail::RoNodeMethods /** @name hierarchy getters */ /** @{ */ - template - C4_ALWAYS_INLINE auto doc(size_t i) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { RYML_ASSERT(tree_); return {tree__, tree__->doc(i)}; } /**< Forward to Tree::doc(). Node must be readable. */ - C4_ALWAYS_INLINE ConstImpl doc(size_t i) const RYML_NOEXCEPT { RYML_ASSERT(tree_); return {tree_, tree_->doc(i)}; } /**< Forward to Tree::doc(). Node must be readable. */ + // documentation to the right --> template - C4_ALWAYS_INLINE auto parent() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->parent(id__)}; } /**< Forward to Tree::parent(). Node must be readable. 
*/ - C4_ALWAYS_INLINE ConstImpl parent() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->parent(id_)}; } /**< Forward to Tree::parent(). Node must be readable. */ + C4_ALWAYS_INLINE auto doc(id_type i) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { RYML_ASSERT(tree_); return {tree__, tree__->doc(i)}; } /**< Forward to @ref Tree::doc(). Node must be readable. */ + /** succeeds even when the node may have invalid or seed id */ + C4_ALWAYS_INLINE ConstImpl doc(id_type i) const RYML_NOEXCEPT { RYML_ASSERT(tree_); return {tree_, tree_->doc(i)}; } /**< Forward to @ref Tree::doc(). Node must be readable. */ template - C4_ALWAYS_INLINE auto first_child() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->first_child(id__)}; } /**< Forward to Tree::first_child(). Node must be readable. */ - C4_ALWAYS_INLINE ConstImpl first_child() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->first_child(id_)}; } /**< Forward to Tree::first_child(). Node must be readable. */ + C4_ALWAYS_INLINE auto parent() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->parent(id__)}; } /**< Forward to @ref Tree::parent(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl parent() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->parent(id_)}; } /**< Forward to @ref Tree::parent(). Node must be readable. */ template - C4_ALWAYS_INLINE auto last_child() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->last_child(id__)}; } /**< Forward to Tree::last_child(). Node must be readable. */ - C4_ALWAYS_INLINE ConstImpl last_child () const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->last_child (id_)}; } /**< Forward to Tree::last_child(). Node must be readable. */ + C4_ALWAYS_INLINE auto first_child() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->first_child(id__)}; } /**< Forward to @ref Tree::first_child(). Node must be readable. 
*/ + C4_ALWAYS_INLINE ConstImpl first_child() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->first_child(id_)}; } /**< Forward to @ref Tree::first_child(). Node must be readable. */ template - C4_ALWAYS_INLINE auto child(size_t pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->child(id__, pos)}; } /**< Forward to Tree::child(). Node must be readable. */ - C4_ALWAYS_INLINE ConstImpl child(size_t pos) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->child(id_, pos)}; } /**< Forward to Tree::child(). Node must be readable. */ + C4_ALWAYS_INLINE auto last_child() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->last_child(id__)}; } /**< Forward to @ref Tree::last_child(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl last_child () const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->last_child (id_)}; } /**< Forward to @ref Tree::last_child(). Node must be readable. */ template - C4_ALWAYS_INLINE auto find_child(csubstr name) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->find_child(id__, name)}; } /**< Forward to Tree::first_child(). Node must be readable. */ - C4_ALWAYS_INLINE ConstImpl find_child(csubstr name) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->find_child(id_, name)}; } /**< Forward to Tree::first_child(). Node must be readable. */ + C4_ALWAYS_INLINE auto child(id_type pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->child(id__, pos)}; } /**< Forward to @ref Tree::child(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl child(id_type pos) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->child(id_, pos)}; } /**< Forward to @ref Tree::child(). Node must be readable. */ template - C4_ALWAYS_INLINE auto prev_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->prev_sibling(id__)}; } /**< Forward to Tree::prev_sibling(). Node must be readable. 
*/ - C4_ALWAYS_INLINE ConstImpl prev_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->prev_sibling(id_)}; } /**< Forward to Tree::prev_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE auto find_child(csubstr name) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->find_child(id__, name)}; } /**< Forward to @ref Tree::first_child(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl find_child(csubstr name) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->find_child(id_, name)}; } /**< Forward to @ref Tree::first_child(). Node must be readable. */ template - C4_ALWAYS_INLINE auto next_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->next_sibling(id__)}; } /**< Forward to Tree::next_sibling(). Node must be readable. */ - C4_ALWAYS_INLINE ConstImpl next_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->next_sibling(id_)}; } /**< Forward to Tree::next_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE auto prev_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->prev_sibling(id__)}; } /**< Forward to @ref Tree::prev_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl prev_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->prev_sibling(id_)}; } /**< Forward to @ref Tree::prev_sibling(). Node must be readable. */ template - C4_ALWAYS_INLINE auto first_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->first_sibling(id__)}; } /**< Forward to Tree::first_sibling(). Node must be readable. */ - C4_ALWAYS_INLINE ConstImpl first_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->first_sibling(id_)}; } /**< Forward to Tree::first_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE auto next_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->next_sibling(id__)}; } /**< Forward to @ref Tree::next_sibling(). Node must be readable. 
*/ + C4_ALWAYS_INLINE ConstImpl next_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->next_sibling(id_)}; } /**< Forward to @ref Tree::next_sibling(). Node must be readable. */ template - C4_ALWAYS_INLINE auto last_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->last_sibling(id__)}; } /**< Forward to Tree::last_sibling(). Node must be readable. */ - C4_ALWAYS_INLINE ConstImpl last_sibling () const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->last_sibling(id_)}; } /**< Forward to Tree::last_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE auto first_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->first_sibling(id__)}; } /**< Forward to @ref Tree::first_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl first_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->first_sibling(id_)}; } /**< Forward to @ref Tree::first_sibling(). Node must be readable. */ template - C4_ALWAYS_INLINE auto sibling(size_t pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->sibling(id__, pos)}; } /**< Forward to Tree::sibling(). Node must be readable. */ - C4_ALWAYS_INLINE ConstImpl sibling(size_t pos) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->sibling(id_, pos)}; } /**< Forward to Tree::sibling(). Node must be readable. */ + C4_ALWAYS_INLINE auto last_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->last_sibling(id__)}; } /**< Forward to @ref Tree::last_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl last_sibling () const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->last_sibling(id_)}; } /**< Forward to @ref Tree::last_sibling(). Node must be readable. */ template - C4_ALWAYS_INLINE auto find_sibling(csubstr name) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->find_sibling(id__, name)}; } /**< Forward to Tree::find_sibling(). Node must be readable. 
*/ - C4_ALWAYS_INLINE ConstImpl find_sibling(csubstr name) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->find_sibling(id_, name)}; } /**< Forward to Tree::find_sibling(). Node must be readable. */ - - /** O(num_children). Forward to Tree::num_children(). */ - C4_ALWAYS_INLINE size_t num_children() const RYML_NOEXCEPT { _C4RR(); return tree_->num_children(id_); } + C4_ALWAYS_INLINE auto sibling(id_type pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->sibling(id__, pos)}; } /**< Forward to @ref Tree::sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl sibling(id_type pos) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->sibling(id_, pos)}; } /**< Forward to @ref Tree::sibling(). Node must be readable. */ - C4_ALWAYS_INLINE size_t num_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->num_siblings(id_); } - - /** O(num_siblings). Return the number of siblings except this. */ - C4_ALWAYS_INLINE size_t num_other_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->num_other_siblings(id_); } + template + C4_ALWAYS_INLINE auto find_sibling(csubstr name) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->find_sibling(id__, name)}; } /**< Forward to @ref Tree::find_sibling(). Node must be readable. */ + C4_ALWAYS_INLINE ConstImpl find_sibling(csubstr name) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->find_sibling(id_, name)}; } /**< Forward to @ref Tree::find_sibling(). Node must be readable. */ - /** O(num_children). Return the position of a child within this node, using Tree::child_pos(). */ - C4_ALWAYS_INLINE size_t child_pos(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); _RYML_CB_ASSERT(tree_->m_callbacks, n.readable()); return tree_->child_pos(id_, n.m_id); } + C4_ALWAYS_INLINE id_type num_children() const RYML_NOEXCEPT { _C4RR(); return tree_->num_children(id_); } /**< O(num_children). Forward to @ref Tree::num_children(). 
*/ + C4_ALWAYS_INLINE id_type num_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->num_siblings(id_); } /**< O(num_children). Forward to @ref Tree::num_siblings(). */ + C4_ALWAYS_INLINE id_type num_other_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->num_other_siblings(id_); } /**< O(num_siblings). Forward to @ref Tree::num_other_siblings(). */ + C4_ALWAYS_INLINE id_type child_pos(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); _RYML_CB_ASSERT(tree_->m_callbacks, n.readable()); return tree_->child_pos(id_, n.m_id); } /**< O(num_children). Forward to @ref Tree::child_pos(). */ + C4_ALWAYS_INLINE id_type sibling_pos(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); _RYML_CB_ASSERT(tree_->callbacks(), n.readable()); return tree_->child_pos(tree_->parent(id_), n.m_id); } /**< O(num_siblings). Forward to @ref Tree::sibling_pos(). */ - /** O(num_siblings) */ - C4_ALWAYS_INLINE size_t sibling_pos(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); _RYML_CB_ASSERT(tree_->callbacks(), n.readable()); return tree_->child_pos(tree_->parent(id_), n.m_id); } + C4_ALWAYS_INLINE id_type depth_asc() const RYML_NOEXCEPT { _C4RR(); return tree_->depth_asc(id_); } /** O(log(num_nodes)). Forward to Tree::depth_asc(). Node must be readable. */ + C4_ALWAYS_INLINE id_type depth_desc() const RYML_NOEXCEPT { _C4RR(); return tree_->depth_desc(id_); } /** O(num_nodes). Forward to Tree::depth_desc(). Node must be readable. */ /** @} */ @@ -376,7 +414,7 @@ struct detail::RoNodeMethods C4_ALWAYS_INLINE auto operator[] (csubstr key) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); - size_t ch = tree__->find_child(id__, key); + id_type ch = tree__->find_child(id__, key); return ch != NONE ? 
Impl(tree__, ch) : Impl(tree__, id__, key); } @@ -399,10 +437,10 @@ struct detail::RoNodeMethods * * @see https://github.com/biojppm/rapidyaml/issues/389 */ template - C4_ALWAYS_INLINE auto operator[] (size_t pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) + C4_ALWAYS_INLINE auto operator[] (id_type pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); - size_t ch = tree__->child(id__, pos); + id_type ch = tree__->child(id__, pos); return ch != NONE ? Impl(tree__, ch) : Impl(tree__, id__, pos); } @@ -418,7 +456,7 @@ struct detail::RoNodeMethods C4_ALWAYS_INLINE ConstImpl operator[] (csubstr key) const RYML_NOEXCEPT { _C4RR(); - size_t ch = tree_->find_child(id_, key); + id_type ch = tree_->find_child(id_, key); _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); return {tree_, ch}; } @@ -432,10 +470,10 @@ struct detail::RoNodeMethods * it is UB to use the return value if it is not valid. * * @see https://github.com/biojppm/rapidyaml/issues/389 */ - C4_ALWAYS_INLINE ConstImpl operator[] (size_t pos) const RYML_NOEXCEPT + C4_ALWAYS_INLINE ConstImpl operator[] (id_type pos) const RYML_NOEXCEPT { _C4RR(); - size_t ch = tree_->child(id_, pos); + id_type ch = tree_->child(id_, pos); _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); return {tree_, ch}; } @@ -486,7 +524,7 @@ struct detail::RoNodeMethods _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < tree_->capacity())); _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable()); _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_map(id_)); - size_t ch = tree__->find_child(id__, key); + id_type ch = tree__->find_child(id__, key); return ch != NONE ? Impl(tree__, ch) : Impl(tree__, id__, key); } @@ -508,7 +546,7 @@ struct detail::RoNodeMethods * valid (points at a tree and a node), b) the calling object must * be readable (must not be in seed state), c) the calling object * must be pointing at a MAP node. 
The preconditions are similar - * to the non-const operator[](size_t), but instead of using + * to the non-const operator[](id_type), but instead of using * assertions, this function directly checks those conditions and * calls the error callback if any of the checks fail. * @@ -516,15 +554,15 @@ struct detail::RoNodeMethods * seed state, the error callback is not invoked when this * happens. */ template - C4_ALWAYS_INLINE auto at(size_t pos) -> _C4_IF_MUTABLE(Impl) + C4_ALWAYS_INLINE auto at(id_type pos) -> _C4_IF_MUTABLE(Impl) { RYML_CHECK(tree_ != nullptr); - const size_t cap = tree_->capacity(); + const id_type cap = tree_->capacity(); _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < cap)); _RYML_CB_CHECK(tree_->m_callbacks, (pos >= 0 && pos < cap)); _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable()); _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_container(id_)); - size_t ch = tree__->child(id__, pos); + id_type ch = tree__->child(id__, pos); return ch != NONE ? Impl(tree__, ch) : Impl(tree__, id__, pos); } @@ -543,7 +581,7 @@ struct detail::RoNodeMethods _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < tree_->capacity())); _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable()); _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_map(id_)); - size_t ch = tree_->find_child(id_, key); + id_type ch = tree_->find_child(id_, key); _RYML_CB_CHECK(tree_->m_callbacks, ch != NONE); return {tree_, ch}; } @@ -551,21 +589,21 @@ struct detail::RoNodeMethods /** Get a child by position, with error checking; complexity is * O(pos). * - * Behaves as operator[](size_t) const, but always raises an error + * Behaves as operator[](id_type) const, but always raises an error * (even when RYML_USE_ASSERT is set to false) when the returned * node does not exist, or when this node is not readable, or when * it is not a container. 
This behaviour is similar to * std::vector::at(), but the error consists in calling the error * callback instead of directly raising an exception. */ - ConstImpl at(size_t pos) const + ConstImpl at(id_type pos) const { RYML_CHECK(tree_ != nullptr); - const size_t cap = tree_->capacity(); + const id_type cap = tree_->capacity(); _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < cap)); _RYML_CB_CHECK(tree_->m_callbacks, (pos >= 0 && pos < cap)); _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable()); _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_container(id_)); - size_t ch = tree_->child(id_, pos); + const id_type ch = tree_->child(id_, pos); _RYML_CB_CHECK(tree_->m_callbacks, ch != NONE); return {tree_, ch}; } @@ -577,6 +615,8 @@ struct detail::RoNodeMethods /** @name deserialization */ /** @{ */ + /** deserialize the node's val to the given variable, forwarding + * to the user-overrideable @ref read() function. */ template ConstImpl const& operator>> (T &v) const { @@ -586,24 +626,26 @@ struct detail::RoNodeMethods return *((ConstImpl const*)this); } - /** deserialize the node's key to the given variable */ + /** deserialize the node's key to the given variable, forwarding + * to the user-overrideable @ref read() function; use @ref key() + * to disambiguate; for example: `node >> ryml::key(var)` */ template ConstImpl const& operator>> (Key v) const { _C4RR(); - if( ! from_chars(key(), &v.k)) + if(key().empty() || ! from_chars(key(), &v.k)) _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key"); return *((ConstImpl const*)this); } - /** deserialize the node's key as base64 */ + /** deserialize the node's key as base64. lightweight wrapper over @ref deserialize_key() */ ConstImpl const& operator>> (Key w) const { deserialize_key(w.wrapper); return *((ConstImpl const*)this); } - /** deserialize the node's val as base64 */ + /** deserialize the node's val as base64. 
lightweight wrapper over @ref deserialize_val() */ ConstImpl const& operator>> (fmt::base64_wrapper w) const { deserialize_val(w); @@ -627,6 +669,8 @@ struct detail::RoNodeMethods return from_chars(val(), &v); }; + /** look for a child by name, if it exists assign to var. return + * true if the child existed. */ template bool get_if(csubstr name, T *var) const { @@ -638,6 +682,9 @@ struct detail::RoNodeMethods return true; } + /** look for a child by name, if it exists assign to var, + * otherwise default to fallback. return true if the child + * existed. */ template bool get_if(csubstr name, T *var, T const& fallback) const { @@ -677,14 +724,20 @@ struct detail::RoNodeMethods using children_view = detail::children_view_; using const_children_view = detail::children_view_; + /** get an iterator to the first child */ template C4_ALWAYS_INLINE auto begin() RYML_NOEXCEPT -> _C4_IF_MUTABLE(iterator) { _C4RR(); return iterator(tree__, tree__->first_child(id__)); } + /** get an iterator to the first child */ C4_ALWAYS_INLINE const_iterator begin() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, tree_->first_child(id_)); } + /** get an iterator to the first child */ C4_ALWAYS_INLINE const_iterator cbegin() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, tree_->first_child(id_)); } + /** get an iterator to after the last child */ template C4_ALWAYS_INLINE auto end() RYML_NOEXCEPT -> _C4_IF_MUTABLE(iterator) { _C4RR(); return iterator(tree__, NONE); } + /** get an iterator to after the last child */ C4_ALWAYS_INLINE const_iterator end() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, NONE); } + /** get an iterator to after the last child */ C4_ALWAYS_INLINE const_iterator cend() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, tree_->first_child(id_)); } /** get an iterable view over children */ @@ -721,14 +774,14 @@ struct detail::RoNodeMethods /** visit every child node calling fn(node) */ template - bool visit(Visitor fn, 
size_t indentation_level=0, bool skip_root=true) const RYML_NOEXCEPT + bool visit(Visitor fn, id_type indentation_level=0, bool skip_root=true) const RYML_NOEXCEPT { _C4RR(); return detail::_visit(*(ConstImpl const*)this, fn, indentation_level, skip_root); } /** visit every child node calling fn(node) */ template - auto visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) RYML_NOEXCEPT + auto visit(Visitor fn, id_type indentation_level=0, bool skip_root=true) RYML_NOEXCEPT -> _C4_IF_MUTABLE(bool) { _C4RR(); @@ -737,14 +790,14 @@ struct detail::RoNodeMethods /** visit every child node calling fn(node, level) */ template - bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const RYML_NOEXCEPT + bool visit_stacked(Visitor fn, id_type indentation_level=0, bool skip_root=true) const RYML_NOEXCEPT { _C4RR(); return detail::_visit_stacked(*(ConstImpl const*)this, fn, indentation_level, skip_root); } /** visit every child node calling fn(node, level) */ template - auto visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) RYML_NOEXCEPT + auto visit_stacked(Visitor fn, id_type indentation_level=0, bool skip_root=true) RYML_NOEXCEPT -> _C4_IF_MUTABLE(bool) { _C4RR(); @@ -768,6 +821,7 @@ struct detail::RoNodeMethods C4_SUPPRESS_WARNING_GCC_CLANG_POP }; +} // detail //----------------------------------------------------------------------------- @@ -787,7 +841,7 @@ class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethods; @@ -797,17 +851,17 @@ class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethodsroot_id()) {} - ConstNodeRef(Tree const *t, size_t id) : m_tree(t), m_id(id) {} - ConstNodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE) {} + ConstNodeRef() noexcept : m_tree(nullptr), m_id(NONE) {} + ConstNodeRef(Tree const &t) noexcept : m_tree(&t), m_id(t .root_id()) {} + ConstNodeRef(Tree const *t) noexcept : m_tree(t ), m_id(t->root_id()) {} + ConstNodeRef(Tree const *t, id_type id) noexcept : 
m_tree(t), m_id(id) {} + ConstNodeRef(std::nullptr_t) noexcept : m_tree(nullptr), m_id(NONE) {} - ConstNodeRef(ConstNodeRef const&) = default; - ConstNodeRef(ConstNodeRef &&) = default; + ConstNodeRef(ConstNodeRef const&) noexcept = default; + ConstNodeRef(ConstNodeRef &&) noexcept = default; - ConstNodeRef(NodeRef const&); - ConstNodeRef(NodeRef &&); + ConstNodeRef(NodeRef const&) noexcept; + ConstNodeRef(NodeRef &&) noexcept; /** @} */ @@ -816,13 +870,13 @@ class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethods private: Tree *C4_RESTRICT m_tree; - size_t m_id; + id_type m_id; /** This member is used to enable lazy operator[] writing. When a child * with a key or index is not found, m_id is set to the id of the parent @@ -945,7 +999,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods friend struct detail::RoNodeMethods; // require valid: a helper macro, undefined at the end - #define _C4RV() \ + #define _C4RR() \ RYML_ASSERT(m_tree != nullptr); \ _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE && !is_seed()) // require id: a helper macro, undefined at the end @@ -958,15 +1012,15 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods /** @name construction */ /** @{ */ - NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } - NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); } - NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); } - NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); } - NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; } - NodeRef(Tree *t, size_t id, csubstr seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {} - NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {} + NodeRef() noexcept : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } + NodeRef(Tree &t) noexcept : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree 
*t) noexcept : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t, id_type id) noexcept : m_tree(t), m_id(id), m_seed() { _clear_seed(); } + NodeRef(Tree *t, id_type id, id_type seed_pos) noexcept : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = (size_t)seed_pos; } + NodeRef(Tree *t, id_type id, csubstr seed_key) noexcept : m_tree(t), m_id(id), m_seed(seed_key) {} + NodeRef(std::nullptr_t) noexcept : m_tree(nullptr), m_id(NONE), m_seed() {} - inline void _clear_seed() { /*do the following manually or an assert is triggered: */ m_seed.str = nullptr; m_seed.len = NONE; } + inline void _clear_seed() noexcept { /*do the following manually or an assert is triggered: */ m_seed.str = nullptr; m_seed.len = npos; } /** @} */ @@ -975,11 +1029,11 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods /** @name assignment */ /** @{ */ - NodeRef(NodeRef const&) = default; - NodeRef(NodeRef &&) = default; + NodeRef(NodeRef const&) noexcept = default; + NodeRef(NodeRef &&) noexcept = default; - NodeRef& operator= (NodeRef const&) = default; - NodeRef& operator= (NodeRef &&) = default; + NodeRef& operator= (NodeRef const&) noexcept = default; + NodeRef& operator= (NodeRef &&) noexcept = default; /** @} */ @@ -988,12 +1042,12 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods /** @name state_queries * @{ */ - /** true if the object is not referring to any existing or seed node @see the doc for the NodeRef */ - inline bool invalid() const { return m_tree == nullptr || m_id == NONE; } - /** true if the object is not invalid and in seed state. @see the doc for the NodeRef */ - inline bool is_seed() const { return (m_tree != NULL && m_id != NONE) && (m_seed.str != nullptr || m_seed.len != (size_t)NONE); } - /** true if the object is not invalid and not in seed state. 
@see the doc for the NodeRef */ - inline bool readable() const { return (m_tree != NULL && m_id != NONE) && (m_seed.str == nullptr && m_seed.len == (size_t)NONE); } + /** true if the object is not referring to any existing or seed node. @see the doc for @ref NodeRef */ + inline bool invalid() const noexcept { return m_tree == nullptr || m_id == NONE; } + /** true if the object is not invalid and in seed state. @see the doc for @ref NodeRef */ + inline bool is_seed() const noexcept { return (m_tree != NULL && m_id != NONE) && (m_seed.str != nullptr || m_seed.len != (size_t)NONE); } + /** true if the object is not invalid and not in seed state. @see the doc for @ref NodeRef */ + inline bool readable() const noexcept { return (m_tree != NULL && m_id != NONE) && (m_seed.str == nullptr && m_seed.len == (size_t)NONE); } RYML_DEPRECATED("use one of readable(), is_seed() or !invalid()") inline bool valid() const { return m_tree != nullptr && m_id != NONE; } @@ -1031,21 +1085,19 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods RYML_DEPRECATED("use !readable()") bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); } RYML_DEPRECATED("use readable()") bool operator!= (std::nullptr_t) const { return !(m_tree == nullptr || m_id == NONE || is_seed()); } - RYML_DEPRECATED("use `this->val() == s`") bool operator== (csubstr s) const { _C4RV(); _RYML_CB_ASSERT(m_tree->m_callbacks, has_val()); return m_tree->val(m_id) == s; } - RYML_DEPRECATED("use `this->val() != s`") bool operator!= (csubstr s) const { _C4RV(); _RYML_CB_ASSERT(m_tree->m_callbacks, has_val()); return m_tree->val(m_id) != s; } + RYML_DEPRECATED("use `this->val() == s`") bool operator== (csubstr s) const { _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, has_val()); return m_tree->val(m_id) == s; } + RYML_DEPRECATED("use `this->val() != s`") bool operator!= (csubstr s) const { _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, has_val()); return m_tree->val(m_id) != s; } 
/** @endcond */ - /** @} */ - public: /** @name node_property_getters * @{ */ - C4_ALWAYS_INLINE C4_PURE Tree * tree() noexcept { return m_tree; } - C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; } + C4_ALWAYS_INLINE Tree * tree() noexcept { return m_tree; } + C4_ALWAYS_INLINE Tree const* tree() const noexcept { return m_tree; } - C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; } + C4_ALWAYS_INLINE id_type id() const noexcept { return m_id; } /** @} */ @@ -1056,7 +1108,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods void create() { _apply_seed(); } - void change_type(NodeType t) { _C4RV(); m_tree->change_type(m_id, t); } + void change_type(NodeType t) { _C4RR(); m_tree->change_type(m_id, t); } void set_type(NodeType t) { _apply_seed(); m_tree->_set_flags(m_id, t); } void set_key(csubstr key) { _apply_seed(); m_tree->_set_key(m_id, key); } @@ -1068,37 +1120,11 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods void set_key_ref(csubstr key_ref) { _apply_seed(); m_tree->set_key_ref(m_id, key_ref); } void set_val_ref(csubstr val_ref) { _apply_seed(); m_tree->set_val_ref(m_id, val_ref); } - template - size_t set_key_serialized(T const& C4_RESTRICT k) - { - _apply_seed(); - csubstr s = m_tree->to_arena(k); - m_tree->_set_key(m_id, s); - return s.len; - } - template - size_t set_val_serialized(T const& C4_RESTRICT v) - { - _apply_seed(); - csubstr s = m_tree->to_arena(v); - m_tree->_set_val(m_id, s); - return s.len; - } - size_t set_val_serialized(std::nullptr_t) - { - _apply_seed(); - m_tree->_set_val(m_id, csubstr{}); - return 0; - } + void set_container_style(NodeType_e style) { _C4RR(); m_tree->set_container_style(m_id, style); } + void set_key_style(NodeType_e style) { _C4RR(); m_tree->set_key_style(m_id, style); } + void set_val_style(NodeType_e style) { _C4RR(); m_tree->set_val_style(m_id, style); } - /** encode a blob as base64 into the tree's arena, then assign the - * result to the node's key 
@return the size of base64-encoded - * blob */ - size_t set_key_serialized(fmt::const_base64_wrapper w); - /** encode a blob as base64 into the tree's arena, then assign the - * result to the node's val @return the size of base64-encoded - * blob */ - size_t set_val_serialized(fmt::const_base64_wrapper w); +public: inline void clear() { @@ -1189,6 +1215,45 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods return m_tree->to_arena(s); } + template + size_t set_key_serialized(T const& C4_RESTRICT k) + { + _apply_seed(); + csubstr s = m_tree->to_arena(k); + m_tree->_set_key(m_id, s); + return s.len; + } + size_t set_key_serialized(std::nullptr_t) + { + _apply_seed(); + m_tree->_set_key(m_id, csubstr{}); + return 0; + } + + template + size_t set_val_serialized(T const& C4_RESTRICT v) + { + _apply_seed(); + csubstr s = m_tree->to_arena(v); + m_tree->_set_val(m_id, s); + return s.len; + } + size_t set_val_serialized(std::nullptr_t) + { + _apply_seed(); + m_tree->_set_val(m_id, csubstr{}); + return 0; + } + + /** encode a blob as base64 into the tree's arena, then assign the + * result to the node's key + * @return the size of base64-encoded blob */ + size_t set_key_serialized(fmt::const_base64_wrapper w); + /** encode a blob as base64 into the tree's arena, then assign the + * result to the node's val + * @return the size of base64-encoded blob */ + size_t set_val_serialized(fmt::const_base64_wrapper w); + /** serialize a variable, then assign the result to the node's val */ inline NodeRef& operator<< (csubstr s) { @@ -1250,14 +1315,14 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods m_id = m_tree->append_child(m_id); m_tree->_set_key(m_id, m_seed); m_seed.str = nullptr; - m_seed.len = NONE; + m_seed.len = (size_t)NONE; } - else if(m_seed.len != NONE) // we have a seed index: create a child at that position + else if(m_seed.len != (size_t)NONE) // we have a seed index: create a child at that position { - _RYML_CB_ASSERT(m_tree->m_callbacks, 
m_tree->num_children(m_id) == m_seed.len); + _RYML_CB_ASSERT(m_tree->m_callbacks, (size_t)m_tree->num_children(m_id) == m_seed.len); m_id = m_tree->append_child(m_id); m_seed.str = nullptr; - m_seed.len = NONE; + m_seed.len = (size_t)NONE; } else { @@ -1287,7 +1352,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef insert_child(NodeRef after) { - _C4RV(); + _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree); NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); return r; @@ -1295,7 +1360,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef insert_child(NodeInit const& i, NodeRef after) { - _C4RV(); + _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree); NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); r._apply(i); @@ -1304,14 +1369,14 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef prepend_child() { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); return r; } inline NodeRef prepend_child(NodeInit const& i) { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); r._apply(i); return r; @@ -1319,14 +1384,14 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef append_child() { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->append_child(m_id)); return r; } inline NodeRef append_child(NodeInit const& i) { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->append_child(m_id)); r._apply(i); return r; @@ -1334,7 +1399,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef insert_sibling(ConstNodeRef const& after) { - _C4RV(); + _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree); NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); return r; @@ -1342,7 +1407,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef insert_sibling(NodeInit const& i, ConstNodeRef const& after) { - _C4RV(); + _C4RR(); 
_RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree); NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); r._apply(i); @@ -1351,14 +1416,14 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef prepend_sibling() { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); return r; } inline NodeRef prepend_sibling(NodeInit const& i) { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); r._apply(i); return r; @@ -1366,14 +1431,14 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline NodeRef append_sibling() { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->append_sibling(m_id)); return r; } inline NodeRef append_sibling(NodeInit const& i) { - _C4RV(); + _C4RR(); NodeRef r(m_tree, m_tree->append_sibling(m_id)); r._apply(i); return r; @@ -1383,7 +1448,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline void remove_child(NodeRef & child) { - _C4RV(); + _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, has_child(child)); _RYML_CB_ASSERT(m_tree->m_callbacks, child.parent().id() == id()); m_tree->remove(child.id()); @@ -1391,11 +1456,11 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods } //! remove the nth child of this node - inline void remove_child(size_t pos) + inline void remove_child(id_type pos) { - _C4RV(); + _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, pos >= 0 && pos < num_children()); - size_t child = m_tree->child(m_id, pos); + id_type child = m_tree->child(m_id, pos); _RYML_CB_ASSERT(m_tree->m_callbacks, child != NONE); m_tree->remove(child); } @@ -1403,8 +1468,8 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods //! 
remove a child by name inline void remove_child(csubstr key) { - _C4RV(); - size_t child = m_tree->find_child(m_id, key); + _C4RR(); + id_type child = m_tree->find_child(m_id, key); _RYML_CB_ASSERT(m_tree->m_callbacks, child != NONE); m_tree->remove(child); } @@ -1417,7 +1482,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods * `n.move({})`. */ inline void move(ConstNodeRef const& after) { - _C4RV(); + _C4RR(); m_tree->move(m_id, after.m_id); } @@ -1427,7 +1492,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods * pointer is reset to the tree of the parent node. */ inline void move(NodeRef const& parent, ConstNodeRef const& after) { - _C4RV(); + _C4RR(); if(parent.m_tree == m_tree) { m_tree->move(m_id, parent.m_id, after.m_id); @@ -1445,9 +1510,9 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods * default-constructed reference like this: `n.move({})`. */ inline NodeRef duplicate(ConstNodeRef const& after) const { - _C4RV(); + _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree == after.m_tree || after.m_id == NONE); - size_t dup = m_tree->duplicate(m_id, m_tree->parent(m_id), after.m_id); + id_type dup = m_tree->duplicate(m_id, m_tree->parent(m_id), after.m_id); NodeRef r(m_tree, dup); return r; } @@ -1459,17 +1524,17 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods * this: `n.move({})`. 
*/ inline NodeRef duplicate(NodeRef const& parent, ConstNodeRef const& after) const { - _C4RV(); + _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, parent.m_tree == after.m_tree || after.m_id == NONE); if(parent.m_tree == m_tree) { - size_t dup = m_tree->duplicate(m_id, parent.m_id, after.m_id); + id_type dup = m_tree->duplicate(m_id, parent.m_id, after.m_id); NodeRef r(m_tree, dup); return r; } else { - size_t dup = parent.m_tree->duplicate(m_tree, m_id, parent.m_id, after.m_id); + id_type dup = parent.m_tree->duplicate(m_tree, m_id, parent.m_id, after.m_id); NodeRef r(parent.m_tree, dup); return r; } @@ -1477,7 +1542,7 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods inline void duplicate_children(NodeRef const& parent, ConstNodeRef const& after) const { - _C4RV(); + _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, parent.m_tree == after.m_tree); if(parent.m_tree == m_tree) { @@ -1491,37 +1556,37 @@ class RYML_EXPORT NodeRef : public detail::RoNodeMethods /** @} */ -#undef _C4RV +#undef _C4RR #undef _C4RID }; //----------------------------------------------------------------------------- -inline ConstNodeRef::ConstNodeRef(NodeRef const& that) +inline ConstNodeRef::ConstNodeRef(NodeRef const& that) noexcept : m_tree(that.m_tree) - , m_id(!that.is_seed() ? that.id() : NONE) + , m_id(!that.is_seed() ? that.id() : (id_type)NONE) { } -inline ConstNodeRef::ConstNodeRef(NodeRef && that) +inline ConstNodeRef::ConstNodeRef(NodeRef && that) noexcept : m_tree(that.m_tree) - , m_id(!that.is_seed() ? that.id() : NONE) + , m_id(!that.is_seed() ? that.id() : (id_type)NONE) { } -inline ConstNodeRef& ConstNodeRef::operator= (NodeRef const& that) +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef const& that) noexcept { m_tree = (that.m_tree); - m_id = (!that.is_seed() ? that.id() : NONE); + m_id = (!that.is_seed() ? 
that.id() : (id_type)NONE); return *this; } -inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that) +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that) noexcept { m_tree = (that.m_tree); - m_id = (!that.is_seed() ? that.id() : NONE); + m_id = (!that.is_seed() ? that.id() : (id_type)NONE); return *this; } @@ -1539,30 +1604,112 @@ inline void write(NodeRef *n, T const& v) n->set_val_serialized(v); } +namespace detail { +// SFINAE overloads for skipping leading + which cannot be read by the charconv functions template -typename std::enable_if< ! std::is_floating_point::value, bool>::type -inline read(NodeRef const& n, T *v) +C4_ALWAYS_INLINE auto read_skip_plus(csubstr val, T *v) + -> typename std::enable_if::value, bool>::type { - return from_chars(n.val(), v); + if(val.begins_with('+')) + val = val.sub(1); + return from_chars(val, v); } template -typename std::enable_if< ! std::is_floating_point::value, bool>::type -inline read(ConstNodeRef const& n, T *v) +C4_ALWAYS_INLINE auto read_skip_plus(csubstr val, T *v) + -> typename std::enable_if< ! std::is_arithmetic::value, bool>::type { - return from_chars(n.val(), v); + return from_chars(val, v); } +} // namespace detail +/** convert the val of a scalar node to a particular type, by + * forwarding its val to @ref from_chars(). The full string is + * used. + * @return false if the conversion failed */ +template +inline auto read(NodeRef const& n, T *v) + -> typename std::enable_if< ! std::is_floating_point::value, bool>::type +{ + csubstr val = n.val(); + if(val.empty()) + return false; + return detail::read_skip_plus(val, v); +} +/** convert the val of a scalar node to a particular type, by + * forwarding its val to @ref from_chars(). The full string is + * used. + * @return false if the conversion failed */ +template +inline auto read(ConstNodeRef const& n, T *v) + -> typename std::enable_if< ! 
std::is_floating_point::value, bool>::type +{ + csubstr val = n.val(); + if(val.empty()) + return false; + return detail::read_skip_plus(val, v); +} + +/** convert the val of a scalar node to a floating point type, by + * forwarding its val to @ref from_chars_float(). + * + * @return false if the conversion failed + * + * @warning Unlike non-floating types, only the leading part of the + * string that may constitute a number is processed. This happens + * because the float parsing is delegated to fast_float, which is + * implemented that way. Consequently, for example, all of `"34"`, + * `"34 "` `"34hg"` `"34 gh"` will be read as 34. If you are not sure + * about the contents of the data, you can use + * csubstr::first_real_span() to check before calling `>>`, for + * example like this: + * + * ```cpp + * csubstr val = node.val(); + * if(val.first_real_span() == val) + * node >> v; + * else + * ERROR("not a real") + * ``` + */ template typename std::enable_if::value, bool>::type inline read(NodeRef const& n, T *v) { - return from_chars_float(n.val(), v); + csubstr val = n.val(); + if(val.empty()) + return false; + return from_chars_float(val, v); } +/** convert the val of a scalar node to a floating point type, by + * forwarding its val to @ref from_chars_float(). + * + * @return false if the conversion failed + * + * @warning Unlike non-floating types, only the leading part of the + * string that may constitute a number is processed. This happens + * because the float parsing is delegated to fast_float, which is + * implemented that way. Consequently, for example, all of `"34"`, + * `"34 "` `"34hg"` `"34 gh"` will be read as 34. 
If you are not sure + * about the contents of the data, you can use + * csubstr::first_real_span() to check before calling `>>`, for + * example like this: + * + * ```cpp + * csubstr val = node.val(); + * if(val.first_real_span() == val) + * node >> v; + * else + * ERROR("not a real") + * ``` + */ template typename std::enable_if::value, bool>::type inline read(ConstNodeRef const& n, T *v) { - return from_chars_float(n.val(), v); + csubstr val = n.val(); + if(val.empty()) + return false; + return from_chars_float(val, v); } /** @} */ diff --git a/3rdparty/rapidyaml/include/c4/yml/node_type.hpp b/3rdparty/rapidyaml/include/c4/yml/node_type.hpp new file mode 100644 index 00000000000000..862b27bc9a441a --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/node_type.hpp @@ -0,0 +1,271 @@ +#ifndef C4_YML_NODE_TYPE_HPP_ +#define C4_YML_NODE_TYPE_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "c4/yml/common.hpp" +#endif + +C4_SUPPRESS_WARNING_MSVC_PUSH +C4_SUPPRESS_WARNING_GCC_CLANG_PUSH +C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast") + +namespace c4 { +namespace yml { + +/** @addtogroup doc_node_type + * + * @{ + */ + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + + +/** the integral type necessary to cover all the bits for NodeType_e */ +using type_bits = uint32_t; + + +/** a bit mask for marking node types and styles */ +typedef enum : type_bits { + #define __(v) (type_bits(1) << v) // a convenience define, undefined below + NOTYPE = 0, ///< no node type or style is set + KEY = __(0), ///< is member of a map, must have non-empty key + VAL = __(1), ///< a scalar: has a scalar (ie string) value, possibly empty. 
must be a leaf node, and cannot be MAP or SEQ + MAP = __(2), ///< a map: a parent of KEYVAL/KEYSEQ/KEYMAP nodes + SEQ = __(3), ///< a seq: a parent of VAL/SEQ/MAP nodes + DOC = __(4), ///< a document + STREAM = __(5)|SEQ, ///< a stream: a seq of docs + KEYREF = __(6), ///< a *reference: the key references an &anchor + VALREF = __(7), ///< a *reference: the val references an &anchor + KEYANCH = __(8), ///< the key has an &anchor + VALANCH = __(9), ///< the val has an &anchor + KEYTAG = __(10), ///< the key has a tag + VALTAG = __(11), ///< the val has a tag + _TYMASK = __(12)-1, ///< all the bits up to here + // + // unfiltered flags: + // + KEY_UNFILT = __(12), ///< the key scalar was left unfiltered; the parser was set not to filter. @see ParserOptions + VAL_UNFILT = __(13), ///< the val scalar was left unfiltered; the parser was set not to filter. @see ParserOptions + // + // style flags: + // + FLOW_SL = __(14), ///< mark container with single-line flow style (seqs as '[val1,val2], maps as '{key: val,key2: val2}') + FLOW_ML = __(15), ///< (NOT IMPLEMENTED, work in progress) mark container with multi-line flow style (seqs as '[\n val1,\n val2\n], maps as '{\n key: val,\n key2: val2\n}') + BLOCK = __(16), ///< mark container with block style (seqs as '- val\n', maps as 'key: val') + KEY_LITERAL = __(17), ///< mark key scalar as multiline, block literal | + VAL_LITERAL = __(18), ///< mark val scalar as multiline, block literal | + KEY_FOLDED = __(19), ///< mark key scalar as multiline, block folded > + VAL_FOLDED = __(20), ///< mark val scalar as multiline, block folded > + KEY_SQUO = __(21), ///< mark key scalar as single quoted ' + VAL_SQUO = __(22), ///< mark val scalar as single quoted ' + KEY_DQUO = __(23), ///< mark key scalar as double quoted " + VAL_DQUO = __(24), ///< mark val scalar as double quoted " + KEY_PLAIN = __(25), ///< mark key scalar as plain scalar (unquoted, even when multiline) + VAL_PLAIN = __(26), ///< mark val scalar as plain scalar 
(unquoted, even when multiline) + // + // type combination masks: + // + KEYVAL = KEY|VAL, + KEYSEQ = KEY|SEQ, + KEYMAP = KEY|MAP, + DOCMAP = DOC|MAP, + DOCSEQ = DOC|SEQ, + DOCVAL = DOC|VAL, + // + // style combination masks: + // + SCALAR_LITERAL = KEY_LITERAL|VAL_LITERAL, + SCALAR_FOLDED = KEY_FOLDED|VAL_FOLDED, + SCALAR_SQUO = KEY_SQUO|VAL_SQUO, + SCALAR_DQUO = KEY_DQUO|VAL_DQUO, + SCALAR_PLAIN = KEY_PLAIN|VAL_PLAIN, + KEYQUO = KEY_SQUO|KEY_DQUO|KEY_FOLDED|KEY_LITERAL, ///< key style is one of ', ", > or | + VALQUO = VAL_SQUO|VAL_DQUO|VAL_FOLDED|VAL_LITERAL, ///< val style is one of ', ", > or | + KEY_STYLE = KEY_LITERAL|KEY_FOLDED|KEY_SQUO|KEY_DQUO|KEY_PLAIN, ///< mask of all the scalar styles for key (not container styles!) + VAL_STYLE = VAL_LITERAL|VAL_FOLDED|VAL_SQUO|VAL_DQUO|VAL_PLAIN, ///< mask of all the scalar styles for val (not container styles!) + SCALAR_STYLE = KEY_STYLE|VAL_STYLE, + CONTAINER_STYLE_FLOW = FLOW_SL|FLOW_ML, + CONTAINER_STYLE_BLOCK = BLOCK, + CONTAINER_STYLE = FLOW_SL|FLOW_ML|BLOCK, + STYLE = SCALAR_STYLE | CONTAINER_STYLE, + // + // mixed masks + _KEYMASK = KEY | KEYQUO | KEYANCH | KEYREF | KEYTAG, + _VALMASK = VAL | VALQUO | VALANCH | VALREF | VALTAG, + #undef __ +} NodeType_e; + +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator| (NodeType_e lhs, NodeType_e rhs) noexcept { return (NodeType_e)(((type_bits)lhs) | ((type_bits)rhs)); } +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator& (NodeType_e lhs, NodeType_e rhs) noexcept { return (NodeType_e)(((type_bits)lhs) & ((type_bits)rhs)); } +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator>> (NodeType_e bits, uint32_t n) noexcept { return (NodeType_e)(((type_bits)bits) >> n); } +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator<< (NodeType_e bits, uint32_t n) noexcept { return (NodeType_e)(((type_bits)bits) << n); } +constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator~ (NodeType_e bits) noexcept { return (NodeType_e)(~(type_bits)bits); } +C4_ALWAYS_INLINE 
NodeType_e& operator&= (NodeType_e &subject, NodeType_e bits) noexcept { subject = (NodeType_e)((type_bits)subject & (type_bits)bits); return subject; } +C4_ALWAYS_INLINE NodeType_e& operator|= (NodeType_e &subject, NodeType_e bits) noexcept { subject = (NodeType_e)((type_bits)subject | (type_bits)bits); return subject; } + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** wraps a NodeType_e element with some syntactic sugar and predicates */ +struct RYML_EXPORT NodeType +{ +public: + + NodeType_e type; + +public: + + C4_ALWAYS_INLINE NodeType() noexcept : type(NOTYPE) {} + C4_ALWAYS_INLINE NodeType(NodeType_e t) noexcept : type(t) {} + C4_ALWAYS_INLINE NodeType(type_bits t) noexcept : type((NodeType_e)t) {} + + C4_ALWAYS_INLINE bool has_any(NodeType_e t) const noexcept { return (type & t) != 0u; } + C4_ALWAYS_INLINE bool has_all(NodeType_e t) const noexcept { return (type & t) == t; } + C4_ALWAYS_INLINE bool has_none(NodeType_e t) const noexcept { return (type & t) == 0; } + + C4_ALWAYS_INLINE void set(NodeType_e t) noexcept { type = t; } + C4_ALWAYS_INLINE void add(NodeType_e t) noexcept { type = (type|t); } + C4_ALWAYS_INLINE void rem(NodeType_e t) noexcept { type = (type & ~t); } + C4_ALWAYS_INLINE void addrem(NodeType_e bits_to_add, NodeType_e bits_to_remove) noexcept { type |= bits_to_add; type &= ~bits_to_remove; } + + C4_ALWAYS_INLINE void clear() noexcept { type = NOTYPE; } + +public: + + C4_ALWAYS_INLINE operator NodeType_e & C4_RESTRICT () noexcept { return type; } + C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const noexcept { return type; } + +public: + + /** @name node type queries + * @{ */ + + /** return a preset string based on the node type */ + C4_ALWAYS_INLINE const char *type_str() const noexcept { return type_str(type); } + /** 
return a preset string based on the node type */ + static const char* type_str(NodeType_e t) noexcept; + + /** fill a string with the node type flags. If the string is small, returns {null, len} */ + C4_ALWAYS_INLINE csubstr type_str(substr buf) const noexcept { return type_str(buf, type); } + /** fill a string with the node type flags. If the string is small, returns {null, len} */ + static csubstr type_str(substr buf, NodeType_e t) noexcept; + +public: + + /** @name node type queries + * @{ */ + + C4_ALWAYS_INLINE bool is_notype() const noexcept { return type == NOTYPE; } + C4_ALWAYS_INLINE bool is_stream() const noexcept { return ((type & STREAM) == STREAM) != 0; } + C4_ALWAYS_INLINE bool is_doc() const noexcept { return (type & DOC) != 0; } + C4_ALWAYS_INLINE bool is_container() const noexcept { return (type & (MAP|SEQ|STREAM)) != 0; } + C4_ALWAYS_INLINE bool is_map() const noexcept { return (type & MAP) != 0; } + C4_ALWAYS_INLINE bool is_seq() const noexcept { return (type & SEQ) != 0; } + C4_ALWAYS_INLINE bool has_key() const noexcept { return (type & KEY) != 0; } + C4_ALWAYS_INLINE bool has_val() const noexcept { return (type & VAL) != 0; } + C4_ALWAYS_INLINE bool is_val() const noexcept { return (type & KEYVAL) == VAL; } + C4_ALWAYS_INLINE bool is_keyval() const noexcept { return (type & KEYVAL) == KEYVAL; } + C4_ALWAYS_INLINE bool has_key_tag() const noexcept { return (type & KEYTAG) != 0; } + C4_ALWAYS_INLINE bool has_val_tag() const noexcept { return (type & VALTAG) != 0; } + C4_ALWAYS_INLINE bool has_key_anchor() const noexcept { return (type & KEYANCH) != 0; } + C4_ALWAYS_INLINE bool has_val_anchor() const noexcept { return (type & VALANCH) != 0; } + C4_ALWAYS_INLINE bool has_anchor() const noexcept { return (type & (KEYANCH|VALANCH)) != 0; } + C4_ALWAYS_INLINE bool is_key_ref() const noexcept { return (type & KEYREF) != 0; } + C4_ALWAYS_INLINE bool is_val_ref() const noexcept { return (type & VALREF) != 0; } + C4_ALWAYS_INLINE bool is_ref() const 
noexcept { return (type & (KEYREF|VALREF)) != 0; } + + C4_ALWAYS_INLINE bool is_key_unfiltered() const noexcept { return (type & (KEY_UNFILT)) != 0; } + C4_ALWAYS_INLINE bool is_val_unfiltered() const noexcept { return (type & (VAL_UNFILT)) != 0; } + + RYML_DEPRECATED("use has_key_anchor()") bool is_key_anchor() const noexcept { return has_key_anchor(); } + RYML_DEPRECATED("use has_val_anchor()") bool is_val_anchor() const noexcept { return has_val_anchor(); } + RYML_DEPRECATED("use has_anchor()") bool is_anchor() const noexcept { return has_anchor(); } + RYML_DEPRECATED("use has_anchor() || is_ref()") bool is_anchor_or_ref() const noexcept { return has_anchor() || is_ref(); } + /** @} */ + +public: + + /** @name container+scalar style queries + * @{ */ + + C4_ALWAYS_INLINE bool is_container_styled() const noexcept { return (type & (CONTAINER_STYLE)) != 0; } + C4_ALWAYS_INLINE bool is_block() const noexcept { return (type & (BLOCK)) != 0; } + C4_ALWAYS_INLINE bool is_flow_sl() const noexcept { return (type & (FLOW_SL)) != 0; } + C4_ALWAYS_INLINE bool is_flow_ml() const noexcept { return (type & (FLOW_ML)) != 0; } + C4_ALWAYS_INLINE bool is_flow() const noexcept { return (type & (FLOW_ML|FLOW_SL)) != 0; } + + C4_ALWAYS_INLINE bool is_key_styled() const noexcept { return (type & (KEY_STYLE)) != 0; } + C4_ALWAYS_INLINE bool is_val_styled() const noexcept { return (type & (VAL_STYLE)) != 0; } + C4_ALWAYS_INLINE bool is_key_literal() const noexcept { return (type & (KEY_LITERAL)) != 0; } + C4_ALWAYS_INLINE bool is_val_literal() const noexcept { return (type & (VAL_LITERAL)) != 0; } + C4_ALWAYS_INLINE bool is_key_folded() const noexcept { return (type & (KEY_FOLDED)) != 0; } + C4_ALWAYS_INLINE bool is_val_folded() const noexcept { return (type & (VAL_FOLDED)) != 0; } + C4_ALWAYS_INLINE bool is_key_squo() const noexcept { return (type & (KEY_SQUO)) != 0; } + C4_ALWAYS_INLINE bool is_val_squo() const noexcept { return (type & (VAL_SQUO)) != 0; } + C4_ALWAYS_INLINE bool 
is_key_dquo() const noexcept { return (type & (KEY_DQUO)) != 0; } + C4_ALWAYS_INLINE bool is_val_dquo() const noexcept { return (type & (VAL_DQUO)) != 0; } + C4_ALWAYS_INLINE bool is_key_plain() const noexcept { return (type & (KEY_PLAIN)) != 0; } + C4_ALWAYS_INLINE bool is_val_plain() const noexcept { return (type & (VAL_PLAIN)) != 0; } + C4_ALWAYS_INLINE bool is_key_quoted() const noexcept { return (type & KEYQUO) != 0; } + C4_ALWAYS_INLINE bool is_val_quoted() const noexcept { return (type & VALQUO) != 0; } + C4_ALWAYS_INLINE bool is_quoted() const noexcept { return (type & (KEYQUO|VALQUO)) != 0; } + + C4_ALWAYS_INLINE void set_container_style(NodeType_e style) noexcept { type = ((style & CONTAINER_STYLE) | (type & ~CONTAINER_STYLE)); } + C4_ALWAYS_INLINE void set_key_style(NodeType_e style) noexcept { type = ((style & KEY_STYLE) | (type & ~KEY_STYLE)); } + C4_ALWAYS_INLINE void set_val_style(NodeType_e style) noexcept { type = ((style & VAL_STYLE) | (type & ~VAL_STYLE)); } + + /** @} */ + +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @name scalar style helpers + * @{ */ + +/** choose a YAML emitting style based on the scalar's contents */ +RYML_EXPORT NodeType_e scalar_style_choose(csubstr scalar) noexcept; + +/** choose a json style based on the scalar's contents */ +RYML_EXPORT NodeType_e scalar_style_json_choose(csubstr scalar) noexcept; + +/** query whether a scalar can be encoded using single quotes. + * It may not be possible, notably when there is leading + * whitespace after a newline. */ +RYML_EXPORT bool scalar_style_query_squo(csubstr s) noexcept; + +/** query whether a scalar can be encoded using plain style (no + * quotes, not a literal/folded block scalar). 
*/ +RYML_EXPORT bool scalar_style_query_plain(csubstr s) noexcept; + +/** YAML-sense query of nullity. returns true if the scalar points + * to `nullptr` or is otherwise equal to one of the strings + * `"~"`,`"null"`,`"Null"`,`"NULL"` */ +RYML_EXPORT inline C4_NO_INLINE bool scalar_is_null(csubstr s) noexcept +{ + return s.str == nullptr || + s == "~" || + s == "null" || + s == "Null" || + s == "NULL"; +} + +/** @} */ + + +/** @} */ + +} // namespace yml +} // namespace c4 + +C4_SUPPRESS_WARNING_MSVC_POP +C4_SUPPRESS_WARNING_GCC_CLANG_POP + +#endif /* C4_YML_NODE_TYPE_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/parse.hpp b/3rdparty/rapidyaml/include/c4/yml/parse.hpp index 915942657d4047..240709be25e104 100644 --- a/3rdparty/rapidyaml/include/c4/yml/parse.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/parse.hpp @@ -1,664 +1,230 @@ #ifndef _C4_YML_PARSE_HPP_ #define _C4_YML_PARSE_HPP_ -/** @file parse.hpp Utilities to parse YAML and JSON */ - -#ifndef _C4_YML_TREE_HPP_ -#include "c4/yml/tree.hpp" -#endif - -#ifndef _C4_YML_NODE_HPP_ -#include "c4/yml/node.hpp" -#endif - -#ifndef _C4_YML_DETAIL_STACK_HPP_ -#include "c4/yml/detail/stack.hpp" -#endif - -#include - -#if defined(_MSC_VER) -# pragma warning(push) -# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) +#ifndef _C4_YML_COMMON_HPP_ +#include "c4/yml/common.hpp" #endif namespace c4 { namespace yml { -/** @addtogroup doc_parse - * - * @{ - */ - -/** Options to initialize a @ref Parser object. 
*/ -struct RYML_EXPORT ParserOptions -{ -private: - - typedef enum : uint32_t { - LOCATIONS = (1 << 0), - DEFAULTS = 0, - } Flags_e; - - uint32_t flags = DEFAULTS; -public: - ParserOptions() = default; +class Tree; +class NodeRef; +template class ParseEngine; +struct EventHandlerTree; +RYML_EXPORT id_type estimate_tree_capacity(csubstr src); - /** @name source location tracking */ - /** @{ */ - /** enable/disable source location tracking */ - ParserOptions& locations(bool enabled) - { - if(enabled) - flags |= LOCATIONS; - else - flags &= ~LOCATIONS; - return *this; - } - bool locations() const { return (flags & LOCATIONS) != 0u; } +/** @addtogroup doc_parse + * @{ */ - /** @} */ -}; +/** This is the main ryml parser, where the parser events are handled + * to create a ryml tree. + * + * @warning Because the ryml @ref Tree does not accept containers as + * keys, this class cannot successfully parse YAML source with this + * feature. See @ref ParseEngine for more details. + * + * @see ParserOptions + * @see ParseEngine + * @see EventHandlerTree + * */ +using Parser = RYML_EXPORT ParseEngine; //----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -/** A reusable object to parse YAML/JSON and create the ryml @ref Tree. */ -class RYML_EXPORT Parser -{ -public: - - /** @name construction and assignment */ - /** @{ */ - - Parser(Callbacks const& cb, ParserOptions opts={}); - Parser(ParserOptions opts={}) : Parser(get_callbacks(), opts) {} - ~Parser(); - - Parser(Parser &&); - Parser(Parser const&); - Parser& operator=(Parser &&); - Parser& operator=(Parser const&); - - /** @} */ - -public: - - /** @name modifiers */ - /** @{ */ - - /** Reserve a certain capacity for the parsing stack. - * This should be larger than the expected depth of the parsed - * YAML tree. 
- * - * The parsing stack is the only (potential) heap memory used by - * the parser. - * - * If the requested capacity is below the default - * stack size of 16, the memory is used directly in the parser - * object; otherwise it will be allocated from the heap. - * - * @note this reserves memory only for the parser itself; all the - * allocations for the parsed tree will go through the tree's - * allocator. - * - * @note the tree and the arena can (and should) also be reserved. */ - void reserve_stack(size_t capacity) - { - m_stack.reserve(capacity); - } - - /** Reserve a certain capacity for the array used to track node - * locations in the source buffer. */ - void reserve_locations(size_t num_source_lines) - { - _resize_locations(num_source_lines); - } - - /** Reserve a certain capacity for the character arena used to - * filter scalars. */ - void reserve_filter_arena(size_t num_characters) - { - _resize_filter_arena(num_characters); - } - - /** @} */ - -public: - - /** @name getters and modifiers */ - /** @{ */ - - /** Get the current callbacks in the parser. */ - Callbacks callbacks() const { return m_stack.m_callbacks; } - - /** Get the name of the latest file parsed by this object. */ - csubstr filename() const { return m_file; } - - /** Get the latest YAML buffer parsed by this object. */ - csubstr source() const { return m_buf; } - - size_t stack_capacity() const { return m_stack.capacity(); } - size_t locations_capacity() const { return m_newline_offsets_capacity; } - size_t filter_arena_capacity() const { return m_filter_arena.len; } - - ParserOptions const& options() const { return m_options; } - - /** @} */ - -public: - - /** @name parse_in_place - * - * parse a mutable buffer in situ, potentially mutating it. - */ - /** @{ */ - - /** Create a new tree and parse into its root. - * The tree is created with the callbacks currently in the parser. 
*/ - Tree parse_in_place(csubstr filename, substr src) - { - Tree t(callbacks()); - t.reserve(_estimate_capacity(src)); - this->parse_in_place(filename, src, &t, t.root_id()); - return t; - } - - /** Parse into an existing tree, starting at its root node. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_place(csubstr filename, substr src, Tree *t) - { - this->parse_in_place(filename, src, t, t->root_id()); - } - - /** Parse into an existing node. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_place(csubstr filename, substr src, Tree *t, size_t node_id); - // ^^^^^^^^^^^^^ this is the workhorse overload; everything else is syntactic candy - - /** Parse into an existing node. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_place(csubstr filename, substr src, NodeRef node) - { - this->parse_in_place(filename, src, node.tree(), node.id()); - } - - RYML_DEPRECATED("use parse_in_place() instead") Tree parse(csubstr filename, substr src) { return parse_in_place(filename, src); } - RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t) { parse_in_place(filename, src, t); } - RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t, size_t node_id) { parse_in_place(filename, src, t, node_id); } - RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, NodeRef node) { parse_in_place(filename, src, node); } - - /** @} */ - -public: - - /** @name parse_in_arena - * - * copy the YAML source buffer to the tree's arena, then parse the - * copy in situ - * - * @note overloads receiving a substr YAML buffer are intentionally - * left undefined, such that calling parse_in_arena() with a substr - * will cause a linker 
error. This is to prevent an accidental - * copy of the source buffer to the tree's arena, because substr - * is implicitly convertible to csubstr. If you really intend to parse - * a mutable buffer in the tree's arena, convert it first to immutable - * by assigning the substr to a csubstr prior to calling parse_in_arena(). - * This is not needed for parse_in_place() because csubstr is not - * implicitly convertible to substr. */ - /** @{ */ - - // READ THE NOTE ABOVE! - #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a linker error." - /** @cond dev */ - RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr csrc); - RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t); - RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t, size_t node_id); - RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, NodeRef node); - /** @endcond */ - - /** Create a new tree and parse into its root. - * The immutable YAML source is first copied to the tree's arena, - * and parsed from there. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - Tree parse_in_arena(csubstr filename, csubstr csrc) - { - Tree t(callbacks()); - substr src = t.copy_to_arena(csrc); - t.reserve(_estimate_capacity(csrc)); - this->parse_in_place(filename, src, &t, t.root_id()); - return t; - } - - /** Parse into an existing tree, starting at its root node. - * The immutable YAML source is first copied to the tree's arena, - * and parsed from there. 
- * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_arena(csubstr filename, csubstr csrc, Tree *t) - { - substr src = t->copy_to_arena(csrc); - this->parse_in_place(filename, src, t, t->root_id()); - } - - /** Parse into a specific node in an existing tree. - * The immutable YAML source is first copied to the tree's arena, - * and parsed from there. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_arena(csubstr filename, csubstr csrc, Tree *t, size_t node_id) - { - substr src = t->copy_to_arena(csrc); - this->parse_in_place(filename, src, t, node_id); - } - - /** Parse into a specific node in an existing tree. - * The immutable YAML source is first copied to the tree's arena, - * and parsed from there. - * The callbacks in the tree are kept, and used to allocate - * the tree members, if any allocation is required. */ - void parse_in_arena(csubstr filename, csubstr csrc, NodeRef node) - { - substr src = node.tree()->copy_to_arena(csrc); - this->parse_in_place(filename, src, node.tree(), node.id()); - } - - /** @cond dev */ - RYML_DEPRECATED("use parse_in_arena() instead") Tree parse(csubstr filename, csubstr csrc) { return parse_in_arena(filename, csrc); } - RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t) { parse_in_arena(filename, csrc, t); } - RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t, size_t node_id) { parse_in_arena(filename, csrc, t, node_id); } - RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, NodeRef node) { parse_in_arena(filename, csrc, node); } - /** @endcond */ - - /** @} */ - -public: - - /** @name locations */ - /** @{ */ - - /** Get the location of a node of the last tree to be parsed by this parser. 
*/ - Location location(Tree const& tree, size_t node_id) const; - /** Get the location of a node of the last tree to be parsed by this parser. */ - Location location(ConstNodeRef node) const; - /** Get the string starting at a particular location, to the end - * of the parsed source buffer. */ - csubstr location_contents(Location const& loc) const; - /** Given a pointer to a buffer position, get the location. @p val - * must be pointing to somewhere in the source buffer that was - * last parsed by this object. */ - Location val_location(const char *val) const; - - /** @} */ - -private: - - typedef enum { - BLOCK_LITERAL, //!< keep newlines (|) - BLOCK_FOLD //!< replace newline with single space (>) - } BlockStyle_e; - - typedef enum { - CHOMP_CLIP, //!< single newline at end (default) - CHOMP_STRIP, //!< no newline at end (-) - CHOMP_KEEP //!< all newlines from end (+) - } BlockChomp_e; - -private: - - using flag_t = int; - - static size_t _estimate_capacity(csubstr src) { size_t c = _count_nlines(src); c = c >= 16 ? c : 16; return c; } - - void _reset(); - - bool _finished_file() const; - bool _finished_line() const; - - csubstr _peek_next_line(size_t pos=npos) const; - bool _advance_to_peeked(); - void _scan_line(); - - csubstr _slurp_doc_scalar(); - - /** - * @param [out] quoted - * Will only be written to if this method returns true. - * Will be set to true if the scanned scalar was quoted, by '', "", > or |. 
- */ - bool _scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - bool _scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - bool _scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - bool _scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - bool _scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); - - csubstr _scan_comment(); - csubstr _scan_squot_scalar(); - csubstr _scan_dquot_scalar(); - csubstr _scan_block(); - substr _scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation); - substr _scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line); - substr _scan_complex_key(csubstr currscalar, csubstr peeked_line); - csubstr _scan_to_next_nonempty_line(size_t indentation); - csubstr _extend_scanned_scalar(csubstr currscalar); - - csubstr _filter_squot_scalar(const substr s); - csubstr _filter_dquot_scalar(substr s); - csubstr _filter_plain_scalar(substr s, size_t indentation); - csubstr _filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation); - template - bool _filter_nl(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos, size_t indentation); - template - void _filter_ws(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos); - bool _apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp); - - void _handle_finished_file(); - void _handle_line(); - - bool _handle_indentation(); - - bool _handle_unk(); - bool _handle_map_flow(); - bool _handle_map_blck(); - bool _handle_seq_flow(); - bool _handle_seq_blck(); - bool _handle_top(); - bool _handle_types(); - bool _handle_key_anchors_and_refs(); - bool _handle_val_anchors_and_refs(); - void _move_val_tag_to_key_tag(); - void _move_key_tag_to_val_tag(); - void _move_key_tag2_to_key_tag(); - void _move_val_anchor_to_key_anchor(); - void _move_key_anchor_to_val_anchor(); - 
- void _push_level(bool explicit_flow_chars = false); - void _pop_level(); - - void _start_unk(bool as_child=true); - - void _start_map(bool as_child=true); - void _start_map_unk(bool as_child); - void _stop_map(); - - void _start_seq(bool as_child=true); - void _stop_seq(); - - void _start_seqimap(); - void _stop_seqimap(); - - void _start_doc(bool as_child=true); - void _stop_doc(); - void _start_new_doc(csubstr rem); - void _end_stream(); - - NodeData* _append_val(csubstr val, flag_t quoted=false); - NodeData* _append_key_val(csubstr val, flag_t val_quoted=false); - bool _rval_dash_start_or_continue_seq(); - - void _store_scalar(csubstr s, flag_t is_quoted); - csubstr _consume_scalar(); - void _move_scalar_from_top(); - - inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); (void)str; return _append_val({nullptr, size_t(0)}); } - inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); (void)str; return _append_key_val({nullptr, size_t(0)}); } - inline void _store_scalar_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); (void)str; _store_scalar({nullptr, size_t(0)}, false); } - - void _set_indentation(size_t behind); - void _save_indentation(size_t behind=0); - bool _maybe_set_indentation_from_anchor_or_tag(); - - void _write_key_anchor(size_t node_id); - void _write_val_anchor(size_t node_id); - - void _handle_directive(csubstr directive); - - void _skipchars(char c); - template - void _skipchars(const char (&chars)[N]); - -private: - - static size_t _count_nlines(csubstr src); - -private: - - typedef enum : flag_t { - RTOP = 0x01 << 0, ///< reading at top level - RUNK = 0x01 << 1, ///< reading an unknown: must determine whether scalar, map or seq - RMAP = 0x01 << 2, ///< reading a map - RSEQ = 0x01 << 3, ///< reading a seq - FLOW = 0x01 << 4, 
///< reading is inside explicit flow chars: [] or {} - QMRK = 0x01 << 5, ///< reading an explicit key (`? key`) - RKEY = 0x01 << 6, ///< reading a scalar as key - RVAL = 0x01 << 7, ///< reading a scalar as val - RNXT = 0x01 << 8, ///< read next val or keyval - SSCL = 0x01 << 9, ///< there's a stored scalar - QSCL = 0x01 << 10, ///< stored scalar was quoted - RSET = 0x01 << 11, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html - NDOC = 0x01 << 12, ///< no document mode. a document has ended and another has not started yet. - //! reading an implicit map nested in an explicit seq. - //! eg, {key: [key2: value2, key3: value3]} - //! is parsed as {key: [{key2: value2}, {key3: value3}]} - RSEQIMAP = 0x01 << 13, - } State_e; - - struct LineContents - { - csubstr full; ///< the full line, including newlines on the right - csubstr stripped; ///< the stripped line, excluding newlines on the right - csubstr rem; ///< the stripped line remainder; initially starts at the first non-space character - size_t indentation; ///< the number of spaces on the beginning of the line - - LineContents() : full(), stripped(), rem(), indentation() {} - - void reset_with_next_line(csubstr buf, size_t pos); - - void reset(csubstr full_, csubstr stripped_) - { - full = full_; - stripped = stripped_; - rem = stripped_; - // find the first column where the character is not a space - indentation = full.first_not_of(' '); - } - - size_t current_col() const - { - return current_col(rem); - } - - size_t current_col(csubstr s) const - { - RYML_ASSERT(s.str >= full.str); - RYML_ASSERT(full.is_super(s)); - size_t col = static_cast(s.str - full.str); - return col; - } - }; - - struct State - { - flag_t flags; - size_t level; - size_t node_id; // don't hold a pointer to the node as it will be relocated during tree resizes - csubstr scalar; - size_t scalar_col; // the column where the scalar (or its quotes) begin - - Location pos; - LineContents line_contents; - size_t 
indref; - - State() : flags(), level(), node_id(), scalar(), scalar_col(), pos(), line_contents(), indref() {} - - void reset(const char *file, size_t node_id_) - { - flags = RUNK|RTOP; - level = 0; - pos.name = to_csubstr(file); - pos.offset = 0; - pos.line = 1; - pos.col = 1; - node_id = node_id_; - scalar_col = 0; - scalar.clear(); - indref = 0; - } - }; - - void _line_progressed(size_t ahead); - void _line_ended(); - void _line_ended_undo(); - - void _prepare_pop() - { - RYML_ASSERT(m_stack.size() > 1); - State const& curr = m_stack.top(); - State & next = m_stack.top(1); - next.pos = curr.pos; - next.line_contents = curr.line_contents; - next.scalar = curr.scalar; - } - - inline bool _at_line_begin() const - { - return m_state->line_contents.rem.begin() == m_state->line_contents.full.begin(); - } - inline bool _at_line_end() const - { - csubstr r = m_state->line_contents.rem; - return r.empty() || r.begins_with(' ', r.len); - } - inline bool _token_is_from_this_line(csubstr token) const - { - return token.is_sub(m_state->line_contents.full); - } - - inline NodeData * node(State const* s) const { return m_tree->get(s->node_id); } - inline NodeData * node(State const& s) const { return m_tree->get(s .node_id); } - inline NodeData * node(size_t node_id) const { return m_tree->get( node_id); } - - inline bool has_all(flag_t f) const { return (m_state->flags & f) == f; } - inline bool has_any(flag_t f) const { return (m_state->flags & f) != 0; } - inline bool has_none(flag_t f) const { return (m_state->flags & f) == 0; } - - static inline bool has_all(flag_t f, State const* s) { return (s->flags & f) == f; } - static inline bool has_any(flag_t f, State const* s) { return (s->flags & f) != 0; } - static inline bool has_none(flag_t f, State const* s) { return (s->flags & f) == 0; } - - inline void set_flags(flag_t f) { set_flags(f, m_state); } - inline void add_flags(flag_t on) { add_flags(on, m_state); } - inline void addrem_flags(flag_t on, flag_t off) { 
addrem_flags(on, off, m_state); } - inline void rem_flags(flag_t off) { rem_flags(off, m_state); } - - void set_flags(flag_t f, State * s); - void add_flags(flag_t on, State * s); - void addrem_flags(flag_t on, flag_t off, State * s); - void rem_flags(flag_t off, State * s); - - void _resize_filter_arena(size_t num_characters); - void _grow_filter_arena(size_t num_characters); - substr _finish_filter_arena(substr dst, size_t pos); - - void _prepare_locations(); - void _resize_locations(size_t sz); - bool _locations_dirty() const; - - bool _location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const; - bool _location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const; - -private: - - void _free(); - void _clr(); - void _cp(Parser const* that); - void _mv(Parser *that); - -#ifdef RYML_DBG - template void _dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const; -#endif - template [[noreturn]] void _err(csubstr fmt, Args const& C4_RESTRICT ...args) const; - template void _fmt_msg(DumpFn &&dumpfn) const; - static csubstr _prfl(substr buf, flag_t v); -private: - - ParserOptions m_options; +/** @defgroup doc_parse_in_place__with_existing_parser Parse in place with existing parser + * + * @brief parse a mutable YAML source buffer. Scalars requiring + * filtering are mutated in place (except in the rare cases where the + * filtered scalar is longer than the original scalar, or where + * filtering was disabled before the call). These overloads accept an + * existing parser object, and provide the opportunity to use special + * parser options. + * + * @see ParserOptions + * + * @{ + */ - csubstr m_file; - substr m_buf; +// this is vertically aligned to highlight the parameter differences. + +RYML_EXPORT void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id); /**< (1) parse YAML into an existing tree node. 
+ * + * The filename will be used in any error messages + * arising during the parse. The callbacks in the + * tree are kept, and used to allocate + * the tree members, if any allocation is required. */ +RYML_EXPORT void parse_in_place(Parser *parser, substr yaml, Tree *t, id_type node_id); /**< (2) like (1) but no filename will be reported */ +RYML_EXPORT void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t ); /**< (3) parse YAML into an existing tree, into its root node. + * + * The filename will be used in any error messages + * arising during the parse. The callbacks in the + * tree are kept, and used to allocate + * the tree members, if any allocation is required. */ +RYML_EXPORT void parse_in_place(Parser *parser, substr yaml, Tree *t ); /**< (4) like (3) but no filename will be reported */ +RYML_EXPORT void parse_in_place(Parser *parser, csubstr filename, substr yaml, NodeRef node ); /**< (5) like (1) but the node is given as a NodeRef */ +RYML_EXPORT void parse_in_place(Parser *parser, substr yaml, NodeRef node ); /**< (6) like (5) but no filename will be reported */ +RYML_EXPORT Tree parse_in_place(Parser *parser, csubstr filename, substr yaml ); /**< (7) create a new tree, and parse YAML into its root node. + * + * The filename will be used in any error messages + * arising during the parse. The tree is created with + * the callbacks currently in the parser. + */ +RYML_EXPORT Tree parse_in_place(Parser *parser, substr yaml ); /**< (8) like (7) but no filename will be reported */ + + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse. 
+RYML_EXPORT void parse_json_in_place(Parser *parser, substr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t ); ///< (3) parse JSON into an existing tree, into its root node. +RYML_EXPORT void parse_json_in_place(Parser *parser, substr json, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_json_in_place(Parser *parser, csubstr filename, substr json, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_json_in_place(Parser *parser, substr json, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_json_in_place(Parser *parser, csubstr filename, substr json ); ///< (7) create a new tree, and parse JSON into its root node. +RYML_EXPORT Tree parse_json_in_place(Parser *parser, substr json ); ///< (8) like (7) but no filename will be reported - size_t m_root_id; - Tree * m_tree; +/** @} */ - detail::stack m_stack; - State * m_state; - size_t m_key_tag_indentation; - size_t m_key_tag2_indentation; - csubstr m_key_tag; - csubstr m_key_tag2; - size_t m_val_tag_indentation; - csubstr m_val_tag; +//----------------------------------------------------------------------------- - bool m_key_anchor_was_before; - size_t m_key_anchor_indentation; - csubstr m_key_anchor; - size_t m_val_anchor_indentation; - csubstr m_val_anchor; +/** @defgroup doc_parse_in_place___with_temporary_parser Parse in place with temporary parser + * + * @brief parse a mutable YAML source buffer. Scalars requiring + * filtering are mutated in place (except in the rare cases where the + * filtered scalar is longer than the original scalar). + * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to enable the user to easily parse + * YAML without the need to explicitly instantiate a parser and event + * handler. 
Note that some properties (notably node locations in the + * original source code) are only available through the parser + * class. If you need access to any of these properties, use + * the appropriate overload from @ref doc_parse_in_place__with_existing_parser + * + * @{ + */ - substr m_filter_arena; +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_in_place(csubstr filename, substr yaml, Tree *t, id_type node_id); ///< (1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_in_place( substr yaml, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_in_place(csubstr filename, substr yaml, Tree *t ); ///< (3) parse YAML into an existing tree, into its root node. +RYML_EXPORT void parse_in_place( substr yaml, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_in_place(csubstr filename, substr yaml, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_in_place( substr yaml, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_in_place(csubstr filename, substr yaml ); ///< (7) create a new tree, and parse YAML into its root node. +RYML_EXPORT Tree parse_in_place( substr yaml ); ///< (8) like (7) but no filename will be reported + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_json_in_place(csubstr filename, substr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_json_in_place( substr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_json_in_place(csubstr filename, substr json, Tree *t ); ///< (3) parse JSON into an existing tree, into its root node. 
+RYML_EXPORT void parse_json_in_place( substr json, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_json_in_place(csubstr filename, substr json, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_json_in_place( substr json, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_json_in_place(csubstr filename, substr json ); ///< (7) create a new tree, and parse JSON into its root node. +RYML_EXPORT Tree parse_json_in_place( substr json ); ///< (8) like (7) but no filename will be reported - size_t *m_newline_offsets; - size_t m_newline_offsets_size; - size_t m_newline_offsets_capacity; - csubstr m_newline_offsets_buf; -}; +/** @} */ //----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -/** @defgroup doc_parse_in_place Parse in place + +/** @defgroup doc_parse_in_arena__with_existing_parser Parse in arena with existing parser * - * Parse a mutable YAML source buffer to create a ryml @ref Tree, - * potentially mutating the buffer. + * @brief parse a read-only (immutable) YAML source buffer. This is + * achieved by first copying the contents of the buffer to the tree's + * arena, and then calling @ref parse_in_arena() . All the resulting + * scalars will be filtered in the arena. These overloads accept an + * existing parser object, and provide the opportunity to use special + * parser options. * - * These freestanding functions use a temporary parser object, and are - * convenience functions to easily parse YAML without the need to - * instantiate a separate parser. Note that some properties (notably - * node locations in the original source code) are only available - * through the parser object after it has parsed the code. 
If you need - * access to any of these properties, use Parser::parse_in_place(). + * @see ParserOptions * - * @see Parser */ -/** @{ */ - -inline Tree parse_in_place( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } //!< parse in-situ a modifiable YAML source buffer. -inline Tree parse_in_place(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } //!< parse in-situ a modifiable YAML source buffer, providing a filename for error messages. -inline void parse_in_place( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer -inline void parse_in_place(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. -inline void parse_in_place( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer -inline void parse_in_place(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. -inline void parse_in_place( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer -inline void parse_in_place(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. + * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily parse YAML without the need + * to instantiate a separate parser. 
Note that some properties + * (notably node locations in the original source code) are only + * available through the parser class. If you need access to any of + * these properties, use the appropriate overload from @ref + * doc_parse_in_arena__with_existing_parser + * + * @warning overloads receiving a substr YAML buffer are intentionally + * left undefined, such that calling parse_in_arena() with a substr + * will cause a linker error. This is to prevent an accidental copy of + * the source buffer to the tree's arena, because substr (which is + * mutable) is implicitly convertible to csubstr (which is + * immutable). If you really intend to parse a mutable buffer in the + * tree's arena, convert it first to immutable by assigning the substr + * to a csubstr prior to calling parse_in_arena(). This is not needed + * for parse_in_place() because csubstr is not implicitly convertible + * to substr. To be clear: + * ```c++ + * substr mutable_buffer = ...; + * parser.parse_in_arena(mutable_buffer); // linker error + * + * csubstr immutable_buffer = ...; + * parser.parse_in_arena(immutable_buffer); // ok + * ``` + * + * @{ + */ +#define RYML_DONT_PARSE_SUBSTR_IN_ARENA "" \ + "Do not pass a (mutable) substr to parse_in_arena(); " \ + "if you have a substr, it should be parsed in place. " \ + "Consider using parse_in_place() instead, or convert " \ + "the buffer to csubstr prior to calling. This function " \ + " is deliberately left undefined, so that calling it " \ + "will cause a linker error." + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t, id_type node_id); ///< (1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse. 
+RYML_EXPORT void parse_in_arena(Parser *parser, csubstr yaml, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t ); ///< (3) parse YAML into an existing tree, into its root node. +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr yaml, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_in_arena(Parser *parser, csubstr yaml, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_in_arena(Parser *parser, csubstr filename, csubstr yaml ); ///< (7) create a new tree, and parse YAML into its root node. +RYML_EXPORT Tree parse_in_arena(Parser *parser, csubstr yaml ); ///< (8) like (7) but no filename will be reported + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t ); ///< (3) parse JSON into an existing tree, into its root node. 
+RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr json, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr json, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_json_in_arena(Parser *parser, csubstr filename, csubstr json ); ///< (7) create a new tree, and parse JSON into its root node. +RYML_EXPORT Tree parse_json_in_arena(Parser *parser, csubstr json ); ///< (8) like (7) but no filename will be reported + +/* READ THE DEPRECATION NOTE! + * + * All of the functions below are intentionally left undefined, to + * prevent them being used. + * + */ /** @cond dev */ -RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } -RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } -RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } -RYML_DEPRECATED("use parse_in_place() instead") inline void 
parse(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, substr yaml, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, csubstr filename, substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, csubstr filename, substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(Parser *parser, substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(Parser *parser, csubstr filename, substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, substr json, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, csubstr filename, substr json, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, substr json, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, csubstr filename, substr json, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, substr json, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, csubstr filename, substr json, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(Parser *parser, substr json ); 
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(Parser *parser, csubstr filename, substr json ); /** @endcond */ /** @} */ @@ -666,62 +232,84 @@ RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filena //----------------------------------------------------------------------------- -/** @defgroup doc_parse_in_arena Parse in arena + +/** @defgroup doc_parse_in_arena__with_temporary_parser Parse in arena with temporary parser * - * Parse a read-only YAML source buffer to create a ryml @ref Tree, - * copying the buffer first to the tree's arena. The copy of the - * buffer is then parsed in place. + * @brief parse a read-only (immutable) YAML source buffer. This is + * achieved by first copying the contents of the buffer to the tree's + * arena, and then calling @ref parse_in_arena() . * - * These freestanding functions use a temporary parser object, and are - * convenience functions to easily parse YAML without the need to - * instantiate a separate parser. Note that some properties (notably - * node locations in the original source code) are only available - * through the parser object after it has parsed the code. If you need - * access to any of these properties, use Parser::parse_in_arena(). + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily one-off parse YAML without + * the need to instantiate a separate parser. Note that some + * properties (notably node locations in the original source code) are + * only available through the parser class. If you need access to any + * of these properties, use the appropriate overload from @ref + * doc_parse_in_arena__with_existing_parser * - * @note overloads receiving a substr YAML buffer are intentionally + * @warning overloads receiving a substr YAML buffer are intentionally * left undefined, such that calling parse_in_arena() with a substr - * will cause a linker error. 
This is to prevent an accidental - * copy of the source buffer to the tree's arena, because substr - * is implicitly convertible to csubstr. If you really intend to parse - * a mutable buffer in the tree's arena, convert it first to immutable - * by assigning the substr to a csubstr prior to calling parse_in_arena(). - * This is not needed for parse_in_place() because csubstr is not - * implicitly convertible to substr. + * will cause a linker error. This is to prevent an accidental copy of + * the source buffer to the tree's arena, because substr (which is + * mutable) is implicitly convertible to csubstr (which is + * immutable). If you really intend to parse a mutable buffer in the + * tree's arena, convert it first to immutable by assigning the substr + * to a csubstr prior to calling parse_in_arena(). This is not needed + * for parse_in_place() because csubstr is not implicitly convertible + * to substr. To be clear: + * ```c++ + * substr mutable_buffer = ...; + * parser.parse_in_arena(mutable_buffer); // linker error * - * @see Parser */ -/** @{ */ - -/* READ THE NOTE ABOVE! 
*/ -/** @cond dev */ -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena( substr yaml ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr yaml ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t, size_t node_id); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t, size_t node_id); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, NodeRef node ); -RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, NodeRef node ); -/** @endcond */ - -inline Tree parse_in_arena( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. -inline Tree parse_in_arena(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -inline void parse_in_arena( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. 
-inline void parse_in_arena( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -inline void parse_in_arena( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -inline void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. + * csubstr immutable_buffer = ...; + * parser.parse_in_arena(immutable_buffer); // ok + * ``` + * + * @{ + */ +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, id_type node_id); ///< (1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_in_arena( csubstr yaml, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_in_arena(csubstr filename, csubstr yaml, Tree *t ); ///< (3) parse YAML into an existing tree, into its root node. 
+RYML_EXPORT void parse_in_arena( csubstr yaml, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_in_arena( csubstr yaml, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_in_arena(csubstr filename, csubstr yaml ); ///< (7) create a new tree, and parse YAML into its root node. +RYML_EXPORT Tree parse_in_arena( csubstr yaml ); ///< (8) like (7) but no filename will be reported + +// this is vertically aligned to highlight the parameter differences. +RYML_EXPORT void parse_json_in_arena(csubstr filename, csubstr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse. +RYML_EXPORT void parse_json_in_arena( csubstr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported +RYML_EXPORT void parse_json_in_arena(csubstr filename, csubstr json, Tree *t ); ///< (3) parse JSON into an existing tree, into its root node. +RYML_EXPORT void parse_json_in_arena( csubstr json, Tree *t ); ///< (4) like (3) but no filename will be reported +RYML_EXPORT void parse_json_in_arena(csubstr filename, csubstr json, NodeRef node ); ///< (5) like (1) but the node is given as a NodeRef +RYML_EXPORT void parse_json_in_arena( csubstr json, NodeRef node ); ///< (6) like (5) but no filename will be reported +RYML_EXPORT Tree parse_json_in_arena(csubstr filename, csubstr json ); ///< (7) create a new tree, and parse JSON into its root node. +RYML_EXPORT Tree parse_json_in_arena( csubstr json ); ///< (8) like (7) but no filename will be reported + +/* READ THE DEPRECATION NOTE! + * + * All of the functions below are intentionally left undefined, to + * prevent them being used. 
+ */ /** @cond dev */ -RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. -RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. -RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. 
-RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena( substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena( substr json, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(csubstr filename, substr json, Tree *t, id_type node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena( substr json, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(csubstr filename, substr json, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena( substr json, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(csubstr filename, substr json, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena( substr json ); 
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(csubstr filename, substr json ); /** @endcond */ /** @} */ @@ -730,8 +318,4 @@ RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filena } // namespace yml } // namespace c4 -#if defined(_MSC_VER) -# pragma warning(pop) -#endif - #endif /* _C4_YML_PARSE_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/parse_engine.def.hpp b/3rdparty/rapidyaml/include/c4/yml/parse_engine.def.hpp new file mode 100644 index 00000000000000..458b6c85de7e31 --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/parse_engine.def.hpp @@ -0,0 +1,8142 @@ +#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_ +#define _C4_YML_PARSE_ENGINE_DEF_HPP_ + +#include "c4/yml/parse_engine.hpp" +#include "c4/error.hpp" +#include "c4/charconv.hpp" +#include "c4/utf.hpp" +#include + +#include + +#include "c4/yml/detail/parser_dbg.hpp" +#include "c4/yml/filter_processor.hpp" +#ifdef RYML_DBG +#include "c4/yml/detail/print.hpp" +#endif + + +#if defined(RYML_WITH_TAB_TOKENS) +#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__ +#define _RYML_WITHOUT_TAB_TOKENS(...) +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with +#else +#define _RYML_WITH_TAB_TOKENS(...) +#define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__ +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without +#endif + + +// scaffold: +#define _c4dbgnextline() \ + do { \ + _c4dbgq("\n-----------"); \ + _c4dbgt("handling line={}, offset={}B", \ + m_evt_handler->m_curr->pos.line, \ + m_evt_handler->m_curr->pos.offset); \ + } while(0) + + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +# pragma warning(disable: 4702/*unreachable code*/) +#elif defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. 
+# pragma clang diagnostic ignored "-Wformat-nonliteral" +# pragma clang diagnostic ignored "-Wold-style-cast" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +# pragma GCC diagnostic ignored "-Wold-style-cast" +# if __GNUC__ >= 7 +# pragma GCC diagnostic ignored "-Wduplicated-branches" +# endif +#endif + +namespace c4 { +namespace yml { + +namespace { + +C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept +{ + RYML_ASSERT(s.len > 0); + RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?'); + return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t')))); +} + +inline bool _is_doc_begin_token(csubstr s) +{ + RYML_ASSERT(s.begins_with('-')); + RYML_ASSERT(!s.ends_with("\n")); + RYML_ASSERT(!s.ends_with("\r")); + return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); +} + +inline bool _is_doc_end_token(csubstr s) +{ + RYML_ASSERT(s.begins_with('.')); + RYML_ASSERT(!s.ends_with("\n")); + RYML_ASSERT(!s.ends_with("\r")); + return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); +} + +inline bool _is_doc_token(csubstr s) noexcept +{ + // + // NOTE: this function was failing under some scenarios when + // compiled with gcc -O2 (but not -O3 or -O1 or -O0), likely + // related to optimizer assumptions on the input string and + // possibly caused from UB around assignment to that string (the + // call site was in _scan_block()). For more details see: + // + // https://github.com/biojppm/rapidyaml/issues/440 + // + // The current version does not suffer this problem, but it may + // appear again. 
+ // + if(s.len >= 3) + { + switch(s.str[0]) + { + case '-': + //return _is_doc_begin_token(s); // this was failing with gcc -O2 + return (s.str[1] == '-' && s.str[2] == '-') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); + case '.': + //return _is_doc_end_token(s); // this was failing with gcc -O2 + return (s.str[1] == '.' && s.str[2] == '.') + && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t'))); + } + } + return false; +} + +inline size_t _is_special_json_scalar(csubstr s) +{ + RYML_ASSERT(s.len); + switch(s.str[0]) + { + case 'f': + if(s.len >= 5 && s.begins_with("false")) + return 5u; + break; + case 't': + if(s.len >= 4 && s.begins_with("true")) + return 4u; + break; + case 'n': + if(s.len >= 4 && s.begins_with("null")) + return 4u; + break; + } + return 0u; +} + + +//----------------------------------------------------------------------------- + +C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following) +{ + return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n'); +} + +//! 
look for the next newline chars, and jump to the right of those +inline substr from_next_line(substr rem) +{ + size_t nlpos = rem.first_of("\r\n"); + if(nlpos == csubstr::npos) + return {}; + const char nl = rem[nlpos]; + rem = rem.right_of(nlpos); + if(rem.empty()) + return {}; + if(_extend_from_combined_newline(nl, rem.front())) + rem = rem.sub(1); + return rem; +} + + +//----------------------------------------------------------------------------- + +inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i) +{ + RYML_ASSERT(r[*i] == '\n'); + size_t numnl_following = 0; + ++(*i); + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] == '\n') + ++numnl_following; + // skip leading whitespace + else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') + ; + else + break; + } + return numnl_following; +} + +/** @p i is set to the first non whitespace character after the line + * @return the number of empty lines after the initial position */ +inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation) +{ + RYML_ASSERT(r[*i] == '\n'); + size_t numnl_following = 0; + ++(*i); + if(indentation == 0) + { + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] == '\n') + ++numnl_following; + // skip leading whitespace + else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') + ; + else + break; + } + } + else + { + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] == '\n') + { + ++numnl_following; + // skip the indentation after the newline + size_t stop = *i + indentation; + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] != ' ' && r.str[*i] != '\r') + break; + RYML_ASSERT(*i < stop); + } + C4_UNUSED(stop); + } + // skip leading whitespace + else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') + ; + else + break; + } + } + return numnl_following; +} + +} // anon namespace + + +//----------------------------------------------------------------------------- 
+//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +ParseEngine::~ParseEngine() +{ + _free(); + _clr(); +} + +template +ParseEngine::ParseEngine(EventHandler *evt_handler, ParserOptions opts) + : m_options(opts) + , m_file() + , m_buf() + , m_evt_handler(evt_handler) + , m_pending_anchors() + , m_pending_tags() + , m_newline_offsets() + , m_newline_offsets_size(0) + , m_newline_offsets_capacity(0) + , m_newline_offsets_buf() +{ + RYML_CHECK(evt_handler); +} + +template +ParseEngine::ParseEngine(ParseEngine &&that) + : m_options(that.m_options) + , m_file(that.m_file) + , m_buf(that.m_buf) + , m_evt_handler(that.m_evt_handler) + , m_pending_anchors(that.m_pending_anchors) + , m_pending_tags(that.m_pending_tags) + , m_newline_offsets(that.m_newline_offsets) + , m_newline_offsets_size(that.m_newline_offsets_size) + , m_newline_offsets_capacity(that.m_newline_offsets_capacity) + , m_newline_offsets_buf(that.m_newline_offsets_buf) +{ + that._clr(); +} + +template +ParseEngine::ParseEngine(ParseEngine const& that) + : m_options(that.m_options) + , m_file(that.m_file) + , m_buf(that.m_buf) + , m_evt_handler(that.m_evt_handler) + , m_pending_anchors(that.m_pending_anchors) + , m_pending_tags(that.m_pending_tags) + , m_newline_offsets() + , m_newline_offsets_size() + , m_newline_offsets_capacity() + , m_newline_offsets_buf() +{ + if(that.m_newline_offsets_capacity) + { + _resize_locations(that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + } +} + +template +ParseEngine& ParseEngine::operator=(ParseEngine &&that) +{ + _free(); + m_options = (that.m_options); + m_file = (that.m_file); + m_buf = 
(that.m_buf); + m_evt_handler = that.m_evt_handler; + m_pending_anchors = that.m_pending_anchors; + m_pending_tags = that.m_pending_tags; + m_newline_offsets = (that.m_newline_offsets); + m_newline_offsets_size = (that.m_newline_offsets_size); + m_newline_offsets_capacity = (that.m_newline_offsets_capacity); + m_newline_offsets_buf = (that.m_newline_offsets_buf); + that._clr(); + return *this; +} + +template +ParseEngine& ParseEngine::operator=(ParseEngine const& that) +{ + _free(); + m_options = (that.m_options); + m_file = (that.m_file); + m_buf = (that.m_buf); + m_evt_handler = that.m_evt_handler; + m_pending_anchors = that.m_pending_anchors; + m_pending_tags = that.m_pending_tags; + if(that.m_newline_offsets_capacity > m_newline_offsets_capacity) + _resize_locations(that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + m_newline_offsets_buf = that.m_newline_offsets_buf; + return *this; +} + +template +void ParseEngine::_clr() +{ + m_options = {}; + m_file = {}; + m_buf = {}; + m_evt_handler = {}; + m_pending_anchors = {}; + m_pending_tags = {}; + m_newline_offsets = {}; + m_newline_offsets_size = {}; + m_newline_offsets_capacity = {}; + m_newline_offsets_buf = {}; +} + +template +void ParseEngine::_free() +{ + if(m_newline_offsets) + { + _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = nullptr; + m_newline_offsets_size = 0u; + m_newline_offsets_capacity = 0u; + m_newline_offsets_buf = 0u; + } +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_reset() +{ + 
m_pending_anchors = {}; + m_pending_tags = {}; + if(m_options.locations()) + { + _prepare_locations(); + } + m_was_inside_qmrk = false; +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_relocate_arena(csubstr prev_arena, substr next_arena) +{ + #define _ryml_relocate(s) \ + if(s.is_sub(prev_arena)) \ + { \ + s.str = next_arena.str + (s.str - prev_arena.str); \ + } + _ryml_relocate(m_buf); + _ryml_relocate(m_newline_offsets_buf); + for(size_t i = 0; i < m_pending_tags.num_entries; ++i) + _ryml_relocate(m_pending_tags.annotations[i].str); + for(size_t i = 0; i < m_pending_anchors.num_entries; ++i) + _ryml_relocate(m_pending_anchors.annotations[i].str); + #undef _ryml_relocate +} + +template +void ParseEngine::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena) +{ + ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena); +} + + +//----------------------------------------------------------------------------- + +template +template +void ParseEngine::_fmt_msg(DumpFn &&dumpfn) const +{ + auto const *const C4_RESTRICT st = m_evt_handler->m_curr; + auto const& lc = st->line_contents; + csubstr contents = lc.stripped; + if(contents.len) + { + // print the yaml src line + size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col); + if(m_file.len) + { + detail::_dump(dumpfn, "{}:", m_file); + offs += m_file.len + 1; + } + detail::_dump(dumpfn, "{}:{}: ", st->pos.line, st->pos.col); + csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u)); + csubstr maybe_ellipsis = (contents.len < 80u ? 
csubstr{} : csubstr("...")); + detail::_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len); + // highlight the remaining portion of the previous line + size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin()); + size_t lastcol = firstcol + lc.rem.len; + for(size_t i = 0; i < offs + firstcol; ++i) + dumpfn(" "); + dumpfn("^"); + for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i) + dumpfn("~"); + detail::_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1); + } + else + { + dumpfn("\n"); + } + +#ifdef RYML_DBG + // next line: print the state flags + { + char flagbuf_[128]; + detail::_dump(dumpfn, "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags)); + } +#endif +} + + +//----------------------------------------------------------------------------- + +template +template +void ParseEngine::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const +{ + char errmsg[RYML_ERRMSG_SIZE]; + detail::_SubstrWriter writer(errmsg); + auto dumpfn = [&writer](csubstr s){ writer.append(s); }; + detail::_dump(dumpfn, fmt, args...); + writer.append('\n'); + _fmt_msg(dumpfn); + size_t len = writer.pos < RYML_ERRMSG_SIZE ? 
writer.pos : RYML_ERRMSG_SIZE; + m_evt_handler->cancel_parse(); + m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data); +} + + +//----------------------------------------------------------------------------- +#ifdef RYML_DBG +template +template +void ParseEngine::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const +{ + if(_dbg_enabled()) + { + auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); }; + detail::_dump(dumpfn, fmt, args...); + dumpfn("\n"); + _fmt_msg(dumpfn); + } +} +#endif + + +//----------------------------------------------------------------------------- +template +bool ParseEngine::_finished_file() const +{ + bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len; + if(ret) + { + _c4dbgp("finished file!!!"); + } + return ret; +} + +template +C4_HOT C4_ALWAYS_INLINE bool ParseEngine::_finished_line() const +{ + return m_evt_handler->m_curr->line_contents.rem.empty(); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_maybe_skip_whitespace_tokens() +{ + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t'))) + { + size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(pos == npos) + pos = rem.len; // maybe the line is just all whitespace + _c4dbgpf("skip {} whitespace characters", pos); + _line_progressed(pos); + } +} + +template +void ParseEngine::_maybe_skipchars(char c) +{ + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(rem.len && rem.str[0] == c) + { + size_t pos = rem.first_not_of(c); + if(pos == npos) + pos = rem.len; // maybe the line is just all c + _c4dbgpf("skip {}x'{}'", pos, c); + _line_progressed(pos); + } +} + +#ifdef RYML_NO_COVERAGE__TO_BE_DELETED +template +void ParseEngine::_maybe_skipchars_up_to(char c, size_t max_to_skip) +{ + csubstr 
rem = m_evt_handler->m_curr->line_contents.rem; + if(rem.len && rem.str[0] == c) + { + size_t pos = rem.first_not_of(c); + if(pos == npos) + pos = rem.len; // maybe the line is just all c + if(pos > max_to_skip) + pos = max_to_skip; + _c4dbgpf("skip {}x'{}'", pos, c); + _line_progressed(pos); + } +} +#endif + +template +template +void ParseEngine::_skipchars(const char (&chars)[N]) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars)); + size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars); + if(pos == npos) + pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace + _c4dbgpf("skip {} characters", pos); + _line_progressed(pos); +} + +template +void ParseEngine::_skip_comment() +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#')); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full)); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + csubstr full = m_evt_handler->m_curr->line_contents.full; + // raise an error if the comment is not preceded by whitespace + if(!full.begins_with('#')) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str); + const char c = full[(size_t)(rem.str - full.str - 1)]; + if(C4_UNLIKELY(c != ' ' && c != '\t')) + _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace"); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str); + } + _c4dbgpf("comment was '{}'", rem); + _line_progressed(rem.len); +} + +template +void ParseEngine::_maybe_skip_comment() +{ + csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(' '); + if(s.begins_with('#')) + { + _line_progressed((size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str)); + _skip_comment(); + } +} + +template +bool 
ParseEngine::_maybe_scan_following_colon() noexcept +{ + if(m_evt_handler->m_curr->line_contents.rem.len) + { + if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t') + { + size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t"); + if(pos == npos) + pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces + _c4dbgpf("skip {}x'{}'", pos, ' '); + _line_progressed(pos); + } + if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':')) + { + _c4dbgp("found ':' colon next"); + _line_progressed(1); + return true; + } + } + return false; +} + +template +bool ParseEngine::_maybe_scan_following_comma() noexcept +{ + if(m_evt_handler->m_curr->line_contents.rem.len) + { + if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t') + { + size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t"); + if(pos == npos) + pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces + _c4dbgpf("skip {}x'{}'", pos, ' '); + _line_progressed(pos); + } + if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ',')) + { + _c4dbgp("found ',' comma next"); + _line_progressed(1); + return true; + } + } + return false; +} + + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_scan_anchor() +{ + csubstr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&')); + csubstr anchor = s.range(1, s.first_of(' ')); + _line_progressed(1u + anchor.len); + _maybe_skipchars(' '); + return anchor; +} + +template +csubstr ParseEngine::_scan_ref_seq() +{ + csubstr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*')); + 
csubstr ref = s.first(s.first_of(",] :")); + _line_progressed(ref.len); + return ref; +} + +template +csubstr ParseEngine::_scan_ref_map() +{ + csubstr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*')); + csubstr ref = s.first(s.first_of(",} ")); + _line_progressed(ref.len); + return ref; +} + +template +csubstr ParseEngine::_scan_tag() +{ + csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' '); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!')); + csubstr t; + if(rem.begins_with("!!")) + { + _c4dbgp("begins with '!!'"); + if(has_any(FLOW)) + t = rem.left_of(rem.first_of(" ,")); + else + t = rem.left_of(rem.first_of(' ')); + } + else if(rem.begins_with("!<")) + { + _c4dbgp("begins with '!<'"); + t = rem.left_of(rem.first_of('>'), true); + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if(rem.begins_with("!h!")) + { + _c4dbgp("begins with '!h!'"); + t = rem.left_of(rem.first_of(' ')); + } + #endif + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!')); + _c4dbgp("begins with '!'"); + if(has_any(FLOW)) + t = rem.left_of(rem.first_of(" ,")); + else + t = rem.left_of(rem.first_of(' ')); + } + _line_progressed(t.len); + _maybe_skip_whitespace_tokens(); + return t; +} + + +//----------------------------------------------------------------------------- + +template +bool ParseEngine::_is_valid_start_scalar_plain_flow(csubstr s) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty()); + + // it's not a scalar if it starts with any of these characters: + switch(s.str[0]) + { + // these are all legal tokens which mean no scalar is starting: + case '[': + case ']': + case '{': + case '}': + case '!': + case '&': + case '*': + case '|': + case '>': + case '#': + _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0])); + return false; + // '-' and ':' are illegal at the beginning if not followed by a scalar 
character + case '-': + case ':': + if(s.len > 1) + { + switch(s.str[1]) + { + case '\n': + case '\r': + case '{': + case '[': + //_RYML_WITHOUT_TAB_TOKENS(case '\t'): + _c4err("invalid token \":{}\"", _c4prc(s.str[1])); + break; + case ' ': + case '}': + case ']': + if(s.str[0] == ':') + { + _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]); + return false; + } + break; + default: + break; + } + } + else + { + return false; + } + break; + case '?': + if(s.len > 1) + { + switch(s.str[1]) + { + case ' ': + case '\n': + case '\r': + _RYML_WITHOUT_TAB_TOKENS(case '\t':) + _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1])); + return false; + case '{': + case '}': + case '[': + case ']': + _c4err("invalid token \"?{}\"", _c4prc(s.str[1])); + break; + default: + break; + } + } + else + { + return false; + } + break; + // everything else is a legal starting character + default: + break; + } + + return true; +} + +template +bool ParseEngine::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL)); + + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n')); + + if(!s.len) + return false; + + if(!_is_valid_start_scalar_plain_flow(s)) + return false; + + _c4dbgp("scanning seqflow scalar..."); + + const size_t start_offset = m_evt_handler->m_curr->pos.offset; + bool needs_filter = false; + while(true) + { + _c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s); + for(size_t i = 0; i < s.len; ++i) + { + const char c 
= s.str[i]; + switch(c) + { + case ',': + _c4dbgpf("found terminating character at {}: '{}'", i, c); + _line_progressed(i); + if(m_evt_handler->m_curr->pos.offset + i > start_offset) + { + goto ended_scalar; + } + else + { + _c4dbgp("at the beginning. no scalar here."); + return false; + } + break; + case ']': + _c4dbgpf("found terminating character at {}: '{}'", i, c); + _line_progressed(i); + goto ended_scalar; + break; + case '#': + _c4dbgp("found suspicious '#'"); + if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))) + { + _c4dbgpf("found terminating character at {}: '{}'", i, c); + _line_progressed(i); + goto ended_scalar; + } + break; + case ':': + _c4dbgp("found suspicious ':'"); + if(s.len > i+1) + { + const char next = s.str[i+1]; + _c4dbgpf("next char is '{}'", _c4prc(next)); + if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t')) + { + _c4dbgp("map starting!"); + if(m_evt_handler->m_curr->pos.offset + i > start_offset) + { + _c4dbgp("scalar finished!"); + _line_progressed(i); + goto ended_scalar; + } + else + { + _c4dbgp("at the beginning. no scalar here."); + return false; + } + } + else + { + _c4dbgp("it's a scalar indeed."); + ++i; // skip the next char + } + } + else if(s.len == i+1) + { + _c4dbgp("':' at line end. 
map starting!"); + return false; + } + break; + case '[': + case '{': + case '}': + _line_progressed(i); + _c4err("invalid character: '{}'", c); // noreturn + default: + ; + } + } + _line_progressed(s.len); + if(!_finished_file()) + { + _c4dbgp("next line!"); + _line_ended(); + _scan_line(); + } + else + { + _c4dbgp("file finished!"); + goto ended_scalar; + } + s = m_evt_handler->m_curr->line_contents.rem; + needs_filter = true; + } + +ended_scalar: + + sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + sc->needs_filter = needs_filter; + + _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true); + + return true; +} + +template +bool ParseEngine::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK)); + + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); + + if(!s.len) + return false; + + if(!_is_valid_start_scalar_plain_flow(s)) + return false; + + _c4dbgp("scanning scalar..."); + + const size_t start_offset = m_evt_handler->m_curr->pos.offset; + bool needs_filter = false; + while(true) + { + for(size_t i = 0; i < s.len; ++i) + { + const char c = s.str[i]; + switch(c) + { + case ',': + case '}': + _line_progressed(i); + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + case ':': + if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t')) + { + _line_progressed(i); + _c4dbgpf("found terminating character: '{}'", c); + 
goto ended_scalar; + } + break; + case '{': + case '[': + _line_progressed(i); + _c4err("invalid character: '{}'", c); // noreturn + break; + case ']': + _line_progressed(i); + if(has_any(RSEQIMAP)) + goto ended_scalar; + else + _c4err("invalid character: '{}'", c); // noreturn + break; + case '#': + if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')) + { + _line_progressed(i); + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + } + break; + default: + ; + } + } + _c4dbgp("next line!"); + _line_progressed(s.len); + if(!_finished_file()) + { + _c4dbgp("next line!"); + _line_ended(); + _scan_line(); + } + else + { + _c4dbgp("file finished!"); + goto ended_scalar; + } + s = m_evt_handler->m_curr->line_contents.rem; + needs_filter = true; + } + +ended_scalar: + + sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r")); + sc->needs_filter = needs_filter; + + _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar); + + return true; +} + +template +bool ParseEngine::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW)); + + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); + + if(!s.len) + return false; + + _c4dbgp("scanning scalar..."); + + switch(s.str[0]) + { + case ']': + case '{': + case ',': + _c4dbgp("not a scalar."); + return false; + } + + { + const size_t len = _is_special_json_scalar(s); + if(len) + { + sc->scalar = s.first(len); + sc->needs_filter = false; + _c4dbgpf("special json scalar: '{}'", sc->scalar); + _line_progressed(len); + return true; + } + } + + // must be a 
number + size_t i = 0; + for( ; i < s.len; ++i) + { + const char c = s.str[i]; + switch(c) + { + case ',': + case ']': + case ' ': + case '\t': + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + case '#': + if(!i || s.str[i-1] == ' ') + { + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + } + break; + default: + ; + } + } + +ended_scalar: + + if(C4_LIKELY(i > 0)) + { + _line_progressed(i); + sc->scalar = s.first(i); + sc->needs_filter = false; + _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar); + return true; + } + + return false; +} + +template +bool ParseEngine::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL)); + + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); + + if(!s.len) + return false; + + _c4dbgp("scanning scalar..."); + + { + const size_t len = _is_special_json_scalar(s); + if(len) + { + sc->scalar = s.first(len); + sc->needs_filter = false; + _c4dbgpf("special json scalar: '{}'", sc->scalar); + _line_progressed(len); + return true; + } + } + + // must be a number + size_t i = 0; + for( ; i < s.len; ++i) + { + const char c = s.str[i]; + switch(c) + { + case ',': + case '}': + case ' ': + case '\t': + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + case '#': + if(!i || s.str[i-1] == ' ') + { + _c4dbgpf("found terminating character: '{}'", c); + goto ended_scalar; + } + break; + default: + ; + } + } + +ended_scalar: + + if(C4_LIKELY(i > 0)) + { + _line_progressed(i); + sc->scalar = s.first(i); + sc->needs_filter = false; + 
_c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar); + return true; + } + + return false; +} + +template +bool ParseEngine::_is_doc_begin(csubstr s) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-'); + return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s)); +} + +template +bool ParseEngine::_is_doc_end(csubstr s) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.'); + return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s)); +} + +template +bool ParseEngine::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY)); + + substr s = m_evt_handler->m_curr->line_contents.rem; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' ')); + + if(!s.len) + return false; + + switch(s.str[0]) + { + case '-': + if(_is_blck_token(s)) + { + return false; + } + else if(_is_doc_begin(s)) + { + _c4dbgp("token is doc start"); + return false; + } + break; + case ':': + case '?': + if(_is_blck_token(s)) + return false; + break; + case '[': + case '{': + case '&': + case '*': + case '!': + _RYML_WITH_TAB_TOKENS(case '\t':) + return false; + case '.': + if(_is_doc_end(s)) + { + _c4dbgp("token is doc end"); + return false; + } + break; + } + + _c4dbgpf("plain scalar! 
indentation={}", indentation); + + const size_t start_offset = m_evt_handler->m_curr->pos.offset; + const size_t start_line = m_evt_handler->m_curr->pos.line; + + bool needs_filter = false; + while(true) + { + _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s); + for(size_t i = 0; i < s.len; ++i) + { + const char curr = s.str[i]; + //_c4dbgpf("[{}]='{}'", i, _c4prc(curr)); + switch(curr) + { + case ':': + _c4dbgpf("[{}]: got suspicious ':'", i); + // are there more characters? + if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t')))) + { + _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1])); + _line_progressed(i); + // ': ' is accepted only on the first line + if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line)) + { + _c4dbgp("start line. scalar ends here"); + goto ended_scalar; + } + else + { + _c4err("parse error"); + } + } + else + { + size_t j = i; + while(j + 1 < s.len && s.str[j+1] == ':') + { + _c4dbgp("skip colon"); + ++j; + } + i = j > i ? j-1 : i; + _c4dbgp("nothing to see here"); + } + break; + case '#': + _c4dbgp("got suspicious '#'"); + if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t')) + { + _c4dbgp("comment! scalar ends here"); + _line_progressed(i); + goto ended_scalar; + } + else + { + _c4dbgp("nothing to see here"); + } + break; + } + } + _line_progressed(s.len); + csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset); + next_peeked = next_peeked.trimr("\n\r"); + const size_t next_indentation = next_peeked.first_not_of(' '); + _c4dbgpf("indentation curr={} next={}", indentation, next_indentation); + if(next_indentation < indentation) + { + _c4dbgp("smaller indentation! scalar ended"); + goto ended_scalar; + } + else if(next_indentation == 0 && next_peeked.len > 0) + { + const char first = next_peeked.str[0]; + switch(first) + { + case '-': + next_peeked = next_peeked.trimr("\n\r"); + _c4dbgpf("doc begin? 
peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : ""); + if(_is_doc_begin_token(next_peeked)) + { + _c4dbgp("doc begin! scalar ended"); + goto ended_scalar; + } + break; + case '.': + next_peeked = next_peeked.trimr("\n\r"); + _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : ""); + if(_is_doc_end_token(next_peeked)) + { + _c4dbgp("doc end! scalar ended"); + goto ended_scalar; + } + break; + } + } + // load with next line + _c4dbgp("next line!"); + if(!_finished_file()) + { + _c4dbgp("next line!"); + _line_ended(); + _scan_line(); + } + else + { + _c4dbgp("file finished!"); + goto ended_scalar; + } + s = m_evt_handler->m_curr->line_contents.rem; + needs_filter = true; + } + +ended_scalar: + + sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t"); + sc->needs_filter = needs_filter; + + _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar); + + return true; +} + +template +bool ParseEngine::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL)); + return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u); +} + +template +bool ParseEngine::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP)); 
+ _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK)); + return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u); +} + +template +bool ParseEngine::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY)); + return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref); +} + + +//----------------------------------------------------------------------------- + +template +substr ParseEngine::_peek_next_line(size_t pos) const +{ + substr rem{}; // declare here because of the goto + size_t nlpos{}; // declare here because of the goto + pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos; + if(pos >= m_buf.len) + goto next_is_empty; + + // look for the next newline chars, and jump to the right of those + rem = from_next_line(m_buf.sub(pos)); + if(rem.empty()) + goto next_is_empty; + + // now get everything up to and including the following newline chars + nlpos = rem.first_of("\r\n"); + if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len)) + nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]); + rem = rem.left_of(nlpos, /*include_pos*/true); + + _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n")); + return rem; + +next_is_empty: + _c4dbgpf("peek next line @ {}: (len=0)''", pos); + return {}; +} + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_scan_line() +{ + if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len)) + m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset); + else + m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0)); +} + +template +void ParseEngine::_line_progressed(size_t ahead) +{ + _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", 
m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead); + m_evt_handler->m_curr->pos.offset += ahead; + m_evt_handler->m_curr->pos.col += ahead; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1); + m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead); +} + +template +void ParseEngine::_line_ended() +{ + _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}", + m_evt_handler->m_curr->pos.line, + m_evt_handler->m_curr->line_contents.full.len, + m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len, + m_evt_handler->m_curr->pos.col, 1); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1); + m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len; + ++m_evt_handler->m_curr->pos.line; + m_evt_handler->m_curr->pos.col = 1; +} + +template +void ParseEngine::_line_ended_undo() +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len); + const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len; + _c4dbgpf("line[{}] undo ended! 
line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta); + m_evt_handler->m_curr->pos.offset -= delta; + --m_evt_handler->m_curr->pos.line; + m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u; + // don't forget to undo also the changes to the remainder of the line + //_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r'); + m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0); +} + + +//----------------------------------------------------------------------------- +template +void ParseEngine::_set_indentation(size_t indentation) +{ + m_evt_handler->m_curr->indref = indentation; + _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); +} + +template +void ParseEngine::_save_indentation() +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin()); + m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col(); + _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_end_map_blck() +{ + _c4dbgp("mapblck: end"); + if(has_any(RKCL|RVAL)) + { + _c4dbgp("mapblck: set missing val"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + } + else if(has_any(QMRK)) + { + _c4dbgp("mapblck: set missing keyval"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain({}); + 
_handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + } + m_evt_handler->end_map(); +} + +template +void ParseEngine::_end_seq_blck() +{ + if(has_any(RVAL)) + { + _c4dbgp("seqblck: set missing val"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + } + m_evt_handler->end_seq(); +} + +template +void ParseEngine::_end2_map() +{ + _c4dbgp("map: end"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP)); + if(has_any(BLCK)) + { + _end_map_blck(); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY)); + m_evt_handler->_pop(); + } +} + +template +void ParseEngine::_end2_seq() +{ + _c4dbgp("seq: end"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ)); + if(has_any(BLCK)) + { + _end_seq_blck(); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY)); + m_evt_handler->_pop(); + } +} + +template +void ParseEngine::_begin2_doc() +{ + m_doc_empty = true; + add_flags(RDOC); + m_evt_handler->begin_doc(); + m_evt_handler->m_curr->indref = 0; // ? +} + +template +void ParseEngine::_begin2_doc_expl() +{ + m_doc_empty = true; + add_flags(RDOC); + m_evt_handler->begin_doc_expl(); + m_evt_handler->m_curr->indref = 0; // ? 
+} + +template +void ParseEngine::_end2_doc() +{ + _c4dbgp("doc: end"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC)); + if(m_doc_empty) + { + _c4dbgp("doc was empty; add empty val"); + m_evt_handler->set_val_scalar_plain({}); + } + m_evt_handler->end_doc(); +} + +template +void ParseEngine::_end2_doc_expl() +{ + _c4dbgp("doc: end"); + if(m_doc_empty) + { + _c4dbgp("doc: no children; add empty val"); + m_evt_handler->set_val_scalar_plain({}); + } + m_evt_handler->end_doc_expl(); +} + +template +void ParseEngine::_maybe_begin_doc() +{ + if(has_none(RDOC)) + { + _c4dbgp("doc must be started"); + _begin2_doc(); + } +} +template +void ParseEngine::_maybe_end_doc() +{ + if(has_any(RDOC)) + { + _c4dbgp("doc must be finished"); + _end2_doc(); + } +} + +template +void ParseEngine::_end_doc_suddenly__pop() +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1); + if(m_evt_handler->m_stack[0].flags & RDOC) + { + _c4dbgp("root is RDOC"); + if(m_evt_handler->m_curr->level != 0) + _handle_indentation_pop(&m_evt_handler->m_stack[0]); + } + else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC)) + { + _c4dbgp("root is STREAM"); + if(m_evt_handler->m_curr->level != 1) + _handle_indentation_pop(&m_evt_handler->m_stack[1]); + } + else + { + _c4err("internal error"); + } + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC)); +} + +template +void ParseEngine::_end_doc_suddenly() +{ + _c4dbgp("end doc suddenly"); + _end_doc_suddenly__pop(); + _end2_doc_expl(); + addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC); +} + +template +void ParseEngine::_start_doc_suddenly() +{ + _c4dbgp("start doc suddenly"); + _end_doc_suddenly__pop(); + _end2_doc(); + _begin2_doc_expl(); +} + +template +void ParseEngine::_end_stream() +{ + _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id); + if(has_all(RSEQ|FLOW)) + _c4err("missing terminating ]"); + 
else if(has_all(RMAP|FLOW)) + _c4err("missing terminating }"); + if(m_evt_handler->m_stack.size() > 1) + _handle_indentation_pop(m_evt_handler->m_stack.begin()); + if(has_all(RDOC)) + { + _end2_doc(); + } + else if(has_all(RTOP|RUNK)) + { + if(m_pending_anchors.num_entries || m_pending_tags.num_entries) + { + if(m_doc_empty) + { + m_evt_handler->begin_doc(); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_doc(); + } + } + } + m_evt_handler->end_stream(); +} + + +template +void ParseEngine::_handle_indentation_pop(ParserState const* popto) +{ + _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref); + while(m_evt_handler->m_curr != popto) + { + if(has_any(RSEQ)) + { + _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr); + _end2_seq(); + } + else if(has_any(RMAP)) + { + _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr); + _end2_map(); + } + else + { + break; + } + } + _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); +} + +template +void ParseEngine::_handle_indentation_pop_from_block_seq() +{ + // search the stack frame to jump to based on its indentation + using state_type = typename EventHandler::state; + state_type const* popto = nullptr; + auto &stack = m_evt_handler->m_stack; + _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous + _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end()); + const size_t ind = 
m_evt_handler->m_curr->line_contents.indentation; + #ifdef RYML_DBG + if(_dbg_enabled()) + { + char flagbuf_[128]; + for(state_type const& s : stack) + _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags)); + } + #endif + for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s) + { + _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id); + if(s->indref == ind) + { + _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id); + popto = s; + break; + } + } + if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level) + { + _c4err("parse error: incorrect indentation?"); + } + _handle_indentation_pop(popto); +} + +template +void ParseEngine::_handle_indentation_pop_from_block_map() +{ + // search the stack frame to jump to based on its indentation + using state_type = typename EventHandler::state; + auto &stack = m_evt_handler->m_stack; + _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous + _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end()); + const size_t ind = m_evt_handler->m_curr->line_contents.indentation; + state_type const* popto = nullptr; + #ifdef RYML_DBG + char flagbuf_[128]; + if(_dbg_enabled()) + { + for(state_type const& s : stack) + _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags)); + } + #endif + for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root + { + _c4dbgpf("searching for state with indentation {}. 
current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags)); + if(s->indref < ind) + { + break; + } + else if(s->indref == ind) + { + _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id); + if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s)) + { + break; + } + popto = s; + if(has_all(RSEQ|BLCK, s)) + { + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + const size_t first = rem.first_not_of(' '); + _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos); + rem = rem.right_of(first, true); + _c4dbgpf("indentless? rem='{}' first={}", rem, first); + if(rem.begins_with('-') && _is_blck_token(rem)) + { + _c4dbgp("parent was indentless seq"); + break; + } + } + } + } + if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level) + { + _c4err("parse error: incorrect indentation?"); + } + _handle_indentation_pop(popto); +} + + +//----------------------------------------------------------------------------- +template +typename ParseEngine::ScannedScalar ParseEngine::_scan_scalar_squot() +{ + // quoted scalars can spread over multiple lines! 
+ // nice explanation here: http://yaml-multiline.info/ + + // a span to the end of the file + size_t b = m_evt_handler->m_curr->pos.offset; + substr s = m_buf.sub(b); + if(s.begins_with(' ')) + { + s = s.triml(' '); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); + _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); + } + b = m_evt_handler->m_curr->pos.offset; // take this into account + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\'')); + + // skip the opening quote + _line_progressed(1); + s = s.sub(1); + + bool needs_filter = false; + + size_t numlines = 1; // we already have one line + size_t pos = npos; // find the pos of the matching quote + while( ! _finished_file()) + { + const csubstr line = m_evt_handler->m_curr->line_contents.rem; + bool line_is_blank = true; + _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line); + for(size_t i = 0; i < line.len; ++i) + { + const char curr = line.str[i]; + if(curr == '\'') // single quotes are escaped with two single quotes + { + const char next = i+1 < line.len ? 
line.str[i+1] : '~'; + if(next != '\'') // so just look for the first quote + { // without another after it + pos = i; + break; + } + else + { + needs_filter = true; // needs filter to remove escaped quotes + ++i; // skip the escaped quote + } + } + else if(curr != ' ') + { + line_is_blank = false; + } + } + + // leading whitespace also needs filtering + needs_filter = needs_filter + || (numlines > 1) + || line_is_blank + || (_at_line_begin() && line.begins_with(' ')); + + if(pos == npos) + { + _line_progressed(line.len); + ++numlines; + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\''); + _line_progressed(pos + 1); // progress beyond the quote + pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it + break; + } + + _line_ended(); + _scan_line(); + } + + if(pos == npos) + { + _c4err("reached end of file while looking for closing quote"); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\''); + s = s.sub(0, pos-1); + } + + _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true); + + return ScannedScalar { s, needs_filter }; +} + + +//----------------------------------------------------------------------------- +template +typename ParseEngine::ScannedScalar ParseEngine::_scan_scalar_dquot() +{ + // quoted scalars can spread over multiple lines! 
+ // nice explanation here: http://yaml-multiline.info/ + + // a span to the end of the file + size_t b = m_evt_handler->m_curr->pos.offset; + substr s = m_buf.sub(b); + if(s.begins_with(' ')) + { + s = s.triml(' '); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); + _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); + } + b = m_evt_handler->m_curr->pos.offset; // take this into account + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"')); + + // skip the opening quote + _line_progressed(1); + s = s.sub(1); + + bool needs_filter = false; + + size_t numlines = 1; // we already have one line + size_t pos = npos; // find the pos of the matching quote + while( ! _finished_file()) + { + const csubstr line = m_evt_handler->m_curr->line_contents.rem; + bool line_is_blank = true; + _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line); + for(size_t i = 0; i < line.len; ++i) + { + const char curr = line.str[i]; + if(curr != ' ') + line_is_blank = false; + // every \ is an escape + if(curr == '\\') + { + const char next = i+1 < line.len ? 
line.str[i+1] : '~'; + needs_filter = true; + if(next == '"' || next == '\\') + ++i; + } + else if(curr == '"') + { + pos = i; + break; + } + } + + // leading whitespace also needs filtering + needs_filter = needs_filter + || (numlines > 1) + || line_is_blank + || (_at_line_begin() && line.begins_with(' ')); + + if(pos == npos) + { + _line_progressed(line.len); + ++numlines; + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"'); + _line_progressed(pos + 1); // progress beyond the quote + pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it + break; + } + + _line_ended(); + _scan_line(); + } + + if(pos == npos) + { + _c4err("reached end of file looking for closing quote"); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"'); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); + s = s.sub(0, pos-1); + } + + _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true); + + return ScannedScalar { s, needs_filter }; +} + + +//----------------------------------------------------------------------------- +template +void ParseEngine::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref) +{ + _c4dbgpf("blck: indref={}", indref); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos); + + // nice explanation here: http://yaml-multiline.info/ + csubstr s = m_evt_handler->m_curr->line_contents.rem; + csubstr trimmed = s.triml(' '); + if(trimmed.str > s.str) + { + _c4dbgp("skipping whitespace"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str); + _line_progressed(static_cast(trimmed.str - s.str)); + s = trimmed; + } + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 
s.begins_with('|') || s.begins_with('>')); + + _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s); + + // parse the spec + BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used + size_t indentation = npos; // have to find out if no spec is given + csubstr digits; + if(s.len > 1) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>")); + csubstr t = s.sub(1); + _c4dbgpf("blck: spec is multichar: '{}'", t); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1); + size_t pos = t.first_of("-+"); + _c4dbgpf("blck: spec chomp char at {}", pos); + if(pos != npos) + { + if(t[pos] == '-') + chomp = CHOMP_STRIP; + else if(t[pos] == '+') + chomp = CHOMP_KEEP; + if(pos == 0) + t = t.sub(1); + else + t = t.first(pos); + } + // from here to the end, only digits are considered + digits = t.left_of(t.first_not_of("0123456789")); + if( ! digits.empty()) + { + if(C4_UNLIKELY(digits.len > 1)) + _c4err("parse error: invalid indentation"); + _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits); + if(C4_UNLIKELY( ! c4::atou(digits, &indentation))) + _c4err("parse error: could not read indentation as decimal"); + if(C4_UNLIKELY( ! indentation)) + _c4err("parse error: null indentation"); + _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref); + indentation += m_evt_handler->m_curr->indref; + } + } + + _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? 
"strip" : "keep"), indentation); + + // finish the current line + _line_progressed(s.len); + _line_ended(); + _scan_line(); + + // start with a zero-length block, already pointing at the right place + substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin()); + + // read every full line into a raw block, + // from which newlines are to be stripped as needed. + // + // If no explicit indentation was given, pick it from the first + // non-empty line. See + // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator + size_t num_lines = 0; + size_t first = m_evt_handler->m_curr->pos.line; + size_t provisional_indentation = npos; + LineContents lc; + while(( ! _finished_file())) + { + // peek next line, but do not advance immediately + lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset); + _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped); + // evaluate termination conditions + if(indentation != npos) + { + _c4dbgpf("blck: indentation={}", indentation); + // stop when the line is deindented and not empty + if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty())) + { + if(raw_block.len) + { + _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation); + } + else + { + _c4err("indentation decreased without any scalar"); + } + break; + } + else if(indentation == 0) + { + _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem); + if(_is_doc_token(lc.rem)) + { + _c4dbgp("blck: stop. indentation=0 and doc ended"); + break; + } + } + } + else + { + const size_t fns = lc.stripped.first_not_of(' '); + _c4dbgpf("blck: indentation ref not set. 
firstnonws={}", fns); + if(fns != npos) // non-empty line + { + _RYML_WITH_TAB_TOKENS( + if(C4_UNLIKELY(lc.stripped.begins_with('\t'))) + _c4err("parse error"); + ) + _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation); + if(provisional_indentation == npos) + { + if(lc.indentation < indref) + { + _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref); + if(raw_block.len == 0) + { + _c4dbgp("blck: was empty, undo next line"); + _line_ended_undo(); + } + break; + } + else if(lc.indentation == m_evt_handler->m_curr->indref) + { + if(has_any(RSEQ|RMAP)) + { + _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref); + break; + } + } + _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation); + indentation = lc.indentation; + } + else + { + if(lc.indentation >= provisional_indentation) + { + _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation); + //indentation = provisional_indentation ? provisional_indentation : lc.indentation; + indentation = lc.indentation; + } + else + { + break; + //_c4err("parse error: first non-empty block line should have at least the original indentation"); + } + } + } + else // empty line + { + _c4dbgpf("blck: line empty or {} spaces. 
line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation); + if(provisional_indentation != npos) + { + if(lc.stripped.len >= provisional_indentation) + { + _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len); + provisional_indentation = lc.stripped.len; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if(lc.indentation >= provisional_indentation && lc.indentation != npos) + { + _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation); + provisional_indentation = lc.indentation; + } + #endif + } + else + { + provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL); + _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation); + if(provisional_indentation == npos) + { + provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL); + _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation); + } + if(provisional_indentation < indref) + { + provisional_indentation = indref; + _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation); + } + } + } + } + // advance now that we know the folded scalar continues + m_evt_handler->m_curr->line_contents = lc; + _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem); + raw_block.len += m_evt_handler->m_curr->line_contents.full.len; + _line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + _line_ended(); + ++num_lines; + } + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0)); + C4_UNUSED(num_lines); + C4_UNUSED(first); + + if(indentation == npos) + { + _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation); + indentation = provisional_indentation; + } + + if(num_lines) + _line_ended_undo(); + + _c4prscalar("scanned block", raw_block, /*keep_newlines*/true); + + sb->scalar = raw_block; + 
sb->indentation = indentation; + sb->chomp = chomp; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// a debugging scaffold: +#if 0 +#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfws(...) +#endif + +template +template +bool ParseEngine::_filter_ws_handle_to_first_non_space(FilterProcessor &proc) +{ + _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr())); + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t'); + + const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos); + if(first_pos != npos) + { + const char first_char = proc.src[first_pos]; + _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos); + if(first_char == '\n' || first_char == '\r') // skip trailing whitespace + { + _c4dbgfws("whitespace is trailing on line", ""); + proc.skip(first_pos - proc.rpos); + } + else // a legit whitespace + { + proc.copy(); + _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); + } + return true; + } + _c4dbgfws("whitespace is trailing on line", ""); + return false; +} + +template +template +void ParseEngine::_filter_ws_copy_trailing(FilterProcessor &proc) +{ + if(!_filter_ws_handle_to_first_non_space(proc)) + { + _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos); + proc.copy(proc.src.len - proc.rpos); + } +} + +template +template +void ParseEngine::_filter_ws_skip_trailing(FilterProcessor &proc) +{ + if(!_filter_ws_handle_to_first_non_space(proc)) + { + _c4dbgfws("... 
everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos); + proc.skip(proc.src.len - proc.rpos); + } +} + +#undef _c4dbgfws + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/* plain scalars */ + +// a debugging scaffold: +#if 0 +#define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfps(fmt, ...) +#endif + +template +template +void ParseEngine::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) +{ + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); + + _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); + size_t ii = proc.rpos; + const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation); + if(numnl_following) + { + proc.set('\n', numnl_following); + _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii); + } + else + { + const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1); + if(ret != npos) + { + proc.set(' '); + _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); + } + else + { + _c4dbgfps("last newline, everything else is whitespace. 
ii={}/{}", ii, proc.src.len); + ii = proc.src.len; + } + } + proc.rpos = ii; +} + +template +template +auto ParseEngine::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result()) +{ + _RYML_CB_ASSERT(this->callbacks(), indentation != npos); + _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src); + + while(proc.has_more_chars()) + { + const char curr = proc.curr(); + _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case ' ': + _RYML_WITH_TAB_TOKENS(case '\t':) + _c4dbgfps("whitespace", curr); + _filter_ws_skip_trailing(proc); + break; + case '\n': + _c4dbgfps("newline", curr); + _filter_nl_plain(proc, /*indentation*/indentation); + break; + case '\r': // skip \r --- https://stackoverflow.com/questions/1885900 + _c4dbgfps("carriage return, ignore", curr); + proc.skip(); + break; + default: + proc.copy(); + break; + } + } + + _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar()); + + return proc.result(); +} + +#undef _c4dbgfps + + +template +FilterResult ParseEngine::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation) +{ + FilterProcessorSrcDst proc(scalar, dst); + return _filter_plain(proc, indentation); +} + +template +FilterResult ParseEngine::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation) +{ + FilterProcessorInplaceEndExtending proc(dst, cap); + return _filter_plain(proc, indentation); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/* single quoted */ + +// a debugging scaffold: +#if 0 +#define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfsq(fmt, ...) 
+#endif + +template +template +void ParseEngine::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc) +{ + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); + + _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); + size_t ii = proc.rpos; + const size_t numnl_following = _count_following_newlines(proc.src, &ii); + if(numnl_following) + { + proc.set('\n', numnl_following); + _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii); + } + else + { + const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1); + if(ret != npos) + { + proc.set(' '); + _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); + } + else + { + proc.set(' '); + _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); + } + } + proc.rpos = ii; +} + +template +template +auto ParseEngine::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result()) +{ + _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src); + + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted + while(proc.has_more_chars()) + { + const char curr = proc.curr(); + _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case ' ': + case '\t': + _c4dbgfsq("whitespace", curr); + _filter_ws_copy_trailing(proc); + break; + case '\n': + _c4dbgfsq("newline", curr); + _filter_nl_squoted(proc); + break; + case '\r': // skip \r --- https://stackoverflow.com/questions/1885900 + _c4dbgfsq("skip cr", curr); + proc.skip(); + break; + case '\'': + _c4dbgfsq("squote", curr); + if(proc.next() == '\'') + { + _c4dbgfsq("two consecutive squotes", curr); + proc.skip(); + proc.copy(); + } + else + { + _c4err("filter error"); + } + break; + default: + proc.copy(); + break; + } + } + + 
_c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar()); + + return proc.result(); +} + +#undef _c4dbgfsq + +template +FilterResult ParseEngine::filter_scalar_squoted(csubstr scalar, substr dst) +{ + FilterProcessorSrcDst proc(scalar, dst); + return _filter_squoted(proc); +} + +template +FilterResult ParseEngine::filter_scalar_squoted_in_place(substr dst, size_t cap) +{ + FilterProcessorInplaceEndExtending proc(dst, cap); + return _filter_squoted(proc); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/* double quoted */ + +// a debugging scaffold: +#if 0 +#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfdq(...) +#endif + +template +template +void ParseEngine::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc) +{ + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); + + _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); + size_t ii = proc.rpos; + const size_t numnl_following = _count_following_newlines(proc.src, &ii); + if(numnl_following) + { + proc.set('\n', numnl_following); + _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii); + } + else + { + const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1); + if(ret != npos) + { + proc.set(' '); + _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); + } + else + { + proc.set(' '); + _c4dbgfdq("single newline. convert to space. ii={}/{}. 
sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar()); + } + if(ii < proc.src.len && proc.src.str[ii] == '\\') + { + _c4dbgfdq("backslash at [{}]", ii); + const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0'; + if(next == ' ' || next == '\t') + { + _c4dbgfdq("extend skip to backslash", ""); + ++ii; + } + } + } + proc.rpos = ii; +} + +template +template +void ParseEngine::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc) +{ + char next = proc.next(); + _c4dbgfdq("backslash, next='{}'", _c4prc(next)); + if(next == '\r') + { + if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n') + { + proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented) + next = '\n'; + _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos); + } + } + + if(next == '\n') + { + size_t ii = proc.rpos + 2; + for( ; ii < proc.src.len; ++ii) + { + // skip leading whitespace + if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t') + ; + else + break; + } + proc.skip(ii - proc.rpos); + } + else if(next == '"' || next == '/' || next == ' ' || next == '\t') + { + // escapes for json compatibility + proc.translate_esc(next); + _c4dbgfdq("here, used '{}'", _c4prc(next)); + } + else if(next == '\r') + { + proc.skip(); + } + else if(next == 'n') + { + proc.translate_esc('\n'); + } + else if(next == 'r') + { + proc.translate_esc('\r'); + } + else if(next == 't') + { + proc.translate_esc('\t'); + } + else if(next == '\\') + { + proc.translate_esc('\\'); + } + else if(next == 'x') // UTF8 + { + if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len)) + _c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos); + csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u); + _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos)); + uint8_t byteval = {}; + if(C4_UNLIKELY(!read_hex(codepoint, &byteval))) + _c4err("failed to read \\x codepoint. 
scalar pos={}", proc.rpos); + proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u); + _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos)); + } + else if(next == 'u') // UTF16 + { + if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len)) + _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos); + char readbuf[8]; + csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u); + uint32_t codepoint_val = {}; + if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val))) + _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos); + const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + if(C4_UNLIKELY(numbytes == 0)) + _c4err("failed to decode code point={}", proc.rpos); + _RYML_CB_ASSERT(callbacks(), numbytes <= 4); + proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u); + } + else if(next == 'U') // UTF32 + { + if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len)) + _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos); + char readbuf[8]; + csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u); + uint32_t codepoint_val = {}; + if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val))) + _c4err("failed to parse \\U codepoint. 
scalar pos={}", proc.rpos); + const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + if(C4_UNLIKELY(numbytes == 0)) + _c4err("failed to decode code point={}", proc.rpos); + _RYML_CB_ASSERT(callbacks(), numbytes <= 4); + proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u); + } + // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char + else if(next == '0') + { + proc.translate_esc('\0'); + } + else if(next == 'b') // backspace + { + proc.translate_esc('\b'); + } + else if(next == 'f') // form feed + { + proc.translate_esc('\f'); + } + else if(next == 'a') // bell character + { + proc.translate_esc('\a'); + } + else if(next == 'v') // vertical tab + { + proc.translate_esc('\v'); + } + else if(next == 'e') // escape character + { + proc.translate_esc('\x1b'); + } + else if(next == '_') // unicode non breaking space \u00a0 + { + // https://www.compart.com/en/unicode/U+00a0 + const char payload[] = { + _RYML_CHCONST(-0x3e, 0xc2), + _RYML_CHCONST(-0x60, 0xa0), + }; + proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1); + } + else if(next == 'N') // unicode next line \u0085 + { + // https://www.compart.com/en/unicode/U+0085 + const char payload[] = { + _RYML_CHCONST(-0x3e, 0xc2), + _RYML_CHCONST(-0x7b, 0x85), + }; + proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1); + } + else if(next == 'L') // unicode line separator \u2028 + { + // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + const char payload[] = { + _RYML_CHCONST(-0x1e, 0xe2), + _RYML_CHCONST(-0x80, 0x80), + _RYML_CHCONST(-0x58, 0xa8), + }; + proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1); + } + else if(next == 'P') // unicode paragraph separator \u2029 + { + // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + const char payload[] = { + _RYML_CHCONST(-0x1e, 0xe2), + _RYML_CHCONST(-0x80, 0x80), + _RYML_CHCONST(-0x57, 0xa9), 
+ }; + proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1); + } + else if(next == '\0') + { + proc.skip(); + } + else + { + _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos); + } + _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar()); +} + + +template +template +auto ParseEngine::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result()) +{ + _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src); + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted + while(proc.has_more_chars()) + { + const char curr = proc.curr(); + _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case ' ': + case '\t': + { + _c4dbgfdq("whitespace", curr); + _filter_ws_copy_trailing(proc); + break; + } + case '\n': + { + _c4dbgfdq("newline", curr); + _filter_nl_dquoted(proc); + break; + } + case '\r': // skip \r --- https://stackoverflow.com/questions/1885900 + { + _c4dbgfdq("carriage return, ignore", curr); + proc.skip(); + break; + } + case '\\': + { + _filter_dquoted_backslash(proc); + break; + } + default: + { + proc.copy(); + break; + } + } + } + _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar()); + return proc.result(); +} + +#undef _c4dbgfdq + + +template +FilterResult ParseEngine::filter_scalar_dquoted(csubstr scalar, substr dst) +{ + FilterProcessorSrcDst proc(scalar, dst); + return _filter_dquoted(proc); +} + +template +FilterResultExtending ParseEngine::filter_scalar_dquoted_in_place(substr dst, size_t cap) +{ + FilterProcessorInplaceMidExtending proc(dst, cap); + return _filter_dquoted(proc); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// block filtering helpers + +template +template 
+void ParseEngine::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation) +{ + _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP); + _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos); + + // a debugging scaffold: + #if 0 + #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) + #else + #define _c4dbgchomp(...) + #endif + + // advance to the last line having spaces beyond the indentation + { + size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation); + if(last != npos) + { + _c4dbgchomp("found newline and larger indentation. last={}", last); + last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read. + _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len); + // remove indentation spaces, copy the rest + while((proc.rpos < last) && proc.has_more_chars()) + { + const char curr = proc.curr(); + _c4dbgchomp("curr='{}'", _c4prc(curr)); + switch(curr) + { + case '\n': + { + _c4dbgchomp("newline! remlen={}", proc.rem().len); + proc.copy(); + // are there spaces after the newline? + csubstr at_next_line = proc.rem(); + if(at_next_line.begins_with(' ')) + { + _c4dbgchomp("next line begins with spaces. indentation={}", indentation); + // there are spaces. + size_t first_non_space = at_next_line.first_not_of(' '); + _c4dbgchomp("first_non_space={}", first_non_space); + if(first_non_space == npos) + { + _c4dbgchomp("{} spaces, to the end", at_next_line.len); + first_non_space = at_next_line.len; + } + if(first_non_space <= indentation) + { + _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation); + proc.skip(first_non_space); + } + else + { + _c4dbgchomp("skip indentation={}{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfb(...) 
+#endif + +template +template +void ParseEngine::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation) +{ + csubstr rem = proc.rem(); // remaining + if(rem.len) + { + size_t first = rem.first_not_of(' '); + if(first != npos) + { + _c4dbgfb("{} spaces follow before next nonws character", first); + if(first < indentation) + { + _c4dbgfb("skip {}<{} spaces from indentation", first, indentation); + proc.skip(first); + } + else + { + _c4dbgfb("skip {} spaces from indentation", indentation); + proc.skip(indentation); + } + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else + { + _c4dbgfb("all spaces to the end: {} spaces", first); + first = rem.len; + if(first) + { + if(first < indentation) + { + _c4dbgfb("skip everything", first); + proc.skip(proc.src.len - proc.rpos); + } + else + { + _c4dbgfb("skip {} spaces from indentation", indentation); + proc.skip(indentation); + } + } + } + #endif + } +} + +template +template +size_t ParseEngine::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp) +{ + csubstr contents = proc.src.trimr(" \n\r"); + _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len); + if(!contents.len) + { + _c4dbgfb("ws: all whitespace: len={}", proc.src.len); + if(chomp == CHOMP_KEEP && proc.src.len) + { + _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n')); + while(proc.has_more_chars()) + { + const char curr = proc.curr(); + if(curr == '\n') + proc.copy(); + else + proc.skip(); + } + if(!proc.wpos) + { + proc.set('\n'); + } + } + } + return contents.len; +} + +template +template +size_t ParseEngine::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len) +{ + _c4dbgfb("contents_len={}", contents_len); + + _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u); + + // extend contents to just before the first newline at the end, + // in case it is preceded by spaces + size_t firstnewl = proc.src.first_of('\n', contents_len); + if(firstnewl != npos) + 
{ + contents_len = firstnewl; + _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl); + } + else + { + contents_len = proc.src.len; + _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len); + } + + return contents_len; +} + +#undef _c4dbgfb + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// a debugging scaffold: +#if 0 +#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfbl(...) +#endif + +template +template +auto ParseEngine::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result()) +{ + _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src); + + size_t contents_len = _handle_all_whitespace(proc, chomp); + if(!contents_len) + return proc.result(); + + contents_len = _extend_to_chomp(proc, contents_len); + + _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len)); + + _filter_block_indentation(proc, indentation); + + // now filter the bulk + while(proc.has_more_chars(/*maxpos*/contents_len)) + { + const char curr = proc.curr(); + _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case '\n': + { + _c4dbgfbl("found newline. 
skip indentation on the next line", curr); + proc.copy(); // copy the newline + _filter_block_indentation(proc, indentation); + break; + } + case '\r': + proc.skip(); + break; + default: + proc.copy(); + break; + } + } + + _c4dbgfbl("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar()); + + _filter_chomp(proc, chomp, indentation); + + _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar()); + + return proc.result(); +} + +#undef _c4dbgfbl + +template +FilterResult ParseEngine::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp) +{ + FilterProcessorSrcDst proc(scalar, dst); + return _filter_block_literal(proc, indentation, chomp); +} + +template +FilterResult ParseEngine::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp) +{ + FilterProcessorInplaceEndExtending proc(scalar, cap); + return _filter_block_literal(proc, indentation, chomp); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// a debugging scaffold: +#if 0 +#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__) +#else +#define _c4dbgfbf(...) 
+#endif + + +template +template +void ParseEngine::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len) +{ + _filter_block_indentation(proc, indentation); + while(proc.has_more_chars(len)) + { + const char curr = proc.curr(); + _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case '\n': + _c4dbgfbf("newline.", curr); + proc.copy(); + _filter_block_indentation(proc, indentation); + break; + case '\r': + proc.skip(); + break; + case ' ': + case '\t': + { + size_t first = proc.rem().first_not_of(" \t"); + _c4dbgfbf("space. first={}", first); + if(first == npos) + first = proc.rem().len; + _c4dbgfbf("... indentation increased to {}", first); + _filter_block_folded_indented_block(proc, indentation, len, first); + break; + } + default: + _c4dbgfbf("newl leading: not space, not newline. stop.", 0); + return; + } + } +} + +template +template +size_t ParseEngine::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl) +{ + switch(num_newl) + { + case 1u: + _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos); + wpos_at_first_newl = proc.wpos; + proc.skip(); + proc.set(' '); + break; + case 2u: + _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl); + _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos); + _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' '); + _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos); + proc.skip(); + proc.set_at(wpos_at_first_newl, '\n'); + _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n'); + break; + default: + _c4dbgfbf("... subsequent newline (num_newl={}). 
copy", num_newl); + proc.copy(); + break; + } + return wpos_at_first_newl; +} + +template +template +void ParseEngine::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len) +{ + _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); + size_t num_newl = 0; + size_t wpos_at_first_newl = npos; + while(proc.has_more_chars(len)) + { + const char curr = proc.curr(); + _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case '\n': + { + _c4dbgfbf("newline. sofar={}", num_newl); + // NOTE: vs2022-32bit-release builds were giving wrong + // results in this block, if it was written as either + // as a switch(num_newl) or its equivalent if-form. + // + // For this reason, we're using a dedicated function + // (**_compress), which seems to work around the issue. + // + // The manifested problem was that somewhere between the + // assignment to curr and this point, proc.wpos (the + // write-position of the processor) jumped to npos, which + // made the write wrap-around! To make things worse, + // enabling prints via _c4dbgpf() and _c4dbgfbf() made the + // problem go away! + // + // The only way to make the problem appear with prints + // enabled was by disabling all prints in this function + // (including in the block which was moved to the compress + // function) and then selectively enabling only some of + // those prints. + // + // This may be due to some bug in the cl-x86 optimizer; or + // it may be triggered by some UB which may be + // inadvertedly present in this function or in the filter + // processor. This is despite our best efforts to weed out + // any such UB problem: neither clang-tidy nor none of the + // sanitizers, or gcc's -fanalyzer pointed to any problems + // in this code. + // + // In the end, moving this block to a separate function + // was the only way to bury the problem. 
But it may + // resurface again, as The Undead, rising to from the + // grave to haunt us with his terrible presence. + // + // We may have to revisit this. With a stake, and lots of + // garlic. + wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl); + _filter_block_indentation(proc, indentation); + break; + } + case ' ': + case '\t': + { + size_t first = proc.rem().first_not_of(" \t"); + _c4dbgfbf("space. first={}", first); + if(first == npos) + first = proc.rem().len; + _c4dbgfbf("... indentation increased to {}", first); + if(num_newl) + { + _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl); + proc.set_at(wpos_at_first_newl, '\n'); + } + if(num_newl > 1u) + { + _c4dbgfbf("... add missing newline", wpos_at_first_newl); + proc.set('\n'); + } + _filter_block_folded_indented_block(proc, indentation, len, first); + num_newl = 0; + wpos_at_first_newl = npos; + break; + } + case '\r': + proc.skip(); + break; + default: + _c4dbgfbf("not space, not newline. stop.", 0); + return; + } + } +} + + +template +template +void ParseEngine::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept +{ + _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos)); + if(curr_indentation) + proc.copy(curr_indentation); + while(proc.has_more_chars(len)) + { + const char curr = proc.curr(); + _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case '\n': + { + proc.copy(); + _filter_block_indentation(proc, indentation); + csubstr rem = proc.rem(); + const size_t first = rem.first_not_of(' '); + _c4dbgfbf("newline. 
firstns={}", first); + if(first == 0) + { + const char c = rem[first]; + _c4dbgfbf("firstns={}='{}'", first, _c4prc(c)); + if(c == '\n' || c == '\r') + { + ; + } + else + { + _c4dbgfbf("done with indented block", first); + goto endloop; + } + } + else if(first != npos) + { + proc.copy(first); + _c4dbgfbf("copy all {} spaces", first); + } + break; + } + break; + case '\r': + proc.skip(); + break; + default: + proc.copy(); + break; + } + } + endloop: + return; +} + + +template +template +auto ParseEngine::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result()) +{ + _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src); + + size_t contents_len = _handle_all_whitespace(proc, chomp); + if(!contents_len) + return proc.result(); + + contents_len = _extend_to_chomp(proc, contents_len); + + _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len)); + + _filter_block_folded_newlines_leading(proc, indentation, contents_len); + + // now filter the bulk + while(proc.has_more_chars(/*maxpos*/contents_len)) + { + const char curr = proc.curr(); + _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar()); + switch(curr) + { + case '\n': + { + _c4dbgfbf("found newline", curr); + _filter_block_folded_newlines(proc, indentation, contents_len); + break; + } + case '\r': + proc.skip(); + break; + default: + proc.copy(); + break; + } + } + + _c4dbgfbf("before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar()); + + _filter_chomp(proc, chomp, indentation); + + _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar()); + + return proc.result(); +} + +#undef _c4dbgfbf + +template +FilterResult ParseEngine::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp) +{ + FilterProcessorSrcDst proc(scalar, dst); + return _filter_block_folded(proc, indentation, chomp); +} + 
+template +FilterResult ParseEngine::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp) +{ + FilterProcessorInplaceEndExtending proc(scalar, cap); + return _filter_block_folded(proc, indentation, chomp); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_filter_scalar_plain(substr s, size_t indentation) +{ + _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid()); + _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); +} + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_filter_scalar_squot(substr s) +{ + _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResult r = this->filter_scalar_squoted_in_place(s, s.len); + _RYML_CB_ASSERT(this->callbacks(), r.valid()); + _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); +} + + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_filter_scalar_dquot(substr s) +{ + _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len); + if(C4_LIKELY(r.valid())) + { + _c4dbgpf("filtering dquo scalar: success! 
s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); + } + else + { + const size_t len = r.required_len(); + _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len); + substr dst = m_evt_handler->alloc_arena(len, &s); + _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len); + _RYML_CB_ASSERT(this->callbacks(), dst.len == len); + FilterResult rsd = this->filter_scalar_dquoted(s, dst); + _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len); + _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller! + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid()); + _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); + return rsd.get(); + } +} + + +//----------------------------------------------------------------------------- +template +csubstr ParseEngine::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp) +{ + _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp); + if(C4_LIKELY(r.valid())) + { + _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); + } + else + { + _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len); + substr dst = m_evt_handler->alloc_arena(r.required_len(), &s); + FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid()); + _c4dbgpf("filtering block literal scalar: success! 
s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); + return rsd.get(); + } +} + + +//----------------------------------------------------------------------------- +template +csubstr ParseEngine::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp) +{ + _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s); + FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp); + if(C4_LIKELY(r.valid())) + { + _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get()); + return r.get(); + } + else + { + _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len); + substr dst = m_evt_handler->alloc_arena(r.required_len(), &s); + FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid()); + _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get()); + return rsd.get(); + } +} + + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation) +{ + csubstr maybe_filtered = sc.scalar; + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_plain(sc.scalar, indentation); + } + else + { + _c4dbgp("plain scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); + } + } + else + { + _c4dbgp("plain scalar doesn't need filtering"); + } + return maybe_filtered; +} + +template +csubstr ParseEngine::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation) +{ + csubstr maybe_filtered = sc.scalar; + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_plain(sc.scalar, indentation); + } + else + { + _c4dbgp("plain scalar left unfiltered"); + 
m_evt_handler->mark_val_scalar_unfiltered(); + } + } + else + { + _c4dbgp("plain scalar doesn't need filtering"); + } + return maybe_filtered; +} + + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc) +{ + csubstr maybe_filtered = sc.scalar; + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_squot(sc.scalar); + } + else + { + _c4dbgp("squo key scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); + } + } + else + { + _c4dbgp("squo key scalar doesn't need filtering"); + } + return maybe_filtered; +} + +template +csubstr ParseEngine::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc) +{ + csubstr maybe_filtered = sc.scalar; + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_squot(sc.scalar); + } + else + { + _c4dbgp("squo val scalar left unfiltered"); + m_evt_handler->mark_val_scalar_unfiltered(); + } + } + else + { + _c4dbgp("squo val scalar doesn't need filtering"); + } + return maybe_filtered; +} + + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc) +{ + csubstr maybe_filtered = sc.scalar; + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_dquot(sc.scalar); + } + else + { + _c4dbgp("dquo scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); + } + } + else + { + _c4dbgp("dquo scalar doesn't need filtering"); + } + return maybe_filtered; +} + +template +csubstr ParseEngine::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc) +{ + csubstr maybe_filtered = sc.scalar; + if(sc.needs_filter) + { + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_dquot(sc.scalar); + } + else + { 
+ _c4dbgp("dquo scalar left unfiltered"); + m_evt_handler->mark_val_scalar_unfiltered(); + } + } + else + { + _c4dbgp("dquo scalar doesn't need filtering"); + } + return maybe_filtered; +} + + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb) +{ + csubstr maybe_filtered = sb.scalar; + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp); + } + else + { + _c4dbgp("literal scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); + } + return maybe_filtered; +} + +template +csubstr ParseEngine::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb) +{ + csubstr maybe_filtered = sb.scalar; + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp); + } + else + { + _c4dbgp("literal scalar left unfiltered"); + m_evt_handler->mark_val_scalar_unfiltered(); + } + return maybe_filtered; +} + + +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb) +{ + csubstr maybe_filtered = sb.scalar; + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp); + } + else + { + _c4dbgp("folded scalar left unfiltered"); + m_evt_handler->mark_key_scalar_unfiltered(); + } + return maybe_filtered; +} + +template +csubstr ParseEngine::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb) +{ + csubstr maybe_filtered = sb.scalar; + if(m_options.scalar_filtering()) + { + maybe_filtered = _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp); + } + else + { + _c4dbgp("folded scalar left unfiltered"); + m_evt_handler->mark_val_scalar_unfiltered(); + } + return maybe_filtered; +} + + 
+//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +#ifdef RYML_DBG // !!! <---------------------------------- + +template +void ParseEngine::add_flags(ParserFlag_t on, ParserState * s) +{ + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = detail::_parser_flags_to_str(buf1_, on); + csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags); + csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on); + _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3); + s->flags |= on; +} + +template +void ParseEngine::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s) +{ + char buf1_[64], buf2_[64], buf3_[64], buf4_[64]; + csubstr buf1 = detail::_parser_flags_to_str(buf1_, on); + csubstr buf2 = detail::_parser_flags_to_str(buf2_, off); + csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags); + csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off))); + _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4); + s->flags |= on; + s->flags &= ~off; +} + +template +void ParseEngine::rem_flags(ParserFlag_t off, ParserState * s) +{ + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = detail::_parser_flags_to_str(buf1_, off); + csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags); + csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off)); + _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3); + s->flags &= ~off; +} + +inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags) +{ + size_t pos = 0; + bool gotone = false; + + #define _prflag(fl) \ + if((flags & fl) == (fl)) \ + { \ + if(gotone) \ + { \ + if(pos + 1 < buf.len) \ + buf[pos] = '|'; \ + ++pos; \ + } \ + csubstr fltxt = #fl; \ + if(pos + 
fltxt.len <= buf.len) \ + memcpy(buf.str + pos, fltxt.str, fltxt.len); \ + pos += fltxt.len; \ + gotone = true; \ + } + + _prflag(RTOP); + _prflag(RUNK); + _prflag(RMAP); + _prflag(RSEQ); + _prflag(FLOW); + _prflag(BLCK); + _prflag(QMRK); + _prflag(RKEY); + _prflag(RVAL); + _prflag(RKCL); + _prflag(RNXT); + _prflag(SSCL); + _prflag(QSCL); + _prflag(RSET); + _prflag(RDOC); + _prflag(NDOC); + _prflag(USTY); + _prflag(RSEQIMAP); + + #undef _prflag + + if(pos == 0) + if(buf.len > 0) + buf[pos++] = '0'; + + RYML_CHECK(pos <= buf.len); + + return buf.first(pos); +} + +#endif // RYML_DBG !!! <---------------------------------- + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +csubstr ParseEngine::location_contents(Location const& loc) const +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len); + return m_buf.sub(loc.offset); +} + +template +Location ParseEngine::location(ConstNodeRef node) const +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable()); + return location(*node.tree(), node.id()); +} + +template +Location ParseEngine::location(Tree const& tree, id_type node) const +{ + // try hard to avoid getting the location from a null string. 
+ Location loc; + if(_location_from_node(tree, node, &loc, 0)) + return loc; + return val_location(m_buf.str); +} + +template +bool ParseEngine::_location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const +{ + if(tree.has_key(node)) + { + csubstr k = tree.key(node); + if(C4_LIKELY(k.str != nullptr)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k)); + *loc = val_location(k.str); + return true; + } + } + + if(tree.has_val(node)) + { + csubstr v = tree.val(node); + if(C4_LIKELY(v.str != nullptr)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v)); + *loc = val_location(v.str); + return true; + } + } + + if(tree.is_container(node)) + { + if(_location_from_cont(tree, node, loc)) + return true; + } + + if(tree.type(node) != NOTYPE && level == 0) + { + // try the prev sibling + { + const id_type prev = tree.prev_sibling(node); + if(prev != NONE) + { + if(_location_from_node(tree, prev, loc, level+1)) + return true; + } + } + // try the next sibling + { + const id_type next = tree.next_sibling(node); + if(next != NONE) + { + if(_location_from_node(tree, next, loc, level+1)) + return true; + } + } + // try the parent + { + const id_type parent = tree.parent(node); + if(parent != NONE) + { + if(_location_from_node(tree, parent, loc, level+1)) + return true; + } + } + } + + return false; +} + +template +bool ParseEngine::_location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node)); + if(!tree.is_stream(node)) + { + const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container + if(tree.has_children(node)) + { + id_type child = tree.first_child(node); + if(tree.has_key(child)) + { + // when a map starts, the 
container was set after the key + csubstr k = tree.key(child); + if(k.str && node_start > k.str) + node_start = k.str; + } + } + *loc = val_location(node_start); + return true; + } + else // it's a stream + { + *loc = val_location(m_buf.str); // just return the front of the buffer + } + return true; +} + + +template +Location ParseEngine::val_location(const char *val) const +{ + if(C4_UNLIKELY(val == nullptr)) + return {m_file, 0, 0, 0}; + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations()); + // NOTE: if any of these checks fails, the parser needs to be + // instantiated with locations enabled. + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations()); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty()); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0); + // NOTE: the pointer needs to belong to the buffer that was used to parse. + csubstr src = m_buf; + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr)); + // ok. search the first stored newline after the given ptr + using lineptr_type = size_t const* C4_RESTRICT; + lineptr_type lineptr = nullptr; + size_t offset = (size_t)(val - src.begin()); + if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD) + { + // just do a linear search if the size is small. 
+ for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr) + { + if(*curr > offset) + { + lineptr = curr; + break; + } + } + } + else + { + // do a bisection search if the size is not small. + // + // We could use std::lower_bound but this is simple enough and + // spares the costly include of . + size_t count = m_newline_offsets_size; + size_t step; + lineptr_type it; + lineptr = m_newline_offsets; + while(count) + { + step = count >> 1; + it = lineptr + step; + if(*it < offset) + { + lineptr = ++it; + count -= step + 1; + } + else + { + count = step; + } + } + } + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset); + Location loc; + loc.name = m_file; + loc.offset = offset; + loc.line = (size_t)(lineptr - m_newline_offsets); + if(lineptr > m_newline_offsets) + loc.col = (offset - *(lineptr-1) - 1u); + else + loc.col = offset; + return loc; +} + +template +void ParseEngine::_prepare_locations() +{ + m_newline_offsets_buf = m_buf; + size_t numnewlines = 1u + m_buf.count('\n'); + _resize_locations(numnewlines); + m_newline_offsets_size = 0; + for(size_t i = 0; i < m_buf.len; i++) + if(m_buf[i] == '\n') + m_newline_offsets[m_newline_offsets_size++] = i; + m_newline_offsets[m_newline_offsets_size++] = m_buf.len; + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines); +} + +template +void ParseEngine::_resize_locations(size_t numnewlines) +{ + if(numnewlines > m_newline_offsets_capacity) + { + if(m_newline_offsets) + _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets); + 
m_newline_offsets_capacity = numnewlines; + } +} + +template +bool ParseEngine::_locations_dirty() const +{ + return !m_newline_offsets_size; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_flow_skip_whitespace() +{ + if(m_evt_handler->m_curr->line_contents.rem.len > 0) + { + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(rem.str[0] == ' ' || rem.str[0] == '\t') + { + _c4dbgpf("starts with whitespace: '{}'", _c4prc(rem.str[0])); + _skipchars(" \t"); + rem = m_evt_handler->m_curr->line_contents.rem; + } + // comments + if(rem.begins_with('#')) + { + _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem); + _line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + } + } +} + + +//----------------------------------------------------------------------------- + + +template +void ParseEngine::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line) +{ + _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line); + if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations))) + _c4err("too many annotations"); + dst->annotations[dst->num_entries].str = str; + dst->annotations[dst->num_entries].indentation = indentation; + dst->annotations[dst->num_entries].line = line; + ++dst->num_entries; +} + +template +void ParseEngine::_clear_annotations(Annotation *C4_RESTRICT dst) +{ + dst->num_entries = 0; +} + +#ifdef RYML_NO_COVERAGE__TO_BE_DELETED +template +bool ParseEngine::_handle_indentation_from_annotations() +{ + if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u); 
+ _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line); + size_t to_skip = m_evt_handler->m_curr->indref; + if(m_pending_anchors.num_entries) + to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip; + if(m_pending_tags.num_entries) + to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip; + _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip); + _maybe_skipchars_up_to(' ', to_skip); + return true; + } + return false; +} +#endif + +template +bool ParseEngine::_annotations_require_key_container() const +{ + return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1; +} + +template +void ParseEngine::_check_tag(csubstr tag) +{ + if(!tag.begins_with("!<")) + { + if(C4_UNLIKELY(tag.first_of("[]{},") != npos)) + _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos); + } + else + { + if(C4_UNLIKELY(!tag.ends_with('>'))) + _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_evt_handler->m_curr->pos); + } +} + +template +void ParseEngine::_handle_annotations_before_blck_key_scalar() +{ + _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id); + if(m_pending_tags.num_entries) + { + _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries); + if(C4_LIKELY(m_pending_tags.num_entries == 1)) + { + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str); + _clear_annotations(&m_pending_tags); + } + else + { + _c4err("too many tags"); + } + } + if(m_pending_anchors.num_entries) + { + _c4dbgpf("annotations_before_blck_key_scalar, 
#anchors={}", m_pending_anchors.num_entries); + if(C4_LIKELY(m_pending_anchors.num_entries == 1)) + { + m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str); + _clear_annotations(&m_pending_anchors); + } + else + { + _c4err("too many anchors"); + } + } +} + +template +void ParseEngine::_handle_annotations_before_blck_val_scalar() +{ + _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id); + if(m_pending_tags.num_entries) + { + _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries); + if(C4_LIKELY(m_pending_tags.num_entries == 1)) + { + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str); + _clear_annotations(&m_pending_tags); + } + else + { + _c4err("too many tags"); + } + } + if(m_pending_anchors.num_entries) + { + _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries); + if(C4_LIKELY(m_pending_anchors.num_entries == 1)) + { + m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str); + _clear_annotations(&m_pending_anchors); + } + else + { + _c4err("too many anchors"); + } + } +} + +template +void ParseEngine::_handle_annotations_before_start_mapblck(size_t current_line) +{ + _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line); + if(m_pending_tags.num_entries == 2) + { + _c4dbgp("2 tags, setting entry 0"); + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str); + } + else if(m_pending_tags.num_entries == 1) + { + _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line); + if(m_pending_tags.annotations[0].line < current_line) + { + _c4dbgp("...tag is for the map. 
setting it."); + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str); + _clear_annotations(&m_pending_tags); + } + } + // + if(m_pending_anchors.num_entries == 2) + { + _c4dbgp("2 anchors, setting entry 0"); + m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str); + } + else if(m_pending_anchors.num_entries == 1) + { + _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line); + if(m_pending_anchors.annotations[0].line < current_line) + { + _c4dbgp("...anchor is for the map. setting it."); + m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str); + _clear_annotations(&m_pending_anchors); + } + } +} + +template +void ParseEngine::_handle_annotations_before_start_mapblck_as_key() +{ + _c4dbgp("annotations_before_start_mapblck_as_key"); + if(m_pending_tags.num_entries == 2) + { + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str); + } + if(m_pending_anchors.num_entries == 2) + { + m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str); + } +} + +template +void ParseEngine::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line) +{ + _c4dbgp("annotations_after_start_mapblck"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2); + if(m_pending_anchors.num_entries || m_pending_tags.num_entries) + { + key_indentation = _select_indentation_from_annotations(key_indentation, key_line); + switch(m_pending_tags.num_entries) + { + case 1u: + _check_tag(m_pending_tags.annotations[0].str); + m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str); + _clear_annotations(&m_pending_tags); + break; + case 2u: + _check_tag(m_pending_tags.annotations[1].str); + m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str); + 
_clear_annotations(&m_pending_tags); + break; + } + switch(m_pending_anchors.num_entries) + { + case 1u: + m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str); + _clear_annotations(&m_pending_anchors); + break; + case 2u: + m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str); + _clear_annotations(&m_pending_anchors); + break; + } + } + _set_indentation(key_indentation); +} + +template +size_t ParseEngine::_select_indentation_from_annotations(size_t val_indentation, size_t val_line) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries); + // select the left-most annotation on the max line + auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0]; + for(size_t i = 0; i < m_pending_anchors.num_entries; ++i) + { + auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i]; + if(ann.line > curr->line) + curr = &ann; + else if(ann.indentation < curr->indentation) + curr = &ann; + } + for(size_t j = 0; j < m_pending_tags.num_entries; ++j) + { + auto const& C4_RESTRICT ann = m_pending_tags.annotations[j]; + if(ann.line > curr->line) + curr = &ann; + else if(ann.indentation < curr->indentation) + curr = &ann; + } + return curr->line < val_line ? 
val_indentation : curr->indentation; +} + +template +void ParseEngine::_handle_directive(csubstr rem) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem)); + const size_t pos = rem.find('#'); + _c4dbgpf("handle_directive: pos={} rem={}", pos, rem); + if(pos == npos) // no comments + { + m_evt_handler->add_directive(rem); + _line_progressed(rem.len); + } + else + { + csubstr to_comment = rem.first(pos); + csubstr trimmed = to_comment.trimr(" \t"); + m_evt_handler->add_directive(trimmed); + _line_progressed(pos); + _skip_comment(); + } +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_seq_json() +{ +seqjson_start: + _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT)); + + _handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqjson_again; + + if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RVAL]: '{}'", first); + switch(first) + { + case '"': + { + _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar"); + ScannedScalar sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + break; + } + case '[': + { + _c4dbgp("seqjson[RVAL]: start child seqjson"); + addrem_flags(RNXT, RVAL); + 
m_evt_handler->begin_seq_val_flow(); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + break; + } + case '{': + { + _c4dbgp("seqjson[RVAL]: start child mapjson"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT); + _line_progressed(1); + goto seqjson_finish; + } + case ']': // this happens on a trailing comma like ", ]" + { + _c4dbgp("seqjson[RVAL]: end!"); + rem_flags(RSEQ); + m_evt_handler->end_seq(); + _line_progressed(1); + if(!has_all(RSEQ|FLOW)) + goto seqjson_finish; + break; + } + default: + { + ScannedScalar sc; + if(_scan_scalar_seq_json(&sc)) + { + _c4dbgp("seqjson[RVAL]: it's a plain scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + _c4err("parse error"); + } + } + } + } + else // RNXT + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RNXT]: '{}'", first); + switch(first) + { + case ',': + { + _c4dbgp("seqjson[RNXT]: expect next val"); + addrem_flags(RVAL, RNXT); + m_evt_handler->add_sibling(); + _line_progressed(1); + break; + } + case ']': + { + _c4dbgp("seqjson[RNXT]: end!"); + m_evt_handler->end_seq(); + _line_progressed(1); + goto seqjson_finish; + } + default: + _c4err("parse error"); + } + } + + seqjson_again: + _c4dbgt("seqjson: go again", 0); + if(_finished_line()) + { + if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("missing terminating ]"); + } + } + goto seqjson_start; + + seqjson_finish: + _c4dbgp("seqjson: finish"); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_map_json() +{ +mapjson_start: + _c4dbgpf("handle2_map_json: node_id={} 
level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT))); + + _handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapjson_again; + + if(has_any(RKEY)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RKEY]: '{}'", first); + switch(first) + { + case '"': + { + _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar"); + ScannedScalar sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, RKEY); + break; + } + case '}': // this happens on a trailing comma like ", }" + { + _c4dbgp("mapjson[RKEY]: end!"); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapjson_finish; + } + default: + _c4err("parse error"); + } + } + else if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RVAL]: '{}'", first); + switch(first) + { + case '"': + 
{ + _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar"); + ScannedScalar sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + break; + } + case '[': + { + _c4dbgp("mapjson[RVAL]: start val seqjson"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RSEQ|RVAL, RMAP|RNXT); + _line_progressed(1); + goto mapjson_finish; + } + case '{': + { + _c4dbgp("mapjson[RVAL]: start val mapjson"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + // keep going in this function + break; + } + default: + { + ScannedScalar sc; + if(_scan_scalar_map_json(&sc)) + { + _c4dbgp("mapjson[RVAL]: plain scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + _c4err("parse error"); + } + break; + } + } + } + else if(has_any(RKCL)) // read the key colon + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapjson[RKCL]: '{}'", first); + if(first == ':') + { + _c4dbgp("mapjson[RKCL]: found the colon"); + addrem_flags(RVAL, RKCL); + _line_progressed(1); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + 
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]); + if(rem.begins_with(',')) + { + _c4dbgp("mapjson[RNXT]: expect next keyval"); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + } + else if(rem.begins_with('}')) + { + _c4dbgp("mapjson[RNXT]: end!"); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapjson_finish; + } + else + { + _c4err("parse error"); + } + } + + mapjson_again: + _c4dbgt("mapjson: go again", 0); + if(_finished_line()) + { + if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("missing terminating }"); + } + } + goto mapjson_start; + + mapjson_finish: + _c4dbgp("mapjson: finish"); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_seq_imap() +{ +seqimap_start: + _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3); + + _handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqimap_again; + + if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + const char first = 
rem.str[0]; + _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first)); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else if(first == '"') + { + _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + m_evt_handler->end_map(); + goto seqimap_finish; + } + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) + { + _c4dbgp("seqimap[RVAL]: it's a scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else if(first == '[') + { + _c4dbgp("seqimap[RVAL]: start child seqflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RVAL, RNXT|RSEQIMAP); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto seqimap_finish; + } + else if(first == '{') + { + _c4dbgp("seqimap[RVAL]: start child mapflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto seqimap_finish; + } + else if(first == ',' || first == ']') + { + _c4dbgp("seqimap[RVAL]: finish without val."); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgp("seqimap[RVAL]: anchor!"); + m_evt_handler->set_val_anchor(anchor); + } + else if(first == '*') + { + csubstr ref = 
_scan_ref_seq(); + _c4dbgp("seqimap[RVAL]: ref!"); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + const char first = rem.str[0]; + _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first)); + if(first == ',' || first == ']') + { + // we may get here because a map or a seq started and we + // return later + _c4dbgp("seqimap: done"); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(QMRK)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + const char first = rem.str[0]; + _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first)); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + goto seqimap_again; + } + else if(first == '"') + { + _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + goto seqimap_again; + } + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) + { + _c4dbgp("seqimap[QMRK]: it's a scalar."); + csubstr maybe_filtered = 
_maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RKCL, QMRK); + goto seqimap_again; + } + else if(first == '[') + { + _c4dbgp("seqimap[QMRK]: start child seqflow"); + addrem_flags(RKCL, QMRK); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto seqimap_finish; + } + else if(first == '{') + { + _c4dbgp("seqimap[QMRK]: start child mapflow"); + addrem_flags(RKCL, QMRK); + m_evt_handler->begin_map_key_flow(); + addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto seqimap_finish; + } + else if(first == ',' || first == ']') + { + _c4dbgp("seqimap[QMRK]: finish without key."); + m_evt_handler->set_key_scalar_plain({}); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgp("seqimap[QMRK]: anchor!"); + m_evt_handler->set_key_anchor(anchor); + } + else if(first == '*') + { + csubstr ref = _scan_ref_seq(); + _c4dbgp("seqimap[QMRK]: ref!"); + m_evt_handler->set_key_ref(ref); + addrem_flags(RKCL, QMRK); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RKCL)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL)); + const char first = rem.str[0]; + _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first)); + if(first == ':') + { + _c4dbgp("seqimap[RKCL]: found ':'"); + addrem_flags(RVAL, RKCL); + _line_progressed(1); + goto seqimap_again; + } + else if(first == ',' || first == ']') + { + _c4dbgp("seqimap[RKCL]: found ','. 
finish without val"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + goto seqimap_finish; + } + else + { + _c4err("parse error"); + } + } + + seqimap_again: + _c4dbgt("seqimap: go again", 0); + if(_finished_line()) + { + if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("parse error"); + } + } + goto seqimap_start; + + seqimap_finish: + _c4dbgp("seqimap: finish"); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_seq_flow() +{ +seqflow_start: + _c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos); + + _handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqflow_again; + + if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + const char first = rem.str[0]; + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(first == '"') + { + _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + 
m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_seq_flow(&sc)) + { + _c4dbgp("seqflow[RVAL]: it's a scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(first == '[') + { + _c4dbgp("seqflow[RVAL]: start child seqflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + } + else if(first == '{') + { + _c4dbgp("seqflow[RVAL]: start child mapflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT); + _line_progressed(1); + goto seqflow_finish; + } + else if(first == ']') // this happens on a trailing comma like ", ]" + { + _c4dbgp("seqflow[RVAL]: end!"); + _line_progressed(1); + m_evt_handler->end_seq(); + goto seqflow_finish; + } + else if(first == '*') + { + csubstr ref = _scan_ref_seq(); + _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->set_val_anchor(anchor); + if(_maybe_scan_following_comma()) + { + _c4dbgp("seqflow[RVAL]: empty scalar!"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + } + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("seqflow[RVAL]: tag! 
[{}]~~~{}~~~", tag.len, tag); + _check_tag(tag); + m_evt_handler->set_val_tag(tag); + if(_maybe_scan_following_comma()) + { + _c4dbgp("seqflow[RVAL]: empty scalar!"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + } + } + else if(first == ':') + { + _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT); + _line_progressed(1); + goto seqflow_finish; + } + else if(first == '?') + { + _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key"); + addrem_flags(RNXT, RVAL); + m_was_inside_qmrk = true; + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto seqflow_finish; + } + else + { + _c4err("parse error"); + } + } + else // RNXT + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + const char first = rem.str[0]; + if(first == ',') + { + _c4dbgp("seqflow[RNXT]: expect next val"); + addrem_flags(RVAL, RNXT); + m_evt_handler->add_sibling(); + _line_progressed(1); + } + else if(first == ']') + { + _c4dbgp("seqflow[RNXT]: end!"); + m_evt_handler->end_seq(); + _line_progressed(1); + goto seqflow_finish; + } + else if(first == ':') + { + _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id); + m_evt_handler->actually_val_is_first_key_of_new_map_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + addrem_flags(RSEQIMAP|RVAL, RNXT); + goto seqflow_finish; + } + else + { + _c4err("parse error"); + } + } + + seqflow_again: + _c4dbgt("seqflow: go again", 0); + if(_finished_line()) + { + 
if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("missing terminating ]"); + } + } + goto seqflow_start; + + seqflow_finish: + _c4dbgp("seqflow: finish"); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_map_flow() +{ +mapflow_start: + _c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK))); + + _handle_flow_skip_whitespace(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapflow_again; + + if(has_any(RKEY)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapflow[RKEY]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RKCL, RKEY|QMRK); + } + else if(first == '"') + { + _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, RKEY|QMRK); + } + // block 
scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) + { + _c4dbgp("mapflow[RKEY]: plain scalar"); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RKCL, RKEY|QMRK); + } + else if(first == '?') + { + _c4dbgp("mapflow[RKEY]: explicit key"); + _line_progressed(1); + addrem_flags(QMRK, RKEY); + _maybe_skip_whitespace_tokens(); + } + else if(first == ':') + { + _c4dbgp("mapflow[RKEY]: setting empty key"); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '}') // this happens on a trailing comma like ", }" + { + _c4dbgp("mapflow[RKEY]: end!"); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->set_key_anchor(anchor); + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref); + m_evt_handler->set_key_ref(ref); + addrem_flags(RKCL, RKEY); + } + else if(first == '[') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree sink. Other sink types may be + // able to handle it. + _c4dbgp("mapflow[RKEY]: start child seqflow (!)"); + addrem_flags(RKCL, RKEY); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|RVAL, RMAP|RKCL); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == '{') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree sink. Other sink types may be + // able to handle it. 
+ _c4dbgp("mapflow[RKEY]: start child mapflow (!)"); + addrem_flags(RKCL, RKEY); + m_evt_handler->begin_map_key_flow(); + addrem_flags(RKEY, RVAL|RKCL); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + // keep going in this function + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag); + _check_tag(tag); + m_evt_handler->set_key_tag(tag); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RKCL)) // read the key colon + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapflow[RKCL]: '{}'", first); + if(first == ':') + { + _c4dbgp("mapflow[RKCL]: found the colon"); + addrem_flags(RVAL, RKCL); + _line_progressed(1); + } + else if(first == '}') + { + _c4dbgp("mapflow[RKCL]: end with missing val!"); + addrem_flags(RVAL, RKCL); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == ',') + { + _c4dbgp("mapflow[RKCL]: got comma. 
val is missing"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RKCL); + _line_progressed(1); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + const char first = rem.str[0]; + _c4dbgpf("mapflow[RVAL]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(first == '"') + { + _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) + { + _c4dbgp("mapflow[RVAL]: plain scalar."); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(first == '[') + { + _c4dbgp("mapflow[RVAL]: start val seqflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RSEQ|RVAL, RMAP|RNXT); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == '{') + { + _c4dbgp("mapflow[RVAL]: start val mapflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + 
// keep going in this function + } + else if(first == '}') + { + _c4dbgp("mapflow[RVAL]: end!"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->set_val_anchor(anchor); + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag); + _check_tag(tag); + m_evt_handler->set_val_tag(tag); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]); + if(rem.begins_with(',')) + { + _c4dbgp("mapflow[RNXT]: expect next keyval"); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + } + else if(rem.begins_with('}')) + { + _c4dbgp("mapflow[RNXT]: end!"); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(QMRK)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + const char first = rem.str[0]; + _c4dbgpf("mapflow[QMRK]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapflow[QMRK]: 
scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else if(first == '"') + { + _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + // block scalars (ie | and >) cannot appear in flow containers + else if(_scan_scalar_plain_map_flow(&sc)) + { + _c4dbgp("mapflow[QMRK]: plain scalar"); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else if(first == ':') + { + _c4dbgp("mapflow[QMRK]: setting empty key"); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RVAL, QMRK); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '}') // this happens on a trailing comma like ", }" + { + _c4dbgp("mapflow[QMRK]: end!"); + m_evt_handler->set_key_scalar_plain({}); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_map(); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->set_key_anchor(anchor); + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref); + m_evt_handler->set_key_ref(ref); + addrem_flags(RKCL, QMRK); + } + else if(first == '[') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree sink. Other sink types may be + // able to handle it. 
+ _c4dbgp("mapflow[QMRK]: start child seqflow (!)"); + addrem_flags(RKCL, QMRK); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|RVAL, RMAP|RKCL); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto mapflow_finish; + } + else if(first == '{') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree sink. Other sink types may be + // able to handle it. + _c4dbgp("mapflow[QMRK]: start child mapflow (!)"); + addrem_flags(RKCL, QMRK); + m_evt_handler->begin_map_key_flow(); + _set_indentation(m_evt_handler->m_parent->indref); + addrem_flags(RKEY, RKCL); + _line_progressed(1); + // keep going in this function + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag); + _check_tag(tag); + m_evt_handler->set_key_tag(tag); + } + else + { + _c4err("parse error"); + } + } + + mapflow_again: + _c4dbgt("mapflow: go again", 0); + if(_finished_line()) + { + if(C4_LIKELY(!_finished_file())) + { + _line_ended(); + _scan_line(); + _c4dbgnextline(); + } + else + { + _c4err("missing terminating }"); + } + } + goto mapflow_start; + + mapflow_finish: + _c4dbgp("mapflow: finish"); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_seq_block() +{ +seqblck_start: + _c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT))); + + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto 
seqblck_again; + + if(has_any(RVAL)) + { + _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + if(m_evt_handler->m_curr->at_line_beginning()) + { + _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation); + if(m_evt_handler->m_curr->indentation_ge()) + { + _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) + { + _c4dbgp("seqblck[RVAL]: smaller indentation!"); + _handle_indentation_pop_from_block_seq(); + goto seqblck_finish; + } + else if(m_evt_handler->m_curr->line_contents.indentation == npos) + { + _c4dbgp("seqblck[RVAL]: empty line!"); + _line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + goto seqblck_again; + } + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else + { + // accomodate annotation on the previous line. 
eg: + // - &elm + // foo # <-- on this line + // - &elm + // &foo foo: bar # <-- on this line + if(rem.str[0] == ' ') + { + if(_handle_indentation_from_annotations()) + { + _c4dbgp("seqblck[RVAL]: annotations!"); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; + } + } + } + #endif + _RYML_CB_ASSERT(callbacks(), rem.len); + _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id); + const char first = rem.str[0]; + const size_t startline = m_evt_handler->m_curr->pos.line; + // warning: the gcc optimizer on x86 builds is brittle with + // this function: + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("seqblck[RVAL]: single-quoted scalar"); + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("seqblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL! + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY! + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + } + else if(first == '"') + { + _c4dbgp("seqblck[RVAL]: double-quoted scalar"); + sc = _scan_scalar_dquot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("seqblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL! 
+ m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY! + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + } + // block scalars can only appear as keys when in QMRK scope + // (ie, after ? tokens), so no need to scan following colon in + // here. + else if(first == '|') + { + _c4dbgp("seqblck[RVAL]: block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb); + m_evt_handler->set_val_scalar_literal(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(first == '>') + { + _c4dbgp("seqblck[RVAL]: block-folded scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb); + m_evt_handler->set_val_scalar_folded(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(_scan_scalar_plain_seq_blck(&sc)) + { + _c4dbgp("seqblck[RVAL]: plain scalar."); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("seqblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL! 
+ m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + if(startindent > m_evt_handler->m_curr->indref) + { + _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent)) + { + _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->end_seq(); + m_evt_handler->add_sibling(); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! 
+ m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RVAL, RNXT|RKEY); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + else + { + _c4err("parse error"); + } + } + } + else if(first == '[') + { + _c4dbgp("seqblck[RVAL]: start child seqflow"); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(FLOW|RVAL, BLCK|RNXT); + _line_progressed(1); + _set_indentation(m_evt_handler->m_parent->indref + 1u); + goto seqblck_finish; + } + else if(first == '{') + { + _c4dbgp("seqblck[RVAL]: start child mapflow"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT); + _line_progressed(1); + _set_indentation(m_evt_handler->m_parent->indref + 1u); + goto seqblck_finish; + } + else if(first == '-') + { + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("seqblck[RVAL]: prev val was empty"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + // keep in RVAL, but for the next sibling + m_evt_handler->add_sibling(); + } + else + { + _c4dbgp("seqblck[RVAL]: start child seqblck"); + _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_block(); + addrem_flags(RVAL, RNXT); + _save_indentation(); + // keep going on inside this function + } + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == ':') + { + _c4dbgp("seqblck[RVAL]: start child mapblck with empty key"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + 
goto seqblck_finish; + } + else if(first == '&') + { + const csubstr anchor = _scan_anchor(); + _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor); + // we need to buffer the anchors, as there may be two + // consecutive anchors in here + _add_annotation(&m_pending_anchors, anchor, startindent, startline); + } + else if(first == '*') + { + csubstr ref = _scan_ref_seq(); + _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("seqblck[RVAL]: set ref as val!"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); + } + else + { + _c4dbgp("seqblck[RVAL]: ref is key of map"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_ref(ref); + addrem_flags(RMAP|RVAL, RSEQ|RNXT); + _set_indentation(startindent); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("seqblck[RVAL]: val tag! 
[{}]~~~{}~~~", tag.len, tag); + // we need to buffer the tags, as there may be two + // consecutive tags in here + _add_annotation(&m_pending_tags, tag, startindent, startline); + } + else if(first == '?') + { + _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key"); + addrem_flags(RNXT, RVAL); + m_was_inside_qmrk = true; + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|QMRK, RSEQ|RNXT); + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + else + { + _c4err("parse error"); + } + } + else // RNXT + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + // + // handle indentation + // + _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation); + if(C4_UNLIKELY(!_at_line_begin())) + _c4err("parse error"); + if(m_evt_handler->m_curr->indentation_ge()) + { + _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + _maybe_skip_whitespace_tokens(); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) + { + _c4dbgp("seqblck[RNXT]: smaller indentation!"); + _handle_indentation_pop_from_block_seq(); + if(has_all(RSEQ|BLCK)) + { + _c4dbgp("seqblck[RNXT]: still seqblck!"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT)); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; + } + else + { + _c4dbgp("seqblck[RNXT]: no longer seqblck!"); + goto seqblck_finish; + } + } + else if(m_evt_handler->m_curr->line_contents.indentation == npos) + { + _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem); + 
_line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto seqblck_again; + } + // + // now handle the tokens + // + const char first = rem.str[0]; + _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id); + if(first == '-') + { + if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem)) + { + _c4dbgp("seqblck[RNXT]: expect next val"); + addrem_flags(RVAL, RNXT); + m_evt_handler->add_sibling(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else + { + _c4dbgp("seqblck[RNXT]: start doc"); + _start_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + } + else if(first == ':') + { + // This happens for example in `- [a: b]: c` (after + // terminating the seq, ie, after `]`). All other cases + // (ie colon after scalars) are caught elsewhere (ie, in + // RVAL state). + auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent; + if(C4_LIKELY(prev_state && (prev_state->flags & RMAP))) + { + _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map"); + m_evt_handler->end_seq(); + goto seqblck_finish; + } + else + { + _c4err("parse error"); + } + } + else if(first == '.') + { + _c4dbgp("seqblck[RNXT]: maybe doc?"); + csubstr rs = rem.sub(1); + if(rs == ".." || rs.begins_with(".. ")) + { + _c4dbgp("seqblck[RNXT]: end+start doc"); + _end_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto seqblck_finish; + } + else + { + _c4err("parse error"); + } + } + else + { + // may be an indentless sequence nested in a map... 
+ //if(m_evt_handler->m_stack.size() >= 2) + #ifdef RYML_DBG + char flagbuf_[128]; + for(auto const& s : m_evt_handler->m_stack) + { + _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags)); + } + #endif + if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref) + { + _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id); + _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent); + _handle_indentation_pop(m_evt_handler->m_parent); + _RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK)); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + goto seqblck_finish; + } + else //if(first != '*') + { + _c4err("parse error"); + } + } + } + + seqblck_again: + _c4dbgt("seqblck: go again", 0); + if(_finished_line()) + { + _line_ended(); + _scan_line(); + if(_finished_file()) + { + _c4dbgp("seqblck: finish!"); + _end_seq_blck(); + goto seqblck_finish; + } + _c4dbgnextline(); + } + goto seqblck_start; + + seqblck_finish: + _c4dbgp("seqblck: finish"); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_map_block() +{ +mapblck_start: + _c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref); + + // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + 
has_any(QMRK))); + + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + + if(has_any(RKEY)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) + { + if(m_evt_handler->m_curr->indentation_eq()) + { + _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) + { + _c4dbgp("mapblck[RKEY]: smaller indentation!"); + _handle_indentation_pop_from_block_map(); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + if(has_all(RMAP|BLCK)) + { + _c4dbgp("mapblck[RKEY]: still mapblck!"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY)); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else + { + _c4dbgp("mapblck[RKEY]: no longer mapblck!"); + goto mapblck_finish; + } + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt()); + _c4err("invalid indentation"); + } + } + // + // now handle the tokens + // + const char first = rem.str[0]; + const size_t startline = m_evt_handler->m_curr->pos.line; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(); + _c4dbgpf("mapblck[RKEY]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + _handle_annotations_before_blck_key_scalar(); + 
m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RVAL, RKEY); + if(!_maybe_scan_following_colon()) + _c4err("could not find ':' colon after key"); + _maybe_skip_whitespace_tokens(); + } + else if(first == '"') + { + _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RVAL, RKEY); + if(!_maybe_scan_following_colon()) + _c4err("could not find ':' colon after key"); + _maybe_skip_whitespace_tokens(); + } + // block scalars (| and >) can not be used as keys unless they + // appear in an explicit QMRK scope (ie, after the ? token), + else if(C4_UNLIKELY(first == '|')) + { + _c4err("block literal keys must be enclosed in '?'"); + } + else if(C4_UNLIKELY(first == '>')) + { + _c4err("block literal keys must be enclosed in '?'"); + } + else if(_scan_scalar_plain_map_blck(&sc)) + { + _c4dbgp("mapblck[RKEY]: plain scalar"); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RVAL, RKEY); + if(!_maybe_scan_following_colon()) + _c4err("could not find ':' colon after key"); + _maybe_skip_whitespace_tokens(); + } + else if(first == '?') + { + _c4dbgp("mapblck[RKEY]: key token!"); + addrem_flags(QMRK, RKEY); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + m_was_inside_qmrk = true; + goto mapblck_again; + } + else if(first == ':') + { + _c4dbgp("mapblck[RKEY]: setting empty key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RVAL, RKEY); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapblck[RKEY]: key ref! 
[{}]~~~{}~~~", ref.len, ref); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_ref(ref); + addrem_flags(RVAL, RKEY); + if(!_maybe_scan_following_colon()) + _c4err("could not find ':' colon after key"); + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + _add_annotation(&m_pending_anchors, anchor, startindent, startline); + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag); + _add_annotation(&m_pending_tags, tag, startindent, startline); + } + else if(first == '[') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree handler. Other handlers may be + // able to handle it. + _c4dbgp("mapblck[RKEY]: start child seqflow (!)"); + addrem_flags(RKCL, RKEY); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL); + _line_progressed(1); + _set_indentation(startindent); + goto mapblck_finish; + } + else if(first == '{') + { + // RYML's tree cannot store container keys, but that's + // handled inside the tree handler. Other handlers may be + // able to handle it. 
+ _c4dbgp("mapblck[RKEY]: start child mapflow (!)"); + addrem_flags(RKCL, RKEY); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->begin_map_key_flow(); + addrem_flags(FLOW|RKEY, BLCK|RKCL); + _line_progressed(1); + _set_indentation(startindent); + goto mapblck_finish; + } + else if(first == '-') + { + _c4dbgp("mapblck[RKEY]: maybe doc?"); + if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem)) + { + _c4dbgp("mapblck[RKEY]: end+start doc"); + _start_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + else if(first == '.') + { + _c4dbgp("mapblck[RKEY]: maybe end doc?"); + if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem)) + { + _c4dbgp("mapblck[RKEY]: end doc"); + _end_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + _RYML_WITH_TAB_TOKENS( + else if(first == '\t') + { + _c4dbgp("mapblck[RKEY]: skip tabs"); + _maybe_skipchars('\t'); + }) + else + { + _c4err("parse error"); + } + } + else if(has_any(RKCL)) // read the key colon + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) + { + if(m_evt_handler->m_curr->indentation_eq()) + { + _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt())) + { + _c4err("invalid indentation"); + } + } + const char first = 
rem.str[0]; + _c4dbgpf("mapblck[RKCL]: '{}'", first); + if(first == ':') + { + _c4dbgp("mapblck[RKCL]: found the colon"); + addrem_flags(RVAL, RKCL); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '?') + { + _c4dbgp("mapblck[RKCL]: got '?'. val was empty"); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(QMRK, RKCL); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '-') + { + if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem)) + { + _c4dbgp("mapblck[RKCL]: end+start doc"); + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem)); + _start_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + else if(first == '.') + { + _c4dbgp("mapblck[RKCL]: maybe end doc?"); + csubstr rs = rem.sub(1); + if(rs == ".." || rs.begins_with(".. 
")) + { + _c4dbgp("mapblck[RKCL]: end+start doc"); + _end_doc_suddenly(); + _line_progressed(3); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + else if(m_was_inside_qmrk) + { + _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq()); + _c4dbgp("mapblck[RKCL]: missing :"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + m_was_inside_qmrk = false; + addrem_flags(RKEY, RKCL); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) + { + _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation); + m_evt_handler->m_curr->more_indented = false; + if(m_evt_handler->m_curr->indref == npos) + { + _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref); + _set_indentation(m_evt_handler->m_curr->line_contents.indentation); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else if(m_evt_handler->m_curr->indentation_eq()) + { + _c4dbgp("mapblck[RVAL]: skip indentation!"); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + // TODO: this is valid: + // + // ```yaml + // a: + // b: + // --- + // a: + // b + // --- + // a: + // b: c + // ``` + // + // ... 
but this is not: + // + // ```yaml + // a: + // v + // --- + // a: b: c + // ``` + // + // here, we probably need to set a boolean on the state + // to disambiguate between these cases. + } + else if(m_evt_handler->m_curr->indentation_gt()) + { + _c4dbgp("mapblck[RVAL]: more indented!"); + m_evt_handler->m_curr->more_indented = true; + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) + { + _c4dbgp("mapblck[RVAL]: smaller indentation!"); + _handle_indentation_pop_from_block_map(); + if(has_all(RMAP|BLCK)) + { + _c4dbgp("mapblck[RVAL]: still mapblck!"); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + goto mapblck_again; + } + else + { + _c4dbgp("mapblck[RVAL]: no longer mapblck!"); + goto mapblck_finish; + } + } + else if(m_evt_handler->m_curr->line_contents.indentation == npos) + { + _c4dbgp("mapblck[RVAL]: empty line!"); + _line_progressed(m_evt_handler->m_curr->line_contents.rem.len); + goto mapblck_again; + } + } + // + // now handle the tokens + // + const char first = rem.str[0]; + const size_t startline = m_evt_handler->m_curr->pos.line; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(); + _c4dbgpf("mapblck[RVAL]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL! 
+ m_evt_handler->set_val_scalar_squoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + if(startindent != m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key"); + _handle_annotations_before_start_mapblck(startline); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY! + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(m_evt_handler->m_curr->line_contents.indentation); + // keep the child state on RVAL + addrem_flags(RVAL, RNXT); + } + else + { + _c4dbgp("mapblck[RVAL]: prev val empty+this is a key"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY! + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + // keep going on RVAL + _maybe_skip_whitespace_tokens(); + } + } + } + else if(first == '"') + { + _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL! + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + if(startindent != m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key"); + _handle_annotations_before_start_mapblck(startline); + addrem_flags(RNXT, RVAL); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY! 
+ m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(m_evt_handler->m_curr->line_contents.indentation); + // keep the child state on RVAL + addrem_flags(RVAL, RNXT); + } + else + { + _c4dbgp("mapblck[RVAL]: prev val empty+this is a key"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY! + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + // keep going on RVAL + _maybe_skip_whitespace_tokens(); + } + } + } + // block scalars can only appear as keys when in QMRK scope + // (ie, after ? tokens), so no need to scan following colon + else if(first == '|') + { + _c4dbgp("mapblck[RVAL]: scanning block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb); + m_evt_handler->set_val_scalar_literal(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(first == '>') + { + _c4dbgp("mapblck[RVAL]: scanning block-folded scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb); + m_evt_handler->set_val_scalar_folded(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else if(_scan_scalar_plain_map_blck(&sc)) + { + _c4dbgp("mapblck[RVAL]: plain scalar."); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[RVAL]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL! 
+ m_evt_handler->set_val_scalar_plain(maybe_filtered); + addrem_flags(RNXT, RVAL); + } + else + { + if(startindent != m_evt_handler->m_curr->indref) + { + _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! + m_evt_handler->set_key_scalar_plain(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(m_evt_handler->m_curr->line_contents.indentation); + // keep the child state on RVAL + addrem_flags(RVAL, RNXT); + } + else + { + _c4dbgp("mapblck[RVAL]: prev val empty+this is a key"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! 
+ m_evt_handler->set_key_scalar_plain(maybe_filtered); + // keep going on RVAL + _maybe_skip_whitespace_tokens(); + } + } + } + else if(first == '-') + { + if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t')) + { + _c4dbgp("mapblck[RVAL]: start val seqblck"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_block(); + addrem_flags(RSEQ|RVAL, RMAP|RNXT); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem)) + { + _c4dbgp("mapblck[RVAL]: end+start doc"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem)); + _start_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + else if(first == '[') + { + _c4dbgp("mapblck[RVAL]: start val seqflow"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT|BLCK); + _set_indentation(m_evt_handler->m_curr->indref + 1u); + _line_progressed(1); + goto mapblck_finish; + } + else if(first == '{') + { + _c4dbgp("mapblck[RVAL]: start val mapflow"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT); + m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation; + _set_indentation(m_evt_handler->m_curr->indref + 1u); + _line_progressed(1); + goto mapblck_finish; + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapblck[RVAL]: ref! 
[{}]~~~{}~~~", ref.len, ref); + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); + } + else + { + _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref); + if(_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[RVAL]: start child map, block"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + m_evt_handler->set_key_ref(ref); + _set_indentation(startindent); + // keep going in RVAL + addrem_flags(RVAL, RNXT); + } + else + { + _c4dbgp("mapblck[RVAL]: was val ref"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_ref(ref); + addrem_flags(RNXT, RVAL); + } + } + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor); + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RVAL); + } + // we need to buffer the anchors, as there may be two + // consecutive anchors in here + _add_annotation(&m_pending_anchors, anchor, startindent, startline); + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag); + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: tag for next key. 
val is missing!"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RVAL); + } + // we need to buffer the tags, as there may be two + // consecutive tags in here + _add_annotation(&m_pending_tags, tag, startindent, startline); + } + else if(first == '?') + { + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: got '?'. val was empty"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + addrem_flags(QMRK, RVAL); + } + else if(startindent > m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: start val mapblck"); + addrem_flags(RNXT, RVAL); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + addrem_flags(QMRK|BLCK, RNXT); + _set_indentation(startindent); + } + else + { + _c4err("parse error"); + } + m_was_inside_qmrk = true; + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto mapblck_again; + } + else if(first == ':') + { + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well"); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + m_evt_handler->set_key_scalar_plain({}); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + goto mapblck_again; + } + else + { + _c4err("parse error"); + } + } + else if(first == '.') + { + _c4dbgp("mapblck[RVAL]: maybe doc?"); + csubstr rs = rem.sub(1); + if(rs == ".." || rs.begins_with(".. 
")) + { + _c4dbgp("seqblck[RVAL]: end doc expl"); + _end_doc_suddenly(); + _line_progressed(3); + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + _RYML_WITH_TAB_TOKENS( + else if(first == '\t') + { + _c4dbgp("mapblck[RVAL]: skip tabs"); + _maybe_skipchars('\t'); + }) + else + { + _c4err("parse error"); + } + } + else if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) + { + _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation); + if(m_evt_handler->m_curr->indentation_eq()) + { + _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + _c4dbgp("mapblck[RNXT]: speculatively expect next keyval"); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + goto mapblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) + { + _c4dbgp("mapblck[RNXT]: smaller indentation!"); + _handle_indentation_pop_from_block_map(); + if(has_all(RMAP|BLCK)) + { + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + if(!has_any(RKCL)) + { + _c4dbgp("mapblck[RNXT]: speculatively expect next keyval"); + m_evt_handler->add_sibling(); + addrem_flags(RKEY, RNXT); + } + goto mapblck_again; + } + else + { + goto mapblck_finish; + } + } + } + // + // handle tokens + // + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0); + const char first = rem.str[0]; + _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first)); + if(first == ':') + { + if(m_evt_handler->m_curr->more_indented) + { + _c4dbgp("mapblck[RNXT]: start child 
block map"); + C4_NOT_IMPLEMENTED(); + //m_evt_handler->actually_as_block_map(); + _line_progressed(1); + _set_indentation(m_evt_handler->m_curr->scalar_col); + m_evt_handler->m_curr->more_indented = false; + goto mapblck_again; + } + else + { + _c4err("parse error"); + } + } + else if(first == ' ') + { + _c4dbgp("mapblck[RNXT]: skip spaces"); + _maybe_skip_whitespace_tokens(); + } + else + { + _c4err("parse error"); + } + } + else if(has_any(QMRK)) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT)); + // + // handle indentation + // + if(m_evt_handler->m_curr->at_line_beginning()) + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos); + if(m_evt_handler->m_curr->indentation_eq()) + { + _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref); + _line_progressed(m_evt_handler->m_curr->indref); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else if(m_evt_handler->m_curr->indentation_lt()) + { + _c4dbgp("mapblck[QMRK]: smaller indentation!"); + _handle_indentation_pop_from_block_map(); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + if(has_all(RMAP|BLCK)) + { + _c4dbgp("mapblck[QMRK]: still mapblck!"); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK)); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + goto mapblck_again; + } + else + { + _c4dbgp("mapblck[QMRK]: no longer mapblck!"); + goto mapblck_finish; + } + } + // indentation can be larger in QMRK state + else + { + _c4dbgp("mapblck[QMRK]: larger indentation !"); + _line_progressed(m_evt_handler->m_curr->line_contents.indentation); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) 
+ goto mapblck_again; + } + } + // + // now handle the tokens + // + const char first = rem.str[0]; + const size_t startline = m_evt_handler->m_curr->pos.line; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(); + _c4dbgpf("mapblck[QMRK]: '{}'", first); + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY! + if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[QMRK]: set as key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_start_mapblck_as_key(); + m_evt_handler->begin_map_key_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + } + else if(first == '"') + { + _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY! 
+ if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[QMRK]: set as key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_start_mapblck_as_key(); + m_evt_handler->begin_map_key_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + } + else if(first == '|') + { + _c4dbgp("mapblck[QMRK]: scanning block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY! + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_literal(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else if(first == '>') + { + _c4dbgp("mapblck[QMRK]: scanning block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, m_evt_handler->m_curr->indref + 1); + csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY! + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_folded(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else if(_scan_scalar_plain_map_blck(&sc)) + { + _c4dbgp("mapblck[QMRK]: plain scalar"); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY! 
+ if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[QMRK]: set as key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + addrem_flags(RKCL, QMRK); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_start_mapblck_as_key(); + m_evt_handler->begin_map_key_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + } + else if(first == ':') + { + if(startindent == m_evt_handler->m_curr->indref) + { + _c4dbgp("mapblck[QMRK]: empty key"); + addrem_flags(RVAL, QMRK); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_scalar_plain({}); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_start_mapblck_as_key(); + m_evt_handler->begin_map_key_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_scalar_plain({}); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("mapblck[QMRK]: key ref! 
[{}]~~~{}~~~", ref.len, ref); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("mapblck[QMRK]: set ref as key"); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->set_key_ref(ref); + addrem_flags(RKCL, QMRK); + } + else + { + _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key"); + addrem_flags(RKCL, QMRK); + _handle_annotations_before_blck_key_scalar(); + m_evt_handler->begin_map_key_block(); + m_evt_handler->set_key_ref(ref); + _set_indentation(startindent); + // keep the child state on RVAL + addrem_flags(RVAL, RKCL|QMRK); + } + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor); + _add_annotation(&m_pending_anchors, anchor, startindent, startline); + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag); + _add_annotation(&m_pending_tags, tag, startindent, startline); + } + else if(first == '-') + { + _c4dbgp("mapblck[QMRK]: maybe doc?"); + csubstr rs = rem.sub(1); + if(rs == "--" || rs.begins_with("-- ")) + { + _c4dbgp("mapblck[QMRK]: end+start doc"); + _start_doc_suddenly(); + _line_progressed(3); + } + else + { + _c4dbgp("mapblck[QMRK]: start child seqblck (!)"); + addrem_flags(RKCL, RKEY|QMRK); + m_evt_handler->begin_seq_key_block(); + addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK); + _set_indentation(startindent); + _line_progressed(1); + } + _maybe_skip_whitespace_tokens(); + goto mapblck_finish; + } + else if(first == '[') + { + _c4dbgp("mapblck[QMRK]: start child seqflow (!)"); + addrem_flags(RKCL, RKEY|QMRK); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto mapblck_finish; + } + else if(first == '{') + { + _c4dbgp("mapblck[QMRK]: start child mapblck (!)"); + addrem_flags(RKCL, RKEY|QMRK); + 
m_evt_handler->begin_map_key_flow(); + addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK); + _set_indentation(m_evt_handler->m_parent->indref); + _line_progressed(1); + goto mapblck_finish; + } + else if(first == '?') + { + _c4dbgp("mapblck[QMRK]: another QMRK '?'"); + m_evt_handler->set_key_scalar_plain({}); + m_evt_handler->set_val_scalar_plain({}); + m_evt_handler->add_sibling(); + _line_progressed(1); + } + else if(first == '.') + { + _c4dbgp("mapblck[QMRK]: maybe end doc?"); + csubstr rs = rem.sub(1); + if(rs == ".." || rs.begins_with(".. ")) + { + _c4dbgp("mapblck[QMRK]: end+start doc"); + _end_doc_suddenly(); + _line_progressed(3); + goto mapblck_finish; + } + else + { + _c4err("parse error"); + } + } + else + { + _c4err("parse error"); + } + } + + mapblck_again: + _c4dbgt("mapblck: again", 0); + if(_finished_line()) + { + _line_ended(); + _scan_line(); + if(_finished_file()) + { + _c4dbgp("mapblck: file finished!"); + _end_map_blck(); + goto mapblck_finish; + } + _c4dbgnextline(); + } + goto mapblck_start; + + mapblck_finish: + _c4dbgp("mapblck: finish"); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_unk_json() +{ + _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP)); + + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + + size_t pos = rem.first_not_of(" \t"); + if(pos) + { + pos = pos != npos ? 
pos : rem.len; + _c4dbgpf("skipping indentation of {}", pos); + _line_progressed(pos); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem); + } + + if(rem.begins_with('[')) + { + _c4dbgp("it's a seq"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC); + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + m_doc_empty = false; + _line_progressed(1); + } + else if(rem.begins_with('{')) + { + _c4dbgp("it's a map"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC); + m_doc_empty = false; + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + _line_progressed(1); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL)); + _maybe_skip_whitespace_tokens(); + csubstr s = m_evt_handler->m_curr->line_contents.rem; + if(!s.len) + return; + const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const char first = s.str[0]; + ScannedScalar sc; + if(first == '"') + { + _c4dbgp("runk_json: scanning double-quoted scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + sc = _scan_scalar_dquot(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("runk_json: set as val"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + } + else + { + _c4err("parse error"); + } + } + else if(_scan_scalar_plain_unk(&sc)) + { + _c4dbgp("runk_json: got a plain scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + if(!_maybe_scan_following_colon()) + { + 
_c4dbgp("runk_json: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + } + else + { + _c4err("parse error"); + } + } + else + { + _c4err("parse error"); + } + } +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::_handle_unk() +{ + _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP)); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP)); + + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + + size_t pos = rem.first_not_of(" \t"); + if(pos) + { + pos = pos != npos ? pos : rem.len; + _c4dbgpf("skipping {} whitespace characters", pos); + _line_progressed(pos); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem); + } + + if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin()) + { + const char first = rem.str[0]; + _c4dbgp("rtop: zero indent + at line begin"); + if(first == '-') + { + _c4dbgp("rtop: suspecting doc"); + if(_is_doc_begin_token(rem)) + { + _c4dbgp("rtop: begin doc"); + _maybe_end_doc(); + _begin2_doc_expl(); + _set_indentation(0); + addrem_flags(RDOC|RUNK, NDOC); + _line_progressed(3u); + _maybe_skip_whitespace_tokens(); + return; + } + } + else if(first == '.') + { + _c4dbgp("rtop: suspecting doc end"); + if(_is_doc_end_token(rem)) + { + _c4dbgp("rtop: end doc"); + if(has_any(RDOC)) + { + _end2_doc_expl(); + } + else + { + _c4dbgp("rtop: ignore end doc"); + } + addrem_flags(NDOC|RUNK, RDOC); + _line_progressed(3u); + _maybe_skip_whitespace_tokens(); + return; + } + } + else if(first == '%') + { + _c4dbgpf("directive: {}", rem); + 
if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC))) + _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives"); + _handle_directive(rem); + return; + } + } + + /* no else-if! */ + char first = rem.str[0]; + + if(first == '[') + { + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_doc_empty = false; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem); + if(C4_LIKELY( ! _annotations_require_key_container())) + { + _c4dbgp("it's a seq, flow"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC); + _set_indentation(startindent); + } + else + { + _c4dbgp("start new block map, set flow seq as key (!)"); + _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line); + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC); + _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line); + m_evt_handler->begin_seq_key_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL); + _set_indentation(startindent); + } + _line_progressed(1); + } + else if(first == '{') + { + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_doc_empty = false; + const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem); + if(C4_LIKELY( ! 
_annotations_require_key_container())) + { + _c4dbgp("it's a map, flow"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC); + _set_indentation(startindent); + } + else + { + _c4dbgp("start new block map, set flow map as key (!)"); + _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line); + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC); + _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line); + m_evt_handler->begin_map_key_flow(); + addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL); + _set_indentation(startindent); + } + _line_progressed(1); + } + else if(first == '-' && _is_blck_token(rem)) + { + _c4dbgp("it's a seq, block"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_block(); + addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC); + m_doc_empty = false; + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '?' 
&& _is_blck_token(rem)) + { + _c4dbgp("it's a map + this key is complex"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK); + m_doc_empty = false; + m_was_inside_qmrk = true; + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == ':' && _is_blck_token(rem)) + { + if(m_doc_empty) + { + _c4dbgp("it's a map with an empty key"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + m_evt_handler->set_key_scalar_plain({}); + m_doc_empty = false; + _save_indentation(); + } + else + { + _c4dbgp("actually prev val is a key!"); + size_t prev_indentation = m_evt_handler->m_curr->indref; + m_evt_handler->actually_val_is_first_key_of_new_map_block(); + _set_indentation(prev_indentation); + } + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_anchors, anchor, indentation, line); + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + m_doc_empty = false; + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("ref! 
[{}]~~~{}~~~", ref.len, ref); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + m_doc_empty = false; + if(!_maybe_scan_following_colon()) + { + _c4dbgp("runk: set val ref"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_ref(ref); + } + else + { + _c4dbgp("runk: start new block map, set ref as key"); + const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const size_t startline = m_evt_handler->m_curr->pos.line; // save + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_ref(ref); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); + } + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag); + // we need to buffer the tags, as there may be two + // consecutive tags in here + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_tags, tag, indentation, line); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! 
has_any(SSCL)); + _maybe_skip_whitespace_tokens(); + csubstr s = m_evt_handler->m_curr->line_contents.rem; + if(!s.len) + return; + const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const size_t startline = m_evt_handler->m_curr->pos.line; // save + first = s.str[0]; + ScannedScalar sc; + if(first == '\'') + { + _c4dbgp("runk: scanning single-quoted scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + } + else + { + _c4dbgp("runk: start new block map, set scalar as key"); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); + } + } + else if(first == '"') + { + _c4dbgp("runk: scanning double-quoted scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + sc = _scan_scalar_dquot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + } + else + { + _c4dbgp("runk: start new block map, set double-quoted scalar as key"); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + 
csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); + } + } + else if(first == '|') + { + _c4dbgp("runk: scanning block-literal scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + ScannedBlock sb; + _scan_block(&sb, startindent); + if(C4_LIKELY(!_maybe_scan_following_colon())) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb); + m_evt_handler->set_val_scalar_literal(maybe_filtered); + } + else + { + _c4err("block literal keys must be enclosed in '?'"); + } + } + else if(first == '>') + { + _c4dbgp("runk: scanning block-folded scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + ScannedBlock sb; + _scan_block(&sb, startindent); + if(C4_LIKELY(!_maybe_scan_following_colon())) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb); + m_evt_handler->set_val_scalar_folded(maybe_filtered); + } + else + { + _c4err("block folded keys must be enclosed in '?'"); + } + } + else if(_scan_scalar_plain_unk(&sc)) + { + _c4dbgp("runk: got a plain scalar"); + m_evt_handler->check_trailing_doc_token(); + _maybe_begin_doc(); + add_flags(RDOC); + m_doc_empty = false; + if(!_maybe_scan_following_colon()) + { + _c4dbgp("runk: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + } + else + { + _c4dbgp("runk: start new block map, set scalar as key"); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + 
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC); + } + } + } +} + + +//----------------------------------------------------------------------------- + +template +C4_COLD void ParseEngine::_handle_usty() +{ + _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id); + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW)); + + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(has_any(RNXT)) + { + _c4dbgp("usty[RNXT]: finishing!"); + _end_stream(); + } + #endif + + _maybe_skip_comment(); + csubstr rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + + size_t pos = rem.first_not_of(" \t"); + if(pos) + { + pos = pos != npos ? pos : rem.len; + _c4dbgpf("skipping indentation of {}", pos); + _line_progressed(pos); + rem = m_evt_handler->m_curr->line_contents.rem; + if(!rem.len) + return; + _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem); + } + + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0); + size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save + char first = rem.str[0]; + if(has_any(RSEQ)) // destination is a sequence + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP)); + _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first)); + if(first == '[') + { + _c4dbgp("usty[RSEQ]: it's a flow seq. merging it"); + add_flags(RNXT); + m_evt_handler->_push(); + addrem_flags(FLOW|RVAL, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '-' && _is_blck_token(rem)) + { + _c4dbgp("usty[RSEQ]: it's a block seq. 
merging it"); + add_flags(RNXT); + m_evt_handler->_push(); + addrem_flags(BLCK|RVAL, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else + { + _c4err("can only parse a seq into an existing seq"); + } + } + else if(has_any(RMAP)) // destination is a map + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ)); + _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first)); + if(first == '{') + { + _c4dbgp("usty[RMAP]: it's a flow map. merging it"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->_push(); + addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '?' && _is_blck_token(rem)) + { + _c4dbgp("usty[RMAP]: it's a block map + this key is complex"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->_push(); + addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY); + m_was_inside_qmrk = true; + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == ':' && _is_blck_token(rem)) + { + _c4dbgp("usty[RMAP]: it's a map with an empty key"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->_push(); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(rem.begins_with('&')) + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor); + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_anchors, anchor, indentation, line); + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("usty[RMAP]: ref! 
[{}]~~~{}~~~", ref.len, ref); + if(!_maybe_scan_following_colon()) + { + _c4err("cannot read a VAL to a map"); + } + else + { + _c4dbgp("usty[RMAP]: start new block map, set ref as key"); + const size_t startline = m_evt_handler->m_curr->pos.line; // save + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->_push(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_ref(ref); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + } + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag); + // we need to buffer the tags, as there may be two + // consecutive tags in here + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_tags, tag, indentation, line); + } + else if(first == '[' || (first == '-' && _is_blck_token(rem))) + { + _c4err("cannot parse a seq into an existing map"); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL)); + startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const size_t startline = m_evt_handler->m_curr->pos.line; // save + ScannedScalar sc; + _c4dbgpf("usty[RMAP]: maybe scalar. 
first='{}'", _c4prc(first)); + if(first == '\'') + { + _c4dbgp("usty[RMAP]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4err("cannot read a VAL to a map"); + } + else + { + _c4dbgp("usty[RMAP]: start new block map, set scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->_push(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } + } + else if(first == '"') + { + _c4dbgp("usty[RMAP]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + if(!_maybe_scan_following_colon()) + { + _c4err("cannot read a VAL to a map"); + } + else + { + _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->_push(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } + } + else if(first == '|') + { + _c4err("block literal keys must be enclosed in '?'"); + } + else if(first == '>') + { + _c4err("block literal keys must be enclosed in '?'"); + } + else if(_scan_scalar_plain_unk(&sc)) + { + _c4dbgp("usty[RMAP]: got a plain scalar"); + if(!_maybe_scan_following_colon()) + { + _c4err("cannot read a VAL to a map"); + } + else + { + _c4dbgp("usty[RMAP]: start new block map, set scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->_push(); + 
_handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } + } + else + { + _c4err("parse error"); + } + } + } + else // destination is unknown + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ)); + _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first)); + if(first == '[') + { + _c4dbgp("usty[UNK]: it's a flow seq"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_flow(); + addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '-' && _is_blck_token(rem)) + { + _c4dbgp("usty[UNK]: it's a block seq"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_seq_val_block(); + addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '{') + { + _c4dbgp("usty[UNK]: it's a flow map"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_flow(); + addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY); + _set_indentation(startindent); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '?' 
&& _is_blck_token(rem)) + { + _c4dbgp("usty[UNK]: it's a map + this key is complex"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY); + m_was_inside_qmrk = true; + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == ':' && _is_blck_token(rem)) + { + _c4dbgp("usty[UNK]: it's a map with an empty key"); + add_flags(RNXT); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->begin_map_val_block(); + m_evt_handler->set_key_scalar_plain({}); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _save_indentation(); + _line_progressed(1); + _maybe_skip_whitespace_tokens(); + } + else if(first == '&') + { + csubstr anchor = _scan_anchor(); + _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor); + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_anchors, anchor, indentation, line); + _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); + } + else if(first == '*') + { + csubstr ref = _scan_ref_map(); + _c4dbgpf("usty[UNK]: ref! 
[{}]~~~{}~~~", ref.len, ref); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("usty[UNK]: set val ref"); + _handle_annotations_before_blck_val_scalar(); + m_evt_handler->set_val_ref(ref); + } + else + { + _c4dbgp("usty[UNK]: start new block map, set ref as key"); + const size_t startline = m_evt_handler->m_curr->pos.line; // save + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + m_evt_handler->set_key_ref(ref); + _maybe_skip_whitespace_tokens(); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + } + } + else if(first == '!') + { + csubstr tag = _scan_tag(); + _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag); + // we need to buffer the tags, as there may be two + // consecutive tags in here + const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem); + const size_t line = m_evt_handler->m_curr->pos.line; + _add_annotation(&m_pending_tags, tag, indentation, line); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL)); + startindent = m_evt_handler->m_curr->line_contents.indentation; // save + const size_t startline = m_evt_handler->m_curr->pos.line; // save + first = rem.str[0]; + ScannedScalar sc; + _c4dbgpf("usty[UNK]: maybe scalar. 
first='{}'", _c4prc(first)); + if(first == '\'') + { + _c4dbgp("usty[UNK]: scanning single-quoted scalar"); + sc = _scan_scalar_squot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("usty[UNK]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); + m_evt_handler->set_val_scalar_squoted(maybe_filtered); + _end_stream(); + } + else + { + _c4dbgp("usty[UNK]: start new block map, set scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); + m_evt_handler->set_key_scalar_squoted(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } + } + else if(first == '"') + { + _c4dbgp("usty[UNK]: scanning double-quoted scalar"); + sc = _scan_scalar_dquot(); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("usty[UNK]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); + m_evt_handler->set_val_scalar_dquoted(maybe_filtered); + _end_stream(); + } + else + { + _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); + m_evt_handler->set_key_scalar_dquoted(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } + } + else if(first == '|') + { + _c4dbgp("usty[UNK]: scanning block-literal scalar"); + ScannedBlock sb; + _scan_block(&sb, startindent); + _c4dbgp("usty[UNK]: set as val"); + 
_handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb); + m_evt_handler->set_val_scalar_literal(maybe_filtered); + _end_stream(); + } + else if(first == '>') + { + _c4dbgp("usty[UNK]: scanning block-folded scalar"); + ScannedBlock sb; + _scan_block(&sb, startindent); + _c4dbgp("usty[UNK]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb); + m_evt_handler->set_val_scalar_folded(maybe_filtered); + _end_stream(); + } + else if(_scan_scalar_plain_unk(&sc)) + { + _c4dbgp("usty[UNK]: got a plain scalar"); + if(!_maybe_scan_following_colon()) + { + _c4dbgp("usty[UNK]: set as val"); + _handle_annotations_before_blck_val_scalar(); + csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent); + m_evt_handler->set_val_scalar_plain(maybe_filtered); + _end_stream(); + } + else + { + _c4dbgp("usty[UNK]: start new block map, set scalar as key"); + add_flags(RNXT); + _handle_annotations_before_start_mapblck(startline); + m_evt_handler->begin_map_val_block(); + _handle_annotations_and_indentation_after_start_mapblck(startindent, startline); + csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent); + m_evt_handler->set_key_scalar_plain(maybe_filtered); + _set_indentation(startindent); + addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY); + _maybe_skip_whitespace_tokens(); + } + } + else + { + _c4err("parse error"); + } + } + } +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::parse_json_in_place_ev(csubstr filename, substr src) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1); + m_file = filename; + m_buf = src; + _reset(); + m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this); + m_evt_handler->begin_stream(); + while( ! _finished_file()) + { + _scan_line(); + while( ! 
_finished_line()) + { + _c4dbgnextline(); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty()); + if(has_any(RSEQ)) + { + _handle_seq_json(); + } + else if(has_any(RMAP)) + { + _handle_map_json(); + } + else if(has_any(RUNK)) + { + _handle_unk_json(); + } + else + { + _c4err("internal error"); + } + } + if(_finished_file()) + break; // it may have finished because of multiline blocks + _line_ended(); + } + _end_stream(); + m_evt_handler->finish_parse(); +} + + +//----------------------------------------------------------------------------- + +template +void ParseEngine::parse_in_place_ev(csubstr filename, substr src) +{ + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1); + m_file = filename; + m_buf = src; + _reset(); + m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this); + m_evt_handler->begin_stream(); + while( ! _finished_file()) + { + _scan_line(); + while( ! _finished_line()) + { + _c4dbgnextline(); + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! 
m_evt_handler->m_curr->line_contents.rem.empty()); + if(has_any(FLOW)) + { + if(has_none(RSEQIMAP)) + { + if(has_any(RSEQ)) + { + _handle_seq_flow(); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _handle_map_flow(); + } + } + else + { + _handle_seq_imap(); + } + } + else if(has_any(BLCK)) + { + if(has_any(RSEQ)) + { + _handle_seq_block(); + } + else + { + _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP)); + _handle_map_block(); + } + } + else if(has_any(RUNK)) + { + _handle_unk(); + } + else if(has_any(USTY)) + { + _handle_usty(); + } + else + { + _c4err("internal error"); + } + } + if(_finished_file()) + break; // it may have finished because of multiline blocks + _line_ended(); + } + _end_stream(); + m_evt_handler->finish_parse(); +} + +} // namespace yml +} // namespace c4 + +#undef _c4dbgnextline + +#if defined(_MSC_VER) +# pragma warning(pop) +#elif defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif // _C4_YML_PARSE_ENGINE_DEF_HPP_ diff --git a/3rdparty/rapidyaml/include/c4/yml/parse_engine.hpp b/3rdparty/rapidyaml/include/c4/yml/parse_engine.hpp new file mode 100644 index 00000000000000..f63653ebcaa56c --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/parse_engine.hpp @@ -0,0 +1,778 @@ +#ifndef _C4_YML_PARSE_ENGINE_HPP_ +#define _C4_YML_PARSE_ENGINE_HPP_ + +#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_ +#include "c4/yml/detail/parser_dbg.hpp" +#endif + +#ifndef _C4_YML_PARSER_STATE_HPP_ +#include "c4/yml/parser_state.hpp" +#endif + + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) +#endif + + +namespace c4 { +namespace yml { + +/** @addtogroup doc_parse + * @{ */ + +/** @defgroup doc_event_handlers Event Handlers + * + * @brief rapidyaml implements its parsing logic with a two-level + * model, where a @ref ParseEngine object reads through the 
YAML + * source, and dispatches events to an EventHandler bound to the @ref + * ParseEngine. Because @ref ParseEngine is templated on the event + * handler, the binding uses static polymorphism, without any virtual + * functions. The actual handler object can be changed at run time, + * (but of course needs to be the type of the template parameter). + * This is thus a very efficient architecture, and further enables the + * user to provide his own custom handler if he wishes to bypass the + * rapidyaml @ref Tree. + * + * There are two handlers implemented in this project: + * + * - @ref EventHandlerTree is the handler responsible for creating the + * ryml @ref Tree + * + * - @ref EventHandlerYamlStd is the handler responsible for emitting + * standardized [YAML test suite + * events](https://github.com/yaml/yaml-test-suite), used (only) in + * the CI of this project. + * + * + * ### Event model + * + * The event model used by the parse engine and event handlers follows + * very closely the event model in the [YAML test + * suite](https://github.com/yaml/yaml-test-suite). 
+ * + * Consider for example this YAML, + * ```yaml + * {foo: bar,foo2: bar2} + * ``` + * which would produce these events in the test-suite parlance: + * ``` + * +STR + * +DOC + * +MAP {} + * =VAL :foo + * =VAL :bar + * =VAL :foo2 + * =VAL :bar2 + * -MAP + * -DOC + * -STR + * ``` + * + * For reference, the @ref ParseEngine object will produce this + * sequence of calls to its bound EventHandler: + * ```cpp + * handler.begin_stream(); + * handler.begin_doc(); + * handler.begin_map_val_flow(); + * handler.set_key_scalar_plain("foo"); + * handler.set_val_scalar_plain("bar"); + * handler.add_sibling(); + * handler.set_key_scalar_plain("foo2"); + * handler.set_val_scalar_plain("bar2"); + * handler.end_map(); + * handler.end_doc(); + * handler.end_stream(); + * ``` + * + * For many other examples of all areas of YAML and how ryml's parse + * model corresponds to the YAML standard model, refer to the [unit + * tests for the parse + * engine](https://github.com/biojppm/rapidyaml/tree/master/test/test_parse_engine.cpp). + * + * + * ### Special events + * + * Most of the parsing events adopted by rapidyaml in its event model + * are fairly obvious, but there are two less-obvious events requiring + * some explanation. + * + * These events exist to make it easier to parse some special YAML + * cases. 
They are called by the parser when a just-handled + * value/container is actually the first key of a new map: + * + * - `actually_val_is_first_key_of_new_map_flow()` (@ref EventHandlerTree::actually_val_is_first_key_of_new_map_flow() "see implementation in EventHandlerTree" / @ref EventHandlerYamlStd::actually_val_is_first_key_of_new_map_flow() "see implementation in EventHandlerYamlStd") + * - `actually_val_is_first_key_of_new_map_block()` (@ref EventHandlerTree::actually_val_is_first_key_of_new_map_block() "see implementation in EventHandlerTree" / @ref EventHandlerYamlStd::actually_val_is_first_key_of_new_map_block() "see implementation in EventHandlerYamlStd") + * + * For example, consider an implicit map inside a seq: `[a: b, c: + * d]` which is parsed as `[{a: b}, {c: d}]`. The standard event + * sequence for this YAML would be the following: + * ```cpp + * handler.begin_seq_val_flow(); + * handler.begin_map_val_flow(); + * handler.set_key_scalar_plain("a"); + * handler.set_val_scalar_plain("b"); + * handler.end_map(); + * handler.add_sibling(); + * handler.begin_map_val_flow(); + * handler.set_key_scalar_plain("c"); + * handler.set_val_scalar_plain("d"); + * handler.end_map(); + * handler.end_seq(); + * ``` + * The problem with this event sequence is that it forces the + * parser to delay setting the val scalar (in this case "a" and + * "c") until it knows whether the scalar is a key or a val. This + * would require the parser to store the scalar until this + * time. For instance, in the example above, the parser should + * delay setting "a" and "c", because they are in fact keys and + * not vals. Until then, the parser would have to store "a" and + * "c" in its internal state. The downside is that this complexity + * cost would apply even if there is no implicit map -- every val + * in a seq would have to be delayed until one of the + * disambiguating subsequent tokens `,-]:` is found. 
+ * By calling this function, the parser can avoid this complexity, + * by preemptively setting the scalar as a val. Then a call to + * this function will create the map and rearrange the scalar as + * key. Now the cost applies only once: when a seqimap starts. So + * the following (easier and cheaper) event sequence below has the + * same effect as the event sequence above: + * ```cpp + * handler.begin_seq_val_flow(); + * handler.set_val_scalar_plain("notmap"); + * handler.set_val_scalar_plain("a"); // preemptively set "a" as val! + * handler.actually_val_is_first_key_of_new_map_flow(); // create a map, move the "a" val as the key of the first child of the new map + * handler.set_val_scalar_plain("b"); // now "a" is a key and "b" the val + * handler.end_map(); + * handler.set_val_scalar_plain("c"); // "c" also as val! + * handler.actually_val_is_first_key_of_new_map_flow(); // likewise + * handler.set_val_scalar_plain("d"); // now "c" is a key and "d" the val + * handler.end_map(); + * handler.end_seq(); + * ``` + * This also applies to container keys (although ryml's tree + * cannot accommodate these): the parser can preemptively set a + * container as a val, and call this event to turn that container + * into a key.
For example, consider this yaml: + * ```yaml + * [aa, bb]: [cc, dd] + * # ^ ^ ^ + * # | | | + * # (2) (1) (3) <- event sequence + * ``` + * The standard event sequence for this YAML would be the + * following: + * ```cpp + * handler.begin_map_val_block(); // (1) + * handler.begin_seq_key_flow(); // (2) + * handler.set_val_scalar_plain("aa"); + * handler.add_sibling(); + * handler.set_val_scalar_plain("bb"); + * handler.end_seq(); + * handler.begin_seq_val_flow(); // (3) + * handler.set_val_scalar_plain("cc"); + * handler.add_sibling(); + * handler.set_val_scalar_plain("dd"); + * handler.end_seq(); + * handler.end_map(); + * ``` + * The problem with the sequence above is that, reading from + * left-to-right, the parser can only detect the proper calls at + * (1) and (2) once it reaches (1) in the YAML source. So, the + * parser would have to buffer the entire event sequence starting + * from the beginning until it reaches (1). Using this function, + * the parser can do instead: + * ```cpp + * handler.begin_seq_val_flow(); // (2) -- preemptively as val! + * handler.set_val_scalar_plain("aa"); + * handler.add_sibling(); + * handler.set_val_scalar_plain("bb"); + * handler.end_seq(); + * handler.actually_val_is_first_key_of_new_map_block(); // (1) -- adjust when finding that the prev val was actually a key. + * handler.begin_seq_val_flow(); // (3) -- go on as before + * handler.set_val_scalar_plain("cc"); + * handler.add_sibling(); + * handler.set_val_scalar_plain("dd"); + * handler.end_seq(); + * handler.end_map(); + * ``` + */ + +class Tree; +class NodeRef; +class ConstNodeRef; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** Options to give to the parser to control its behavior.
*/ +struct RYML_EXPORT ParserOptions +{ +private: + + typedef enum : uint32_t { + SCALAR_FILTERING = (1u << 0), /**< bit 0: filter scalars while parsing */ + LOCATIONS = (1u << 1), /**< bit 1: track source locations */ + DEFAULTS = SCALAR_FILTERING, /**< default state: scalar filtering on, location tracking off */ + } Flags_e; + + uint32_t flags = DEFAULTS; /**< bitwise OR of Flags_e values */ + +public: + + ParserOptions() = default; + +public: + + /** @name source location tracking */ + /** @{ */ + + /** enable/disable source location tracking; returns *this so that calls can be chained */ + ParserOptions& locations(bool enabled) noexcept + { + if(enabled) + flags |= LOCATIONS; + else + flags &= ~LOCATIONS; + return *this; + } + /** query source location tracking status (true when the LOCATIONS bit is set) */ + C4_ALWAYS_INLINE bool locations() const noexcept { return (flags & LOCATIONS); } + + /** @} */ + +public: + + /** @name scalar filtering status (experimental; disable at your discretion) */ + /** @{ */ + + /** enable/disable scalar filtering while parsing; returns *this so that calls can be chained */ + ParserOptions& scalar_filtering(bool enabled) noexcept + { + if(enabled) + flags |= SCALAR_FILTERING; + else + flags &= ~SCALAR_FILTERING; + return *this; + } + /** query scalar filtering status (true when the SCALAR_FILTERING bit is set) */ + C4_ALWAYS_INLINE bool scalar_filtering() const noexcept { return (flags & SCALAR_FILTERING); } + + /** @} */ +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** This is the main driver of parsing logic: it scans the YAML or + * JSON source for tokens, and emits the appropriate sequence of + * parsing events to its event handler. The parse engine itself has no + * special limitations, and *can* accommodate containers as keys; it is the + * event handler that may introduce additional constraints.
+ * + * There are two implemented handlers (see @ref doc_event_handlers, + * which has important notes about the event model): + * + * - @ref EventHandlerTree is the handler responsible for creating the + * ryml @ref Tree + * + * - @ref EventHandlerYamlStd is the handler responsible for emitting + * standardized [YAML test suite + * events](https://github.com/yaml/yaml-test-suite), used (only) in + * the CI of this project. This is not part of the library and is + * not installed. + */ +template +class ParseEngine +{ +public: + + using handler_type = EventHandler; + +public: + + /** @name construction and assignment */ + /** @{ */ + + ParseEngine(EventHandler *evt_handler, ParserOptions opts={}); + ~ParseEngine(); + + ParseEngine(ParseEngine &&); + ParseEngine(ParseEngine const&); + ParseEngine& operator=(ParseEngine &&); + ParseEngine& operator=(ParseEngine const&); + + /** @} */ + +public: + + /** @name modifiers */ + /** @{ */ + + /** Reserve a certain capacity for the parsing stack. + * This should be larger than the expected depth of the parsed + * YAML tree. + * + * The parsing stack is the only (potential) heap memory used + * directly by the parser. + * + * If the requested capacity is below the default + * stack size of 16, the memory is used directly in the parser + * object; otherwise it will be allocated from the heap. + * + * @note this reserves memory only for the parser itself; all the + * allocations for the parsed tree will go through the tree's + * allocator (when different). + * + * @note for maximum efficiency, the tree and the arena can (and + * should) also be reserved. */ + void reserve_stack(id_type capacity) + { + m_evt_handler->m_stack.reserve(capacity); + } + + /** Reserve a certain capacity for the array used to track node + * locations in the source buffer. 
*/ + void reserve_locations(size_t num_source_lines) + { + _resize_locations(num_source_lines); + } + + RYML_DEPRECATED("filter arena no longer needed") + void reserve_filter_arena(size_t) {} + + /** @} */ + +public: + + /** @name getters */ + /** @{ */ + + /** Get the options used to build this parser object. */ + ParserOptions const& options() const { return m_options; } + + /** Get the current callbacks in the parser. */ + Callbacks const& callbacks() const { RYML_ASSERT(m_evt_handler); return m_evt_handler->m_stack.m_callbacks; } + + /** Get the name of the latest file parsed by this object. */ + csubstr filename() const { return m_file; } + + /** Get the latest YAML buffer parsed by this object. */ + csubstr source() const { return m_buf; } + + id_type stack_capacity() const { RYML_ASSERT(m_evt_handler); return m_evt_handler->m_stack.capacity(); } + size_t locations_capacity() const { return m_newline_offsets_capacity; } + + RYML_DEPRECATED("filter arena no longer needed") + size_t filter_arena_capacity() const { return 0u; } + + /** @} */ + +public: + + /** @name parse methods */ + /** @{ */ + + /** parse YAML in place, emitting events to the current handler */ + void parse_in_place_ev(csubstr filename, substr src); + + /** parse JSON in place, emitting events to the current handler */ + void parse_json_in_place_ev(csubstr filename, substr src); + + /** @} */ + +public: + + /** @name deprecated parse methods + * @{ */ + + /** @cond dev */ + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place(csubstr filename, substr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place( substr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. 
use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place(csubstr filename, substr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place( substr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place(csubstr filename, substr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place( substr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place(csubstr filename, substr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_place( substr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, csubstr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena( csubstr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, csubstr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena( csubstr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. 
use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena( csubstr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, csubstr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if::type parse_in_arena( csubstr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, substr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena( substr yaml, Tree *t, size_t node_id); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, substr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena( substr yaml, Tree *t ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, substr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena( substr yaml, NodeRef node ); + template RYML_DEPRECATED("removed, deliberately undefined. 
use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena(csubstr filename, substr yaml ); + template RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if::type parse_in_arena( substr yaml ); + /** @endcond */ + + /** @} */ + +public: + + /** @name locations */ + /** @{ */ + + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(Tree const& tree, id_type node_id) const; + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(ConstNodeRef node) const; + /** Get the string starting at a particular location, to the end + * of the parsed source buffer. */ + csubstr location_contents(Location const& loc) const; + /** Given a pointer to a buffer position, get the location. + * @param[in] val must be pointing to somewhere in the source + * buffer that was last parsed by this object. */ + Location val_location(const char *val) const; + + /** @} */ + +public: + + /** @name scalar filtering */ + /** @{*/ + + /** filter a plain scalar */ + FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation); + /** filter a plain scalar in place */ + FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation); + + /** filter a single-quoted scalar */ + FilterResult filter_scalar_squoted(csubstr scalar, substr dst); + /** filter a single-quoted scalar in place */ + FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap); + + /** filter a double-quoted scalar */ + FilterResult filter_scalar_dquoted(csubstr scalar, substr dst); + /** filter a double-quoted scalar in place */ + FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap); + + /** filter a block-literal scalar */ + FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp); + /** filter a 
block-literal scalar in place */ + FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp); + + /** filter a block-folded scalar */ + FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp); + /** filter a block-folded scalar in place */ + FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp); + + /** @} */ + +private: + + struct ScannedScalar + { + substr scalar; + bool needs_filter; + }; + + struct ScannedBlock + { + substr scalar; + size_t indentation; + BlockChomp_e chomp; + }; + + bool _is_doc_begin(csubstr s); + bool _is_doc_end(csubstr s); + + bool _scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation); + bool _scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc); + bool _scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc); + bool _is_valid_start_scalar_plain_flow(csubstr s); + + ScannedScalar _scan_scalar_squot(); + ScannedScalar _scan_scalar_dquot(); + + void _scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref); + + csubstr _scan_anchor(); + csubstr _scan_ref_seq(); + csubstr _scan_ref_map(); + csubstr _scan_tag(); + +public: // exposed for testing + + /** @cond dev */ + csubstr _filter_scalar_plain(substr s, size_t indentation); + csubstr _filter_scalar_squot(substr s); + csubstr _filter_scalar_dquot(substr s); + csubstr _filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp); + csubstr _filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp); + + csubstr _maybe_filter_key_scalar_plain(ScannedScalar const& 
sc, size_t indendation); + csubstr _maybe_filter_val_scalar_plain(ScannedScalar const& sc, size_t indendation); + csubstr _maybe_filter_key_scalar_squot(ScannedScalar const& sc); + csubstr _maybe_filter_val_scalar_squot(ScannedScalar const& sc); + csubstr _maybe_filter_key_scalar_dquot(ScannedScalar const& sc); + csubstr _maybe_filter_val_scalar_dquot(ScannedScalar const& sc); + csubstr _maybe_filter_key_scalar_literal(ScannedBlock const& sb); + csubstr _maybe_filter_val_scalar_literal(ScannedBlock const& sb); + csubstr _maybe_filter_key_scalar_folded(ScannedBlock const& sb); + csubstr _maybe_filter_val_scalar_folded(ScannedBlock const& sb); + /** @endcond */ + +private: + + void _handle_map_block(); + void _handle_seq_block(); + void _handle_map_flow(); + void _handle_seq_flow(); + void _handle_seq_imap(); + void _handle_map_json(); + void _handle_seq_json(); + + void _handle_unk(); + void _handle_unk_json(); + void _handle_usty(); + + void _handle_flow_skip_whitespace(); + + void _end_map_blck(); + void _end_seq_blck(); + void _end2_map(); + void _end2_seq(); + + void _begin2_doc(); + void _begin2_doc_expl(); + void _end2_doc(); + void _end2_doc_expl(); + + void _maybe_begin_doc(); + void _maybe_end_doc(); + + void _start_doc_suddenly(); + void _end_doc_suddenly(); + void _end_doc_suddenly__pop(); + void _end_stream(); + + void _set_indentation(size_t indentation); + void _save_indentation(); + void _handle_indentation_pop_from_block_seq(); + void _handle_indentation_pop_from_block_map(); + void _handle_indentation_pop(ParserState const* dst); + + void _maybe_skip_comment(); + void _skip_comment(); + void _maybe_skip_whitespace_tokens(); + void _maybe_skipchars(char c); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + void _maybe_skipchars_up_to(char c, size_t max_to_skip); + #endif + template + void _skipchars(const char (&chars)[N]); + bool _maybe_scan_following_colon() noexcept; + bool _maybe_scan_following_comma() noexcept; + +public: + + /** @cond dev */ + 
template auto _filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result()); + template auto _filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result()); + template auto _filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result()); + template auto _filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result()); + template auto _filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result()); + /** @endcond */ + +public: + + /** @cond dev */ + template void _filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation); + template void _filter_nl_squoted(FilterProcessor &C4_RESTRICT proc); + template void _filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc); + + template bool _filter_ws_handle_to_first_non_space(FilterProcessor &C4_RESTRICT proc); + template void _filter_ws_copy_trailing(FilterProcessor &C4_RESTRICT proc); + template void _filter_ws_skip_trailing(FilterProcessor &C4_RESTRICT proc); + + template void _filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc); + + template void _filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation); + template size_t _handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp); + template size_t _extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len); + template void _filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation); + template void _filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len); + template size_t _filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl); + template void _filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len); + template void 
_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept; + + /** @endcond */ + +private: + + void _line_progressed(size_t ahead); + void _line_ended(); + void _line_ended_undo(); + + bool _finished_file() const; + bool _finished_line() const; + + void _scan_line(); + substr _peek_next_line(size_t pos=npos) const; + + inline bool _at_line_begin() const + { + return m_evt_handler->m_curr->line_contents.rem.begin() == m_evt_handler->m_curr->line_contents.full.begin(); + } + + void _relocate_arena(csubstr prev_arena, substr next_arena); + static void _s_relocate_arena(void*, csubstr prev_arena, substr next_arena); + +private: + + C4_ALWAYS_INLINE bool has_all(ParserFlag_t f) const noexcept { return (m_evt_handler->m_curr->flags & f) == f; } + C4_ALWAYS_INLINE bool has_any(ParserFlag_t f) const noexcept { return (m_evt_handler->m_curr->flags & f) != 0; } + C4_ALWAYS_INLINE bool has_none(ParserFlag_t f) const noexcept { return (m_evt_handler->m_curr->flags & f) == 0; } + static C4_ALWAYS_INLINE bool has_all(ParserFlag_t f, ParserState const* C4_RESTRICT s) noexcept { return (s->flags & f) == f; } + static C4_ALWAYS_INLINE bool has_any(ParserFlag_t f, ParserState const* C4_RESTRICT s) noexcept { return (s->flags & f) != 0; } + static C4_ALWAYS_INLINE bool has_none(ParserFlag_t f, ParserState const* C4_RESTRICT s) noexcept { return (s->flags & f) == 0; } + + #ifndef RYML_DBG + C4_ALWAYS_INLINE static void add_flags(ParserFlag_t on, ParserState *C4_RESTRICT s) noexcept { s->flags |= on; } + C4_ALWAYS_INLINE static void addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; s->flags |= on; } + C4_ALWAYS_INLINE static void rem_flags(ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; } + C4_ALWAYS_INLINE void add_flags(ParserFlag_t on) noexcept { m_evt_handler->m_curr->flags |= on; } + C4_ALWAYS_INLINE void 
addrem_flags(ParserFlag_t on, ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; m_evt_handler->m_curr->flags |= on; } + C4_ALWAYS_INLINE void rem_flags(ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; } + #else + static void add_flags(ParserFlag_t on, ParserState *C4_RESTRICT s); + static void addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState *C4_RESTRICT s); + static void rem_flags(ParserFlag_t off, ParserState *C4_RESTRICT s); + C4_ALWAYS_INLINE void add_flags(ParserFlag_t on) noexcept { add_flags(on, m_evt_handler->m_curr); } + C4_ALWAYS_INLINE void addrem_flags(ParserFlag_t on, ParserFlag_t off) noexcept { addrem_flags(on, off, m_evt_handler->m_curr); } + C4_ALWAYS_INLINE void rem_flags(ParserFlag_t off) noexcept { rem_flags(off, m_evt_handler->m_curr); } + #endif + +private: + + void _prepare_locations(); + void _resize_locations(size_t sz); + bool _locations_dirty() const; + + bool _location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const; + bool _location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const; + +private: + + void _reset(); + void _free(); + void _clr(); + + #ifdef RYML_DBG + template void _dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const; + #endif + template void _err(csubstr fmt, Args const& C4_RESTRICT ...args) const; + template void _errloc(csubstr fmt, Location const& loc, Args const& C4_RESTRICT ...args) const; + + template void _fmt_msg(DumpFn &&dumpfn) const; + +private: + + /** store pending tag or anchor/ref annotations */ + struct Annotation + { + struct Entry + { + csubstr str; + size_t indentation; + size_t line; + }; + Entry annotations[2]; + size_t num_entries; + }; + + void _add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line); + void _clear_annotations(Annotation *C4_RESTRICT dst); + bool _has_pending_annotations() const { return m_pending_tags.num_entries || 
m_pending_anchors.num_entries; } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + bool _handle_indentation_from_annotations(); + #endif + bool _annotations_require_key_container() const; + void _handle_annotations_before_blck_key_scalar(); + void _handle_annotations_before_blck_val_scalar(); + void _handle_annotations_before_start_mapblck(size_t current_line); + void _handle_annotations_before_start_mapblck_as_key(); + void _handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line); + size_t _select_indentation_from_annotations(size_t val_indentation, size_t val_line); + void _handle_directive(csubstr rem); + + void _check_tag(csubstr tag); + +private: + + ParserOptions m_options; + + csubstr m_file; + substr m_buf; + +public: + + /** @cond dev */ + EventHandler *C4_RESTRICT m_evt_handler; + /** @endcond */ + +private: + + Annotation m_pending_anchors; + Annotation m_pending_tags; + + bool m_was_inside_qmrk; + bool m_doc_empty = true; + +private: + + size_t *m_newline_offsets; + size_t m_newline_offsets_size; + size_t m_newline_offsets_capacity; + csubstr m_newline_offsets_buf; + +}; + +/** @cond dev */ +RYML_EXPORT C4_NO_INLINE size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept; +/** @endcond */ + + +/** Quickly inspect the source to estimate the number of nodes the + * resulting tree is likely to have. If a tree is empty before + * parsing, considerable time will be spent growing it, so calling + * this to reserve the tree size prior to parsing is likely to + * result in a time gain. We encourage using this method before + * parsing, but as always measure its impact in performance to + * obtain a good trade-off. + * + * @note since this method is meant for optimizing performance, it + * is approximate. The result may be actually smaller than the + * resulting number of nodes, notably if the YAML uses implicit + * maps as flow seq members as in `[these: are, individual: + * maps]`.
*/ +RYML_EXPORT id_type estimate_tree_capacity(csubstr src); + +/** @} */ + +} // namespace yml +} // namespace c4 + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#endif /* _C4_YML_PARSE_ENGINE_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/parser_state.hpp b/3rdparty/rapidyaml/include/c4/yml/parser_state.hpp new file mode 100644 index 00000000000000..1977cbf57fd8e7 --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/parser_state.hpp @@ -0,0 +1,205 @@ +#ifndef _C4_YML_PARSER_STATE_HPP_ +#define _C4_YML_PARSER_STATE_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "c4/yml/common.hpp" +#endif + +namespace c4 { +namespace yml { + +/** data type for @ref ParserState_e */ +using ParserFlag_t = int; + +/** Enumeration of the state flags for the parser */ +typedef enum : ParserFlag_t { + RTOP = 0x01 << 0, ///< reading at top level + RUNK = 0x01 << 1, ///< reading unknown state (when starting): must determine whether scalar, map or seq + RMAP = 0x01 << 2, ///< reading a map + RSEQ = 0x01 << 3, ///< reading a seq + FLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {} + BLCK = 0x01 << 5, ///< reading in block mode + QMRK = 0x01 << 6, ///< reading an explicit key (`? key`) + RKEY = 0x01 << 7, ///< reading a scalar as key + RVAL = 0x01 << 9, ///< reading a scalar as val + RKCL = 0x01 << 8, ///< reading the key colon (ie the : after the key in the map) + RNXT = 0x01 << 10, ///< read next val or keyval + SSCL = 0x01 << 11, ///< there's a stored scalar + QSCL = 0x01 << 12, ///< stored scalar was quoted + RSET = 0x01 << 13, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html + RDOC = 0x01 << 14, ///< reading a document + NDOC = 0x01 << 15, ///< no document mode. a document has ended and another has not started yet. + USTY = 0x01 << 16, ///< reading in unknown style mode - must determine FLOW or BLCK + //! reading an implicit map nested in an explicit seq. + //! eg, {key: [key2: value2, key3: value3]} + //! 
is parsed as {key: [{key2: value2}, {key3: value3}]} + RSEQIMAP = 0x01 << 17, +} ParserState_e; + +#ifdef RYML_DBG +/** @cond dev */ +namespace detail { +csubstr _parser_flags_to_str(substr buf, ParserFlag_t flags); +} // namespace +/** @endcond */ +#endif + + +/** Helper to control the line contents while parsing a buffer */ +struct LineContents +{ + substr rem; ///< the stripped line remainder; initially starts at the first non-space character + size_t indentation; ///< the number of spaces on the beginning of the line + substr full; ///< the full line, including newlines on the right + substr stripped; ///< the stripped line, excluding newlines on the right + + LineContents() = default; + + void reset_with_next_line(substr buf, size_t offset) + { + RYML_ASSERT(offset <= buf.len); + size_t e = offset; + // get the current line stripped of newline chars + while(e < buf.len && (buf.str[e] != '\n' && buf.str[e] != '\r')) + ++e; + RYML_ASSERT(e >= offset); + const substr stripped_ = buf.range(offset, e); + // advance pos to include the first line ending + if(e < buf.len && buf.str[e] == '\r') + ++e; + if(e < buf.len && buf.str[e] == '\n') + ++e; + const substr full_ = buf.range(offset, e); + reset(full_, stripped_); + } + + void reset(substr full_, substr stripped_) + { + rem = stripped_; + indentation = stripped_.first_not_of(' '); // find the first column where the character is not a space + full = full_; + stripped = stripped_; + } + + C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT + { + // WARNING: gcc x86 release builds were wrong (eg returning 0 + // when the result should be 4 ) when this function was like + // this: + // + //return current_col(rem); + // + // (see below for the full definition of the called overload + // of current_col()) + // + // ... 
so we explicitly inline the code in here: + RYML_ASSERT(rem.str >= full.str); + size_t col = static_cast(rem.str - full.str); + return col; + // + // this was happening only on builds specifically with (gcc + // AND x86 AND release); no other builds were having the + // problem: not in debug, not in x64, not in other + // architectures, not in clang, not in visual studio. WTF!? + // + // Enabling debug prints with RYML_DBG made the problem go + // away, so these could not be used to debug the + // problem. Adding prints inside the called current_col() also + // made the problem go away! WTF!??? + // + // a prize will be offered to anybody able to explain why this + // was happening. + } + + C4_ALWAYS_INLINE size_t current_col(csubstr s) const RYML_NOEXCEPT + { + RYML_ASSERT(s.str >= full.str); + RYML_ASSERT(full.is_super(s)); + size_t col = static_cast(s.str - full.str); + return col; + } +}; +static_assert(std::is_standard_layout::value, "LineContents not standard"); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +struct ParserState +{ + LineContents line_contents; + Location pos; + ParserFlag_t flags; + size_t indref; ///< the reference indentation in the current block scope + id_type level; + id_type node_id; ///< don't hold a pointer to the node as it will be relocated during tree resizes + size_t scalar_col; // the column where the scalar (or its quotes) begin + bool more_indented; + bool has_children; + + ParserState() = default; + + void start_parse(const char *file, id_type node_id_) + { + level = 0; + pos.name = to_csubstr(file); + pos.offset = 0; + pos.line = 1; + pos.col = 1; + node_id = node_id_; + more_indented = false; + scalar_col = 0; + indref = 0; + has_children = false; + } + + void reset_after_push() + { + node_id = NONE; + indref = npos; + 
more_indented = false; + ++level; + has_children = false; + } + + C4_ALWAYS_INLINE void reset_before_pop(ParserState const& to_pop) + { + pos = to_pop.pos; + line_contents = to_pop.line_contents; + } + +public: + + C4_ALWAYS_INLINE bool at_line_beginning() const noexcept + { + return line_contents.rem.str == line_contents.full.str; + } + C4_ALWAYS_INLINE bool indentation_eq() const noexcept + { + RYML_ASSERT(indref != npos); + return line_contents.indentation != npos && line_contents.indentation == indref; + } + C4_ALWAYS_INLINE bool indentation_ge() const noexcept + { + RYML_ASSERT(indref != npos); + return line_contents.indentation != npos && line_contents.indentation >= indref; + } + C4_ALWAYS_INLINE bool indentation_gt() const noexcept + { + RYML_ASSERT(indref != npos); + return line_contents.indentation != npos && line_contents.indentation > indref; + } + C4_ALWAYS_INLINE bool indentation_lt() const noexcept + { + RYML_ASSERT(indref != npos); + return line_contents.indentation != npos && line_contents.indentation < indref; + } +}; +static_assert(std::is_standard_layout::value, "ParserState not standard"); + + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_PARSER_STATE_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/reference_resolver.hpp b/3rdparty/rapidyaml/include/c4/yml/reference_resolver.hpp new file mode 100644 index 00000000000000..e9832ab6104867 --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/reference_resolver.hpp @@ -0,0 +1,74 @@ +#ifndef _C4_YML_REFERENCE_RESOLVER_HPP_ +#define _C4_YML_REFERENCE_RESOLVER_HPP_ + +#include "c4/yml/tree.hpp" +#include "c4/yml/detail/stack.hpp" + + +namespace c4 { +namespace yml { + +/** @addtogroup doc_ref_utils + * @{ + */ + +/** Reusable object to resolve references/aliases in the tree. */ +struct RYML_EXPORT ReferenceResolver +{ + ReferenceResolver() = default; + + /** Resolve references: for each reference, look for a matching + * anchor, and copy its contents to the ref node. 
+ * + * This method first does a full traversal of the tree to gather + * all anchors and references in a separate collection, then it + * goes through that collection to locate the names, which it does + * by obeying the YAML standard diktat that "an alias node refers + * to the most recent node in the serialization having the + * specified anchor" + * + * So, depending on the number of anchor/alias nodes, this is a + * potentially expensive operation, with a best-case linear + * complexity (from the initial traversal). + * + * @todo verify sanity against anchor-ref attacks (https://en.wikipedia.org/wiki/Billion_laughs_attack ) + */ + void resolve(Tree *t_); + +public: + + /** @cond dev */ + + struct RefData + { + NodeType type; + id_type node; + id_type prev_anchor; + id_type target; + id_type parent_ref; + id_type parent_ref_sibling; + }; + + void reset_(Tree *t_); + void gather_anchors_and_refs_(); + void gather_anchors_and_refs__(id_type n); + id_type count_anchors_and_refs_(id_type n); + + id_type lookup_(RefData *C4_RESTRICT ra); + +public: + + Tree *C4_RESTRICT m_tree; + /** We're using this stack purely as an array. 
*/ + detail::stack m_refs; + + /** @endcond */ +}; + +/** @} */ + +} // namespace yml +} // namespace c4 + + +#endif // _C4_YML_REFERENCE_RESOLVER_HPP_ diff --git a/3rdparty/rapidyaml/include/c4/yml/std/vector.hpp b/3rdparty/rapidyaml/include/c4/yml/std/vector.hpp index 49963ee93064e2..44dedf02aa7167 100644 --- a/3rdparty/rapidyaml/include/c4/yml/std/vector.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/std/vector.hpp @@ -17,17 +17,19 @@ template void write(c4::yml::NodeRef *n, std::vector const& vec) { *n |= c4::yml::SEQ; - for(auto const& v : vec) + for(V const& v : vec) n->append_child() << v; } template bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) { - vec->resize(n.num_children()); + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wuseless-cast") + vec->resize(static_cast(n.num_children())); + C4_SUPPRESS_WARNING_GCC_POP size_t pos = 0; - for(auto const ch : n) - ch >> (*vec)[pos++]; + for(ConstNodeRef const child : n) + child >> (*vec)[pos++]; return true; } @@ -36,12 +38,14 @@ bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) template bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) { - vec->resize(n.num_children()); + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wuseless-cast") + vec->resize(static_cast(n.num_children())); + C4_SUPPRESS_WARNING_GCC_POP size_t pos = 0; - bool tmp = false; - for(auto const ch : n) + bool tmp = {}; + for(ConstNodeRef const child : n) { - ch >> tmp; + child >> tmp; (*vec)[pos++] = tmp; } return true; diff --git a/3rdparty/rapidyaml/include/c4/yml/tag.hpp b/3rdparty/rapidyaml/include/c4/yml/tag.hpp new file mode 100644 index 00000000000000..bbda59d7ba0f0c --- /dev/null +++ b/3rdparty/rapidyaml/include/c4/yml/tag.hpp @@ -0,0 +1,84 @@ +#ifndef _C4_YML_TAG_HPP_ +#define _C4_YML_TAG_HPP_ + +#include + +namespace c4 { +namespace yml { + +class Tree; + +/** @addtogroup doc_tag_utils + * + * @{ + */ + + +#ifndef RYML_MAX_TAG_DIRECTIVES +/** the maximum number of tag directives in a Tree */ +#define RYML_MAX_TAG_DIRECTIVES 4 
+#endif + +/** the integral type necessary to cover all the bits marking node tags */ +using tag_bits = uint16_t; + +/** a bit mask for marking tags for types */ +typedef enum : tag_bits { + TAG_NONE = 0, + // container types + TAG_MAP = 1, /**< !!map Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */ + TAG_OMAP = 2, /**< !!omap Ordered sequence of key: value pairs without duplicates. @see https://yaml.org/type/omap.html */ + TAG_PAIRS = 3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */ + TAG_SET = 4, /**< !!set Unordered set of non-equal values. @see https://yaml.org/type/set.html */ + TAG_SEQ = 5, /**< !!seq Sequence of arbitrary values. @see https://yaml.org/type/seq.html */ + // scalar types + TAG_BINARY = 6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */ + TAG_BOOL = 7, /**< !!bool Mathematical Booleans. @see https://yaml.org/type/bool.html */ + TAG_FLOAT = 8, /**< !!float Floating-point approximation to real numbers. https://yaml.org/type/float.html */ + TAG_INT = 9, /**< !!int Mathematical integers. https://yaml.org/type/int.html */ + TAG_MERGE = 10, /**< !!merge Specify one or more mappings to be merged with the current one. https://yaml.org/type/merge.html */ + TAG_NULL = 11, /**< !!null Devoid of value. https://yaml.org/type/null.html */ + TAG_STR = 12, /**< !!str A sequence of zero or more Unicode characters. 
https://yaml.org/type/str.html */ + TAG_TIMESTAMP = 13, /**< !!timestamp A point in time https://yaml.org/type/timestamp.html */ + TAG_VALUE = 14, /**< !!value Specify the default value of a mapping https://yaml.org/type/value.html */ + TAG_YAML = 15, /**< !!yaml Keys for encoding YAML in YAML https://yaml.org/type/yaml.html */ +} YamlTag_e; + +RYML_EXPORT YamlTag_e to_tag(csubstr tag); +RYML_EXPORT csubstr from_tag(YamlTag_e tag); +RYML_EXPORT csubstr from_tag_long(YamlTag_e tag); +RYML_EXPORT csubstr normalize_tag(csubstr tag); +RYML_EXPORT csubstr normalize_tag_long(csubstr tag); +RYML_EXPORT csubstr normalize_tag_long(csubstr tag, substr output); + +RYML_EXPORT bool is_custom_tag(csubstr tag); + + +struct RYML_EXPORT TagDirective +{ + /** Eg `!e!` in `%TAG !e! tag:example.com,2000:app/` */ + csubstr handle; + /** Eg `tag:example.com,2000:app/` in `%TAG !e! tag:example.com,2000:app/` */ + csubstr prefix; + /** The next node to which this tag directive applies */ + id_type next_node_id; + + bool create_from_str(csubstr directive_); ///< leaves next_node_id unfilled + bool create_from_str(csubstr directive_, Tree *tree); + size_t transform(csubstr tag, substr output, Callbacks const& callbacks) const; +}; + +struct RYML_EXPORT TagDirectiveRange +{ + TagDirective const* C4_RESTRICT b; + TagDirective const* C4_RESTRICT e; + C4_ALWAYS_INLINE TagDirective const* begin() const noexcept { return b; } + C4_ALWAYS_INLINE TagDirective const* end() const noexcept { return e; } +}; + +/** @} */ + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_TAG_HPP_ */ diff --git a/3rdparty/rapidyaml/include/c4/yml/tree.hpp b/3rdparty/rapidyaml/include/c4/yml/tree.hpp index 25a6a18842dfa0..49b1ec7c27814d 100644 --- a/3rdparty/rapidyaml/include/c4/yml/tree.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/tree.hpp @@ -5,11 +5,22 @@ #include "c4/error.hpp" #include "c4/types.hpp" +#ifndef _C4_YML_FWD_HPP_ +#include "c4/yml/fwd.hpp" +#endif #ifndef _C4_YML_COMMON_HPP_ #include 
"c4/yml/common.hpp" #endif - +#ifndef C4_YML_NODE_TYPE_HPP_ +#include "c4/yml/node_type.hpp" +#endif +#ifndef _C4_YML_TAG_HPP_ +#include "c4/yml/tag.hpp" +#endif +#ifndef _C4_CHARCONV_HPP_ #include +#endif + #include #include @@ -19,20 +30,13 @@ C4_SUPPRESS_WARNING_MSVC(4251) // needs to have dll-interface to be used by clie C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value' C4_SUPPRESS_WARNING_GCC_CLANG_PUSH C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast") +C4_SUPPRESS_WARNING_GCC("-Wuseless-cast") C4_SUPPRESS_WARNING_GCC("-Wtype-limits") namespace c4 { namespace yml { -struct NodeScalar; -struct NodeInit; -struct NodeData; -class NodeRef; -class ConstNodeRef; -class Tree; - - /** encode a floating point value to a string. */ template size_t to_chars_float(substr buf, T val) @@ -60,6 +64,10 @@ bool from_chars_float(csubstr buf, T *C4_RESTRICT val) { return true; } + else if(C4_UNLIKELY(buf.begins_with('+'))) + { + return from_chars(buf.sub(1), val); + } else if(C4_UNLIKELY(buf == ".nan" || buf == ".NaN" || buf == ".NAN")) { *val = std::numeric_limits::quiet_NaN(); @@ -87,240 +95,6 @@ bool from_chars_float(csubstr buf, T *C4_RESTRICT val) //----------------------------------------------------------------------------- -/** @addtogroup doc_tag_utils - * - * @{ - */ - -/** the integral type necessary to cover all the bits marking node tags */ -using tag_bits = uint16_t; - -/** a bit mask for marking tags for types */ -typedef enum : tag_bits { - // container types - TAG_NONE = 0, /**< no tag is set */ - TAG_MAP = 1, /**< !!map Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */ - TAG_OMAP = 2, /**< !!omap Ordered sequence of key: value pairs without duplicates. @see https://yaml.org/type/omap.html */ - TAG_PAIRS = 3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */ - TAG_SET = 4, /**< !!set Unordered set of non-equal values. 
@see https://yaml.org/type/set.html */ - TAG_SEQ = 5, /**< !!seq Sequence of arbitrary values. @see https://yaml.org/type/seq.html */ - // scalar types - TAG_BINARY = 6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */ - TAG_BOOL = 7, /**< !!bool Mathematical Booleans. @see https://yaml.org/type/bool.html */ - TAG_FLOAT = 8, /**< !!float Floating-point approximation to real numbers. https://yaml.org/type/float.html */ - TAG_INT = 9, /**< !!float Mathematical integers. https://yaml.org/type/int.html */ - TAG_MERGE = 10, /**< !!merge Specify one or more mapping to be merged with the current one. https://yaml.org/type/merge.html */ - TAG_NULL = 11, /**< !!null Devoid of value. https://yaml.org/type/null.html */ - TAG_STR = 12, /**< !!str A sequence of zero or more Unicode characters. https://yaml.org/type/str.html */ - TAG_TIMESTAMP = 13, /**< !!timestamp A point in time https://yaml.org/type/timestamp.html */ - TAG_VALUE = 14, /**< !!value Specify the default value of a mapping https://yaml.org/type/value.html */ - TAG_YAML = 15, /**< !!yaml Specify the default value of a mapping https://yaml.org/type/yaml.html */ -} YamlTag_e; - -YamlTag_e to_tag(csubstr tag); -csubstr from_tag(YamlTag_e tag); -csubstr from_tag_long(YamlTag_e tag); -csubstr normalize_tag(csubstr tag); -csubstr normalize_tag_long(csubstr tag); - -struct TagDirective -{ - /** Eg `!e!` in `%TAG !e! tag:example.com,2000:app/` */ - csubstr handle; - /** Eg `tag:example.com,2000:app/` in `%TAG !e! 
tag:example.com,2000:app/` */ - csubstr prefix; - /** The next node to which this tag directive applies */ - size_t next_node_id; -}; - -#ifndef RYML_MAX_TAG_DIRECTIVES -/** the maximum number of tag directives in a Tree */ -#define RYML_MAX_TAG_DIRECTIVES 4 -#endif - -/** @} */ - - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - - -/** @addtogroup doc_node_type - * - * @{ - */ - - -/** the integral type necessary to cover all the bits marking node types */ -using type_bits = uint64_t; - - -/** a bit mask for marking node types. See NodeType */ -typedef enum : type_bits { - // a convenience define, undefined below - #define c4bit(v) (type_bits(1) << v) - NOTYPE = 0, ///< no node type is set - VAL = c4bit(0), ///< a leaf node, has a (possibly empty) value - KEY = c4bit(1), ///< is member of a map, must have non-empty key - MAP = c4bit(2), ///< a map: a parent of keyvals - SEQ = c4bit(3), ///< a seq: a parent of vals - DOC = c4bit(4), ///< a document - STREAM = c4bit(5)|SEQ, ///< a stream: a seq of docs - KEYREF = c4bit(6), ///< a *reference: the key references an &anchor - VALREF = c4bit(7), ///< a *reference: the val references an &anchor - KEYANCH = c4bit(8), ///< the key has an &anchor - VALANCH = c4bit(9), ///< the val has an &anchor - KEYTAG = c4bit(10), ///< the key has an explicit tag/type - VALTAG = c4bit(11), ///< the val has an explicit tag/type - _TYMASK = c4bit(12)-1, // all the bits up to here - VALQUO = c4bit(12), ///< the val is quoted by '', "", > or | - KEYQUO = c4bit(13), ///< the key is quoted by '', "", > or | - KEYVAL = KEY|VAL, - KEYSEQ = KEY|SEQ, - KEYMAP = KEY|MAP, - DOCMAP = DOC|MAP, - DOCSEQ = DOC|SEQ, - DOCVAL = DOC|VAL, - _KEYMASK = KEY | KEYQUO | KEYANCH | KEYREF | KEYTAG, - _VALMASK = VAL | VALQUO | VALANCH | VALREF | VALTAG, - // these flags 
are from a work in progress and should be used with care - _WIP_STYLE_FLOW_SL = c4bit(14), ///< mark container with single-line flow format (seqs as `[val1,val2]`, maps as `{key: val, key2: val2}`) - _WIP_STYLE_FLOW_ML = c4bit(15), ///< mark container with multi-line flow format (seqs as `[val1,\nval2]`, maps as `{key: val,\nkey2: val2}`) - _WIP_STYLE_BLOCK = c4bit(16), ///< mark container with block format (seqs as `- val\n`, maps as `key: val`) - _WIP_KEY_LITERAL = c4bit(17), ///< mark key scalar as multiline, block literal | - _WIP_VAL_LITERAL = c4bit(18), ///< mark val scalar as multiline, block literal | - _WIP_KEY_FOLDED = c4bit(19), ///< mark key scalar as multiline, block folded > - _WIP_VAL_FOLDED = c4bit(20), ///< mark val scalar as multiline, block folded > - _WIP_KEY_SQUO = c4bit(21), ///< mark key scalar as single quoted - _WIP_VAL_SQUO = c4bit(22), ///< mark val scalar as single quoted - _WIP_KEY_DQUO = c4bit(23), ///< mark key scalar as double quoted - _WIP_VAL_DQUO = c4bit(24), ///< mark val scalar as double quoted - _WIP_KEY_PLAIN = c4bit(25), ///< mark key scalar as plain scalar (unquoted, even when multiline) - _WIP_VAL_PLAIN = c4bit(26), ///< mark val scalar as plain scalar (unquoted, even when multiline) - _WIP_KEY_STYLE = _WIP_KEY_LITERAL|_WIP_KEY_FOLDED|_WIP_KEY_SQUO|_WIP_KEY_DQUO|_WIP_KEY_PLAIN, - _WIP_VAL_STYLE = _WIP_VAL_LITERAL|_WIP_VAL_FOLDED|_WIP_VAL_SQUO|_WIP_VAL_DQUO|_WIP_VAL_PLAIN, - _WIP_KEY_FT_NL = c4bit(27), ///< features: mark key scalar as having \n in its contents - _WIP_VAL_FT_NL = c4bit(28), ///< features: mark val scalar as having \n in its contents - _WIP_KEY_FT_SQ = c4bit(29), ///< features: mark key scalar as having single quotes in its contents - _WIP_VAL_FT_SQ = c4bit(30), ///< features: mark val scalar as having single quotes in its contents - _WIP_KEY_FT_DQ = c4bit(31), ///< features: mark key scalar as having double quotes in its contents - _WIP_VAL_FT_DQ = c4bit(32), ///< features: mark val scalar as having double 
quotes in its contents - #undef c4bit -} NodeType_e; - - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -/** wraps a NodeType_e element with some syntactic sugar and predicates */ -struct NodeType -{ -public: - - NodeType_e type; - -public: - - C4_ALWAYS_INLINE NodeType() : type(NOTYPE) {} - C4_ALWAYS_INLINE NodeType(NodeType_e t) : type(t) {} - C4_ALWAYS_INLINE NodeType(type_bits t) : type((NodeType_e)t) {} - - C4_ALWAYS_INLINE const char *type_str() const { return type_str(type); } - static const char* type_str(NodeType_e t); - - C4_ALWAYS_INLINE void set(NodeType_e t) { type = t; } - C4_ALWAYS_INLINE void set(type_bits t) { type = (NodeType_e)t; } - - C4_ALWAYS_INLINE void add(NodeType_e t) { type = (NodeType_e)(type|t); } - C4_ALWAYS_INLINE void add(type_bits t) { type = (NodeType_e)(type|t); } - - C4_ALWAYS_INLINE void rem(NodeType_e t) { type = (NodeType_e)(type & ~t); } - C4_ALWAYS_INLINE void rem(type_bits t) { type = (NodeType_e)(type & ~t); } - - C4_ALWAYS_INLINE void clear() { type = NOTYPE; } - -public: - - C4_ALWAYS_INLINE operator NodeType_e & C4_RESTRICT () { return type; } - C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; } - - C4_ALWAYS_INLINE bool operator== (NodeType_e t) const { return type == t; } - C4_ALWAYS_INLINE bool operator!= (NodeType_e t) const { return type != t; } - -public: - - #if defined(__clang__) - # pragma clang diagnostic push - # pragma clang diagnostic ignored "-Wnull-dereference" - #elif defined(__GNUC__) - # pragma GCC diagnostic push - # if __GNUC__ >= 6 - # pragma GCC diagnostic ignored "-Wnull-dereference" - # endif - #endif - - C4_ALWAYS_INLINE bool is_notype() const { return type == NOTYPE; } - C4_ALWAYS_INLINE bool is_stream() const { return ((type & STREAM) == STREAM) != 0; } - 
C4_ALWAYS_INLINE bool is_doc() const { return (type & DOC) != 0; } - C4_ALWAYS_INLINE bool is_container() const { return (type & (MAP|SEQ|STREAM)) != 0; } - C4_ALWAYS_INLINE bool is_map() const { return (type & MAP) != 0; } - C4_ALWAYS_INLINE bool is_seq() const { return (type & SEQ) != 0; } - C4_ALWAYS_INLINE bool has_key() const { return (type & KEY) != 0; } - C4_ALWAYS_INLINE bool has_val() const { return (type & VAL) != 0; } - C4_ALWAYS_INLINE bool is_val() const { return (type & KEYVAL) == VAL; } - C4_ALWAYS_INLINE bool is_keyval() const { return (type & KEYVAL) == KEYVAL; } - C4_ALWAYS_INLINE bool has_key_tag() const { return (type & (KEY|KEYTAG)) == (KEY|KEYTAG); } - C4_ALWAYS_INLINE bool has_val_tag() const { return ((type & VALTAG) && (type & (VAL|MAP|SEQ))); } - C4_ALWAYS_INLINE bool has_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } - C4_ALWAYS_INLINE bool is_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } - C4_ALWAYS_INLINE bool has_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } - C4_ALWAYS_INLINE bool is_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } - C4_ALWAYS_INLINE bool has_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } - C4_ALWAYS_INLINE bool is_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } - C4_ALWAYS_INLINE bool is_key_ref() const { return (type & KEYREF) != 0; } - C4_ALWAYS_INLINE bool is_val_ref() const { return (type & VALREF) != 0; } - C4_ALWAYS_INLINE bool is_ref() const { return (type & (KEYREF|VALREF)) != 0; } - C4_ALWAYS_INLINE bool is_anchor_or_ref() const { return (type & (KEYANCH|VALANCH|KEYREF|VALREF)) != 0; } - C4_ALWAYS_INLINE bool is_key_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO); } - C4_ALWAYS_INLINE bool is_val_quoted() const { return (type & (VAL|VALQUO)) == (VAL|VALQUO); } - C4_ALWAYS_INLINE bool is_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO) || 
(type & (VAL|VALQUO)) == (VAL|VALQUO); } - - // these predicates are a work in progress and subject to change. Don't use yet. - C4_ALWAYS_INLINE bool default_block() const { return (type & (_WIP_STYLE_BLOCK|_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) == 0; } - C4_ALWAYS_INLINE bool marked_block() const { return (type & (_WIP_STYLE_BLOCK)) != 0; } - C4_ALWAYS_INLINE bool marked_flow_sl() const { return (type & (_WIP_STYLE_FLOW_SL)) != 0; } - C4_ALWAYS_INLINE bool marked_flow_ml() const { return (type & (_WIP_STYLE_FLOW_ML)) != 0; } - C4_ALWAYS_INLINE bool marked_flow() const { return (type & (_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) != 0; } - C4_ALWAYS_INLINE bool key_marked_literal() const { return (type & (_WIP_KEY_LITERAL)) != 0; } - C4_ALWAYS_INLINE bool val_marked_literal() const { return (type & (_WIP_VAL_LITERAL)) != 0; } - C4_ALWAYS_INLINE bool key_marked_folded() const { return (type & (_WIP_KEY_FOLDED)) != 0; } - C4_ALWAYS_INLINE bool val_marked_folded() const { return (type & (_WIP_VAL_FOLDED)) != 0; } - C4_ALWAYS_INLINE bool key_marked_squo() const { return (type & (_WIP_KEY_SQUO)) != 0; } - C4_ALWAYS_INLINE bool val_marked_squo() const { return (type & (_WIP_VAL_SQUO)) != 0; } - C4_ALWAYS_INLINE bool key_marked_dquo() const { return (type & (_WIP_KEY_DQUO)) != 0; } - C4_ALWAYS_INLINE bool val_marked_dquo() const { return (type & (_WIP_VAL_DQUO)) != 0; } - C4_ALWAYS_INLINE bool key_marked_plain() const { return (type & (_WIP_KEY_PLAIN)) != 0; } - C4_ALWAYS_INLINE bool val_marked_plain() const { return (type & (_WIP_VAL_PLAIN)) != 0; } - - #if defined(__clang__) - # pragma clang diagnostic pop - #elif defined(__GNUC__) - # pragma GCC diagnostic pop - #endif - -}; - - -/** @} */ - - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - - /** @addtogroup doc_tree * * @{ @@ 
-393,12 +167,14 @@ struct NodeInit NodeInit(NodeType_e t) : type(t), key(), val() {} /// initialize as a sequence member NodeInit(NodeScalar const& v) : type(VAL), key(), val(v) { _add_flags(); } + /// initialize as a sequence member with explicit type + NodeInit(NodeScalar const& v, NodeType_e t) : type(t|VAL), key(), val(v) { _add_flags(); } /// initialize as a mapping member - NodeInit( NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } + NodeInit( NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k), val(v) { _add_flags(); } /// initialize as a mapping member with explicit type - NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t ), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } - /// initialize as a mapping member with explicit type (eg SEQ or MAP) - NodeInit(NodeType_e t, NodeScalar const& k ) : type(t ), key(k.tag, k.scalar), val( ) { _add_flags(KEY); } + NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t), key(k), val(v) { _add_flags(); } + /// initialize as a mapping member with explicit type (eg for SEQ or MAP) + NodeInit(NodeType_e t, NodeScalar const& k ) : type(t), key(k), val( ) { _add_flags(KEY); } public: @@ -449,11 +225,11 @@ struct NodeData NodeScalar m_key; NodeScalar m_val; - size_t m_parent; - size_t m_first_child; - size_t m_last_child; - size_t m_next_sibling; - size_t m_prev_sibling; + id_type m_parent; + id_type m_first_child; + id_type m_last_child; + id_type m_next_sibling; + id_type m_prev_sibling; }; C4_MUST_BE_TRIVIAL_COPY(NodeData); @@ -471,16 +247,16 @@ class RYML_EXPORT Tree Tree() : Tree(get_callbacks()) {} Tree(Callbacks const& cb); - Tree(size_t node_capacity, size_t arena_capacity=0) : Tree(node_capacity, arena_capacity, get_callbacks()) {} - Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb); + Tree(id_type node_capacity, size_t arena_capacity=0) : Tree(node_capacity, 
arena_capacity, get_callbacks()) {} + Tree(id_type node_capacity, size_t arena_capacity, Callbacks const& cb); ~Tree(); Tree(Tree const& that); - Tree(Tree && that); + Tree(Tree && that) noexcept; Tree& operator= (Tree const& that); - Tree& operator= (Tree && that); + Tree& operator= (Tree && that) RYML_NOEXCEPT; /** @} */ @@ -489,7 +265,7 @@ class RYML_EXPORT Tree /** @name memory and sizing */ /** @{ */ - void reserve(size_t node_capacity); + void reserve(id_type node_capacity); /** clear the tree and zero every node * @note does NOT clear the arena @@ -499,9 +275,9 @@ class RYML_EXPORT Tree inline bool empty() const { return m_size == 0; } - inline size_t size() const { return m_size; } - inline size_t capacity() const { return m_cap; } - inline size_t slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; } + inline id_type size() const { return m_size; } + inline id_type capacity() const { return m_cap; } + inline id_type slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; } Callbacks const& callbacks() const { return m_callbacks; } void callbacks(Callbacks const& cb) { m_callbacks = cb; } @@ -515,17 +291,17 @@ class RYML_EXPORT Tree //! get the index of a node belonging to this tree. //! @p n can be nullptr, in which case NONE is returned - size_t id(NodeData const* n) const + id_type id(NodeData const* n) const { if( ! n) return NONE; _RYML_CB_ASSERT(m_callbacks, n >= m_buf && n < m_buf + m_cap); - return static_cast(n - m_buf); + return static_cast(n - m_buf); } //! get a pointer to a node's NodeData. //! i can be NONE, in which case a nullptr is returned - inline NodeData *get(size_t node) + inline NodeData *get(id_type node) { if(node == NONE) return nullptr; @@ -534,7 +310,7 @@ class RYML_EXPORT Tree } //! get a pointer to a node's NodeData. //! i can be NONE, in which case a nullptr is returned. 
- inline NodeData const *get(size_t node) const + inline NodeData const *get(id_type node) const { if(node == NONE) return nullptr; @@ -544,22 +320,22 @@ class RYML_EXPORT Tree //! An if-less form of get() that demands a valid node index. //! This function is implementation only; use at your own risk. - inline NodeData * _p(size_t node) { _RYML_CB_ASSERT(m_callbacks, node != NONE && node >= 0 && node < m_cap); return m_buf + node; } + inline NodeData * _p(id_type node) { _RYML_CB_ASSERT(m_callbacks, node != NONE && node >= 0 && node < m_cap); return m_buf + node; } //! An if-less form of get() that demands a valid node index. //! This function is implementation only; use at your own risk. - inline NodeData const * _p(size_t node) const { _RYML_CB_ASSERT(m_callbacks, node != NONE && node >= 0 && node < m_cap); return m_buf + node; } + inline NodeData const * _p(id_type node) const { _RYML_CB_ASSERT(m_callbacks, node != NONE && node >= 0 && node < m_cap); return m_buf + node; } //! Get the id of the root node - size_t root_id() { if(m_cap == 0) { reserve(16); } _RYML_CB_ASSERT(m_callbacks, m_cap > 0 && m_size > 0); return 0; } + id_type root_id() { if(m_cap == 0) { reserve(16); } _RYML_CB_ASSERT(m_callbacks, m_cap > 0 && m_size > 0); return 0; } //! Get the id of the root node - size_t root_id() const { _RYML_CB_ASSERT(m_callbacks, m_cap > 0 && m_size > 0); return 0; } + id_type root_id() const { _RYML_CB_ASSERT(m_callbacks, m_cap > 0 && m_size > 0); return 0; } //! Get a NodeRef of a node by id - NodeRef ref(size_t node); + NodeRef ref(id_type node); //! Get a NodeRef of a node by id - ConstNodeRef ref(size_t node) const; + ConstNodeRef ref(id_type node) const; //! Get a NodeRef of a node by id - ConstNodeRef cref(size_t node) const; + ConstNodeRef cref(id_type node) const; //! Get the root as a NodeRef NodeRef rootref(); @@ -570,10 +346,13 @@ class RYML_EXPORT Tree //! get the i-th document of the stream //! 
@note @p i is NOT the node id, but the doc position within the stream - NodeRef docref(size_t i); + NodeRef docref(id_type i); + //! get the i-th document of the stream + //! @note @p i is NOT the node id, but the doc position within the stream + ConstNodeRef docref(id_type i) const; //! get the i-th document of the stream //! @note @p i is NOT the node id, but the doc position within the stream - ConstNodeRef docref(size_t i) const; + ConstNodeRef cdocref(id_type i) const; //! find a root child by name, return it as a NodeRef //! @note requires the root to be a map. @@ -584,10 +363,10 @@ class RYML_EXPORT Tree //! find a root child by index: return the root node's @p i-th child as a NodeRef //! @note @p i is NOT the node id, but the child's position - NodeRef operator[] (size_t i); + NodeRef operator[] (id_type i); //! find a root child by index: return the root node's @p i-th child as a NodeRef //! @note @p i is NOT the node id, but the child's position - ConstNodeRef operator[] (size_t i) const; + ConstNodeRef operator[] (id_type i) const; /** @} */ @@ -596,73 +375,76 @@ class RYML_EXPORT Tree /** @name node property getters */ /** @{ */ - NodeType type(size_t node) const { return _p(node)->m_type; } - const char* type_str(size_t node) const { return NodeType::type_str(_p(node)->m_type); } + NodeType type(id_type node) const { return _p(node)->m_type; } + const char* type_str(id_type node) const { return NodeType::type_str(_p(node)->m_type); } - csubstr const& key (size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); return _p(node)->m_key.scalar; } - csubstr const& key_tag (size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_key_tag(node)); return _p(node)->m_key.tag; } - csubstr const& key_ref (size_t node) const { _RYML_CB_ASSERT(m_callbacks, is_key_ref(node) && ! has_key_anchor(node)); return _p(node)->m_key.anchor; } - csubstr const& key_anchor(size_t node) const { _RYML_CB_ASSERT(m_callbacks, ! 
is_key_ref(node) && has_key_anchor(node)); return _p(node)->m_key.anchor; } - NodeScalar const& keysc (size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); return _p(node)->m_key; } + csubstr const& key (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); return _p(node)->m_key.scalar; } + csubstr const& key_tag (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key_tag(node)); return _p(node)->m_key.tag; } + csubstr const& key_ref (id_type node) const { _RYML_CB_ASSERT(m_callbacks, is_key_ref(node)); return _p(node)->m_key.anchor; } + csubstr const& key_anchor(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key_anchor(node)); return _p(node)->m_key.anchor; } + NodeScalar const& keysc (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); return _p(node)->m_key; } - csubstr const& val (size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); return _p(node)->m_val.scalar; } - csubstr const& val_tag (size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_val_tag(node)); return _p(node)->m_val.tag; } - csubstr const& val_ref (size_t node) const { _RYML_CB_ASSERT(m_callbacks, is_val_ref(node) && ! has_val_anchor(node)); return _p(node)->m_val.anchor; } - csubstr const& val_anchor(size_t node) const { _RYML_CB_ASSERT(m_callbacks, ! 
is_val_ref(node) && has_val_anchor(node)); return _p(node)->m_val.anchor; } - NodeScalar const& valsc (size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); return _p(node)->m_val; } + csubstr const& val (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); return _p(node)->m_val.scalar; } + csubstr const& val_tag (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val_tag(node)); return _p(node)->m_val.tag; } + csubstr const& val_ref (id_type node) const { _RYML_CB_ASSERT(m_callbacks, is_val_ref(node)); return _p(node)->m_val.anchor; } + csubstr const& val_anchor(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val_anchor(node)); return _p(node)->m_val.anchor; } + NodeScalar const& valsc (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); return _p(node)->m_val; } /** @} */ public: - /** @name node predicates */ + /** @name node type predicates */ /** @{ */ - C4_ALWAYS_INLINE bool is_stream(size_t node) const { return _p(node)->m_type.is_stream(); } - C4_ALWAYS_INLINE bool is_doc(size_t node) const { return _p(node)->m_type.is_doc(); } - C4_ALWAYS_INLINE bool is_container(size_t node) const { return _p(node)->m_type.is_container(); } - C4_ALWAYS_INLINE bool is_map(size_t node) const { return _p(node)->m_type.is_map(); } - C4_ALWAYS_INLINE bool is_seq(size_t node) const { return _p(node)->m_type.is_seq(); } - C4_ALWAYS_INLINE bool has_key(size_t node) const { return _p(node)->m_type.has_key(); } - C4_ALWAYS_INLINE bool has_val(size_t node) const { return _p(node)->m_type.has_val(); } - C4_ALWAYS_INLINE bool is_val(size_t node) const { return _p(node)->m_type.is_val(); } - C4_ALWAYS_INLINE bool is_keyval(size_t node) const { return _p(node)->m_type.is_keyval(); } - C4_ALWAYS_INLINE bool has_key_tag(size_t node) const { return _p(node)->m_type.has_key_tag(); } - C4_ALWAYS_INLINE bool has_val_tag(size_t node) const { return _p(node)->m_type.has_val_tag(); } - C4_ALWAYS_INLINE bool has_key_anchor(size_t node) const { 
return _p(node)->m_type.has_key_anchor(); } - C4_ALWAYS_INLINE bool is_key_anchor(size_t node) const { return _p(node)->m_type.is_key_anchor(); } - C4_ALWAYS_INLINE bool has_val_anchor(size_t node) const { return _p(node)->m_type.has_val_anchor(); } - C4_ALWAYS_INLINE bool is_val_anchor(size_t node) const { return _p(node)->m_type.is_val_anchor(); } - C4_ALWAYS_INLINE bool has_anchor(size_t node) const { return _p(node)->m_type.has_anchor(); } - C4_ALWAYS_INLINE bool is_anchor(size_t node) const { return _p(node)->m_type.is_anchor(); } - C4_ALWAYS_INLINE bool is_key_ref(size_t node) const { return _p(node)->m_type.is_key_ref(); } - C4_ALWAYS_INLINE bool is_val_ref(size_t node) const { return _p(node)->m_type.is_val_ref(); } - C4_ALWAYS_INLINE bool is_ref(size_t node) const { return _p(node)->m_type.is_ref(); } - C4_ALWAYS_INLINE bool is_anchor_or_ref(size_t node) const { return _p(node)->m_type.is_anchor_or_ref(); } - C4_ALWAYS_INLINE bool is_key_quoted(size_t node) const { return _p(node)->m_type.is_key_quoted(); } - C4_ALWAYS_INLINE bool is_val_quoted(size_t node) const { return _p(node)->m_type.is_val_quoted(); } - C4_ALWAYS_INLINE bool is_quoted(size_t node) const { return _p(node)->m_type.is_quoted(); } - - C4_ALWAYS_INLINE bool parent_is_seq(size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_parent(node)); return is_seq(_p(node)->m_parent); } - C4_ALWAYS_INLINE bool parent_is_map(size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_parent(node)); return is_map(_p(node)->m_parent); } + C4_ALWAYS_INLINE bool type_has_any(id_type node, NodeType_e bits) const { return _p(node)->m_type.has_any(bits); } + C4_ALWAYS_INLINE bool type_has_all(id_type node, NodeType_e bits) const { return _p(node)->m_type.has_all(bits); } + C4_ALWAYS_INLINE bool type_has_none(id_type node, NodeType_e bits) const { return _p(node)->m_type.has_none(bits); } + + C4_ALWAYS_INLINE bool is_stream(id_type node) const { return _p(node)->m_type.is_stream(); } + C4_ALWAYS_INLINE bool 
is_doc(id_type node) const { return _p(node)->m_type.is_doc(); } + C4_ALWAYS_INLINE bool is_container(id_type node) const { return _p(node)->m_type.is_container(); } + C4_ALWAYS_INLINE bool is_map(id_type node) const { return _p(node)->m_type.is_map(); } + C4_ALWAYS_INLINE bool is_seq(id_type node) const { return _p(node)->m_type.is_seq(); } + C4_ALWAYS_INLINE bool has_key(id_type node) const { return _p(node)->m_type.has_key(); } + C4_ALWAYS_INLINE bool has_val(id_type node) const { return _p(node)->m_type.has_val(); } + C4_ALWAYS_INLINE bool is_val(id_type node) const { return _p(node)->m_type.is_val(); } + C4_ALWAYS_INLINE bool is_keyval(id_type node) const { return _p(node)->m_type.is_keyval(); } + C4_ALWAYS_INLINE bool has_key_tag(id_type node) const { return _p(node)->m_type.has_key_tag(); } + C4_ALWAYS_INLINE bool has_val_tag(id_type node) const { return _p(node)->m_type.has_val_tag(); } + C4_ALWAYS_INLINE bool has_key_anchor(id_type node) const { return _p(node)->m_type.has_key_anchor(); } + C4_ALWAYS_INLINE bool has_val_anchor(id_type node) const { return _p(node)->m_type.has_val_anchor(); } + C4_ALWAYS_INLINE bool has_anchor(id_type node) const { return _p(node)->m_type.has_anchor(); } + C4_ALWAYS_INLINE bool is_key_ref(id_type node) const { return _p(node)->m_type.is_key_ref(); } + C4_ALWAYS_INLINE bool is_val_ref(id_type node) const { return _p(node)->m_type.is_val_ref(); } + C4_ALWAYS_INLINE bool is_ref(id_type node) const { return _p(node)->m_type.is_ref(); } + + C4_ALWAYS_INLINE bool parent_is_seq(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_parent(node)); return is_seq(_p(node)->m_parent); } + C4_ALWAYS_INLINE bool parent_is_map(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_parent(node)); return is_map(_p(node)->m_parent); } - /** true when key and val are empty, and has no children */ - C4_ALWAYS_INLINE bool empty(size_t node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! 
(_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } /** true when the node has an anchor named a */ - C4_ALWAYS_INLINE bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } - - C4_ALWAYS_INLINE bool key_is_null(size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && scalar_is_null(n->m_key.scalar); } - C4_ALWAYS_INLINE bool val_is_null(size_t node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && scalar_is_null(n->m_val.scalar); } - - /** @todo move this function to node_type.hpp */ - static bool scalar_is_null(csubstr s) noexcept - { - return s.str == nullptr || - s == "~" || - s == "null" || - s == "Null" || - s == "NULL"; - } + C4_ALWAYS_INLINE bool has_anchor(id_type node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } + + /** true if the node key does not have any KEYQUO flags, and its scalar verifies scalar_is_null(). + * @warning the node must verify .has_key() (asserted) (ie must be a member of a map) + * @see https://github.com/biojppm/rapidyaml/issues/413 */ + C4_ALWAYS_INLINE bool key_is_null(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && scalar_is_null(n->m_key.scalar); } + /** true if the node key does not have any VALQUO flags, and its scalar verifies scalar_is_null(). 
+ * @warning the node must verify .has_val() (asserted) (ie must be a scalar / must not be a container) + * @see https://github.com/biojppm/rapidyaml/issues/413 */ + C4_ALWAYS_INLINE bool val_is_null(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && scalar_is_null(n->m_val.scalar); } + + /// true if the key was a scalar requiring filtering and was left + /// unfiltered during the parsing (see ParserOptions) + C4_ALWAYS_INLINE bool is_key_unfiltered(id_type node) const { return _p(node)->m_type.is_key_unfiltered(); } + /// true if the val was a scalar requiring filtering and was left + /// unfiltered during the parsing (see ParserOptions) + C4_ALWAYS_INLINE bool is_val_unfiltered(id_type node) const { return _p(node)->m_type.is_val_unfiltered(); } + + RYML_DEPRECATED("use has_key_anchor()") bool is_key_anchor(id_type node) const { return _p(node)->m_type.has_key_anchor(); } + RYML_DEPRECATED("use has_val_anchor()") bool is_val_anchor(id_type node) const { return _p(node)->m_type.has_val_anchor(); } + RYML_DEPRECATED("use has_anchor()") bool is_anchor(id_type node) const { return _p(node)->m_type.has_anchor(); } + RYML_DEPRECATED("use has_anchor_or_ref()") bool is_anchor_or_ref(id_type node) const { return _p(node)->m_type.has_anchor() || _p(node)->m_type.is_ref(); } /** @} */ @@ -671,23 +453,26 @@ class RYML_EXPORT Tree /** @name hierarchy predicates */ /** @{ */ - bool is_root(size_t node) const { _RYML_CB_ASSERT(m_callbacks, _p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; } + bool is_root(id_type node) const { _RYML_CB_ASSERT(m_callbacks, _p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; } + + bool has_parent(id_type node) const { return _p(node)->m_parent != NONE; } - bool has_parent(size_t node) const { return _p(node)->m_parent != NONE; } + /** true when key and val are empty, and has no children */ + bool 
empty(id_type node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } /** true if @p node has a child with id @p ch */ - bool has_child(size_t node, size_t ch) const { return _p(ch)->m_parent == node; } + bool has_child(id_type node, id_type ch) const { return _p(ch)->m_parent == node; } /** true if @p node has a child with key @p key */ - bool has_child(size_t node, csubstr key) const { return find_child(node, key) != npos; } + bool has_child(id_type node, csubstr key) const { return find_child(node, key) != NONE; } /** true if @p node has any children key */ - bool has_children(size_t node) const { return _p(node)->m_first_child != NONE; } + bool has_children(id_type node) const { return _p(node)->m_first_child != NONE; } /** true if @p node has a sibling with id @p sib */ - bool has_sibling(size_t node, size_t sib) const { return _p(node)->m_parent == _p(sib)->m_parent; } + bool has_sibling(id_type node, id_type sib) const { return _p(node)->m_parent == _p(sib)->m_parent; } /** true if one of the node's siblings has the given key */ - bool has_sibling(size_t node, csubstr key) const { return find_sibling(node, key) != npos; } + bool has_sibling(id_type node, csubstr key) const { return find_sibling(node, key) != NONE; } /** true if node is not a single child */ - bool has_other_siblings(size_t node) const + bool has_other_siblings(id_type node) const { NodeData const *n = _p(node); if(C4_LIKELY(n->m_parent != NONE)) @@ -698,7 +483,7 @@ class RYML_EXPORT Tree return false; } - RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(size_t /*node*/) const { return true; } + RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(id_type /*node*/) const { return true; } /** @} */ @@ -707,64 +492,100 @@ class RYML_EXPORT Tree /** @name hierarchy getters */ /** @{ */ - size_t parent(size_t node) const { return _p(node)->m_parent; } + id_type parent(id_type node) const { return 
_p(node)->m_parent; } - size_t prev_sibling(size_t node) const { return _p(node)->m_prev_sibling; } - size_t next_sibling(size_t node) const { return _p(node)->m_next_sibling; } + id_type prev_sibling(id_type node) const { return _p(node)->m_prev_sibling; } + id_type next_sibling(id_type node) const { return _p(node)->m_next_sibling; } /** O(#num_children) */ - size_t num_children(size_t node) const; - size_t child_pos(size_t node, size_t ch) const; - size_t first_child(size_t node) const { return _p(node)->m_first_child; } - size_t last_child(size_t node) const { return _p(node)->m_last_child; } - size_t child(size_t node, size_t pos) const; - size_t find_child(size_t node, csubstr const& key) const; + id_type num_children(id_type node) const; + id_type child_pos(id_type node, id_type ch) const; + id_type first_child(id_type node) const { return _p(node)->m_first_child; } + id_type last_child(id_type node) const { return _p(node)->m_last_child; } + id_type child(id_type node, id_type pos) const; + id_type find_child(id_type node, csubstr const& key) const; /** O(#num_siblings) */ /** counts with this */ - size_t num_siblings(size_t node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); } + id_type num_siblings(id_type node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); } /** does not count with this */ - size_t num_other_siblings(size_t node) const { size_t ns = num_siblings(node); _RYML_CB_ASSERT(m_callbacks, ns > 0); return ns-1; } - size_t sibling_pos(size_t node, size_t sib) const { _RYML_CB_ASSERT(m_callbacks, ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); } - size_t first_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; } - size_t last_sibling(size_t node) const { return is_root(node) ? 
node : _p(_p(node)->m_parent)->m_last_child; } - size_t sibling(size_t node, size_t pos) const { return child(_p(node)->m_parent, pos); } - size_t find_sibling(size_t node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); } + id_type num_other_siblings(id_type node) const { id_type ns = num_siblings(node); _RYML_CB_ASSERT(m_callbacks, ns > 0); return ns-1; } + id_type sibling_pos(id_type node, id_type sib) const { _RYML_CB_ASSERT(m_callbacks, ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); } + id_type first_sibling(id_type node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; } + id_type last_sibling(id_type node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_last_child; } + id_type sibling(id_type node, id_type pos) const { return child(_p(node)->m_parent, pos); } + id_type find_sibling(id_type node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); } - size_t doc(size_t i) const { size_t rid = root_id(); _RYML_CB_ASSERT(m_callbacks, is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream. + id_type doc(id_type i) const { id_type rid = root_id(); _RYML_CB_ASSERT(m_callbacks, is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream. + + id_type depth_asc(id_type node) const; /**< O(log(num_tree_nodes)) get the ascending depth of the node: number of levels between root and node */ + id_type depth_desc(id_type node) const; /**< O(num_tree_nodes) get the descending depth of the node: number of levels between node and deepest child */ + + /** @} */ + +public: + + /** @name node style predicates and modifiers. 
see the corresponding predicate in NodeType */ + /** @{ */ + + C4_ALWAYS_INLINE bool is_container_styled(id_type node) const { return _p(node)->m_type.is_container_styled(); } + C4_ALWAYS_INLINE bool is_block(id_type node) const { return _p(node)->m_type.is_block(); } + C4_ALWAYS_INLINE bool is_flow_sl(id_type node) const { return _p(node)->m_type.is_flow_sl(); } + C4_ALWAYS_INLINE bool is_flow_ml(id_type node) const { return _p(node)->m_type.is_flow_ml(); } + C4_ALWAYS_INLINE bool is_flow(id_type node) const { return _p(node)->m_type.is_flow(); } + + C4_ALWAYS_INLINE bool is_key_styled(id_type node) const { return _p(node)->m_type.is_key_styled(); } + C4_ALWAYS_INLINE bool is_val_styled(id_type node) const { return _p(node)->m_type.is_val_styled(); } + C4_ALWAYS_INLINE bool is_key_literal(id_type node) const { return _p(node)->m_type.is_key_literal(); } + C4_ALWAYS_INLINE bool is_val_literal(id_type node) const { return _p(node)->m_type.is_val_literal(); } + C4_ALWAYS_INLINE bool is_key_folded(id_type node) const { return _p(node)->m_type.is_key_folded(); } + C4_ALWAYS_INLINE bool is_val_folded(id_type node) const { return _p(node)->m_type.is_val_folded(); } + C4_ALWAYS_INLINE bool is_key_squo(id_type node) const { return _p(node)->m_type.is_key_squo(); } + C4_ALWAYS_INLINE bool is_val_squo(id_type node) const { return _p(node)->m_type.is_val_squo(); } + C4_ALWAYS_INLINE bool is_key_dquo(id_type node) const { return _p(node)->m_type.is_key_dquo(); } + C4_ALWAYS_INLINE bool is_val_dquo(id_type node) const { return _p(node)->m_type.is_val_dquo(); } + C4_ALWAYS_INLINE bool is_key_plain(id_type node) const { return _p(node)->m_type.is_key_plain(); } + C4_ALWAYS_INLINE bool is_val_plain(id_type node) const { return _p(node)->m_type.is_val_plain(); } + C4_ALWAYS_INLINE bool is_key_quoted(id_type node) const { return _p(node)->m_type.is_key_quoted(); } + C4_ALWAYS_INLINE bool is_val_quoted(id_type node) const { return _p(node)->m_type.is_val_quoted(); } + 
C4_ALWAYS_INLINE bool is_quoted(id_type node) const { return _p(node)->m_type.is_quoted(); } + + C4_ALWAYS_INLINE void set_container_style(id_type node, NodeType_e style) { _RYML_CB_ASSERT(m_callbacks, is_container(node)); _p(node)->m_type.set_container_style(style); } + C4_ALWAYS_INLINE void set_key_style(id_type node, NodeType_e style) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_type.set_key_style(style); } + C4_ALWAYS_INLINE void set_val_style(id_type node, NodeType_e style) { _RYML_CB_ASSERT(m_callbacks, has_val(node)); _p(node)->m_type.set_val_style(style); } /** @} */ public: - /** @name node modifiers */ + /** @name node type modifiers */ /** @{ */ - void to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags=0); - void to_map(size_t node, csubstr key, type_bits more_flags=0); - void to_seq(size_t node, csubstr key, type_bits more_flags=0); - void to_val(size_t node, csubstr val, type_bits more_flags=0); - void to_map(size_t node, type_bits more_flags=0); - void to_seq(size_t node, type_bits more_flags=0); - void to_doc(size_t node, type_bits more_flags=0); - void to_stream(size_t node, type_bits more_flags=0); + void to_keyval(id_type node, csubstr key, csubstr val, type_bits more_flags=0); + void to_map(id_type node, csubstr key, type_bits more_flags=0); + void to_seq(id_type node, csubstr key, type_bits more_flags=0); + void to_val(id_type node, csubstr val, type_bits more_flags=0); + void to_map(id_type node, type_bits more_flags=0); + void to_seq(id_type node, type_bits more_flags=0); + void to_doc(id_type node, type_bits more_flags=0); + void to_stream(id_type node, type_bits more_flags=0); - void set_key(size_t node, csubstr key) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_key.scalar = key; } - void set_val(size_t node, csubstr val) { _RYML_CB_ASSERT(m_callbacks, has_val(node)); _p(node)->m_val.scalar = val; } + void set_key(id_type node, csubstr key) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); 
_p(node)->m_key.scalar = key; } + void set_val(id_type node, csubstr val) { _RYML_CB_ASSERT(m_callbacks, has_val(node)); _p(node)->m_val.scalar = val; } - void set_key_tag(size_t node, csubstr tag) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); } - void set_val_tag(size_t node, csubstr tag) { _RYML_CB_ASSERT(m_callbacks, has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); } + void set_key_tag(id_type node, csubstr tag) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); } + void set_val_tag(id_type node, csubstr tag) { _RYML_CB_ASSERT(m_callbacks, has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); } - void set_key_anchor(size_t node, csubstr anchor) { _RYML_CB_ASSERT(m_callbacks, ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); } - void set_val_anchor(size_t node, csubstr anchor) { _RYML_CB_ASSERT(m_callbacks, ! is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); } - void set_key_ref (size_t node, csubstr ref ) { _RYML_CB_ASSERT(m_callbacks, ! has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); } - void set_val_ref (size_t node, csubstr ref ) { _RYML_CB_ASSERT(m_callbacks, ! has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); } + void set_key_anchor(id_type node, csubstr anchor) { _RYML_CB_ASSERT(m_callbacks, ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); } + void set_val_anchor(id_type node, csubstr anchor) { _RYML_CB_ASSERT(m_callbacks, ! 
is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); } + void set_key_ref (id_type node, csubstr ref ) { _RYML_CB_ASSERT(m_callbacks, ! has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); } + void set_val_ref (id_type node, csubstr ref ) { _RYML_CB_ASSERT(m_callbacks, ! has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); } - void rem_key_anchor(size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); } - void rem_val_anchor(size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); } - void rem_key_ref (size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); } - void rem_val_ref (size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); } - void rem_anchor_ref(size_t node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); } + void rem_key_anchor(id_type node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); } + void rem_val_anchor(id_type node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); } + void rem_key_ref (id_type node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); } + void rem_val_ref (id_type node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); } + void rem_anchor_ref(id_type node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); } /** @} */ @@ -776,15 +597,16 @@ class RYML_EXPORT Tree /** reorder the tree in memory so that all the nodes are stored * in a linear sequence when visited in depth-first order. * This will invalidate existing ids, since the node id is its - * position in the node array. */ + * position in the tree's node array. 
*/ void reorder(); /** Resolve references (aliases <- anchors) in the tree. * - * Dereferencing is opt-in; after parsing, Tree::resolve() - * has to be called explicitly for obtaining resolved references in the - * tree. This method will resolve all references and substitute the - * anchored values in place of the reference. + * Dereferencing is opt-in; after parsing, Tree::resolve() has to + * be called explicitly for obtaining resolved references in the + * tree. This method will @ref ReferenceResolver::resolve() + * to resolve all references and substitute the anchored values in + * place of the reference. * * This method first does a full traversal of the tree to gather all * anchors and references in a separate collection, then it goes through @@ -796,7 +618,12 @@ class RYML_EXPORT Tree * potentially expensive operation, with a best-case linear complexity * (from the initial traversal). This potential cost is the reason for * requiring an explicit call. + * + * @see ReferenceResolver::resolve() */ + void resolve(ReferenceResolver *C4_RESTRICT rr); + + /** Resolve references using a throw-away resolver. */ void resolve(); /** @} */ @@ -807,30 +634,29 @@ class RYML_EXPORT Tree /** @{ */ void resolve_tags(); + void normalize_tags(); + void normalize_tags_long(); - size_t num_tag_directives() const; - size_t add_tag_directive(TagDirective const& td); + id_type num_tag_directives() const; + bool add_tag_directive(csubstr directive); + id_type add_tag_directive(TagDirective const& td); void clear_tag_directives(); - size_t resolve_tag(substr output, csubstr tag, size_t node_id) const; - csubstr resolve_tag_sub(substr output, csubstr tag, size_t node_id) const + /** resolve the given tag, appearing at node_id. Write the result into output. 
+ * @return the number of characters required for the resolved tag */ + size_t resolve_tag(substr output, csubstr tag, id_type node_id) const; + csubstr resolve_tag_sub(substr output, csubstr tag, id_type node_id) const { size_t needed = resolve_tag(output, tag, node_id); return needed <= output.len ? output.first(needed) : output; } - using tag_directive_const_iterator = TagDirective const*; - tag_directive_const_iterator begin_tag_directives() const { return m_tag_directives; } - tag_directive_const_iterator end_tag_directives() const { return m_tag_directives + num_tag_directives(); } - - struct TagDirectiveProxy - { - tag_directive_const_iterator b, e; - tag_directive_const_iterator begin() const { return b; } - tag_directive_const_iterator end() const { return e; } - }; + TagDirective const* begin_tag_directives() const { return m_tag_directives; } + TagDirective const* end_tag_directives() const { return m_tag_directives + num_tag_directives(); } + c4::yml::TagDirectiveRange tag_directives() const { return c4::yml::TagDirectiveRange{begin_tag_directives(), end_tag_directives()}; } - TagDirectiveProxy tag_directives() const { return TagDirectiveProxy{begin_tag_directives(), end_tag_directives()}; } + RYML_DEPRECATED("use c4::yml::tag_directive_const_iterator") typedef TagDirective const* tag_directive_const_iterator; + RYML_DEPRECATED("use c4::yml::TagDirectiveRange") typedef c4::yml::TagDirectiveRange TagDirectiveProxy; /** @} */ @@ -842,19 +668,25 @@ class RYML_EXPORT Tree /** create and insert a new child of @p parent. insert after the (to-be) * sibling @p after, which must be a child of @p parent. 
To insert as the * first child, set after to NONE */ - C4_ALWAYS_INLINE size_t insert_child(size_t parent, size_t after) + C4_ALWAYS_INLINE id_type insert_child(id_type parent, id_type after) { _RYML_CB_ASSERT(m_callbacks, parent != NONE); _RYML_CB_ASSERT(m_callbacks, is_container(parent) || is_root(parent)); _RYML_CB_ASSERT(m_callbacks, after == NONE || (_p(after)->m_parent == parent)); - size_t child = _claim(); + id_type child = _claim(); _set_hierarchy(child, parent, after); return child; } /** create and insert a node as the first child of @p parent */ - C4_ALWAYS_INLINE size_t prepend_child(size_t parent) { return insert_child(parent, NONE); } + C4_ALWAYS_INLINE id_type prepend_child(id_type parent) { return insert_child(parent, NONE); } /** create and insert a node as the last child of @p parent */ - C4_ALWAYS_INLINE size_t append_child(size_t parent) { return insert_child(parent, _p(parent)->m_last_child); } + C4_ALWAYS_INLINE id_type append_child(id_type parent) { return insert_child(parent, _p(parent)->m_last_child); } + C4_ALWAYS_INLINE id_type _append_child__unprotected(id_type parent) + { + id_type child = _claim(); + _set_hierarchy(child, parent, _p(parent)->m_last_child); + return child; + } public: @@ -869,25 +701,25 @@ class RYML_EXPORT Tree #endif //! create and insert a new sibling of n. 
insert after "after" - C4_ALWAYS_INLINE size_t insert_sibling(size_t node, size_t after) + C4_ALWAYS_INLINE id_type insert_sibling(id_type node, id_type after) { return insert_child(_p(node)->m_parent, after); } /** create and insert a node as the first node of @p parent */ - C4_ALWAYS_INLINE size_t prepend_sibling(size_t node) { return prepend_child(_p(node)->m_parent); } - C4_ALWAYS_INLINE size_t append_sibling(size_t node) { return append_child(_p(node)->m_parent); } + C4_ALWAYS_INLINE id_type prepend_sibling(id_type node) { return prepend_child(_p(node)->m_parent); } + C4_ALWAYS_INLINE id_type append_sibling(id_type node) { return append_child(_p(node)->m_parent); } public: /** remove an entire branch at once: ie remove the children and the node itself */ - inline void remove(size_t node) + inline void remove(id_type node) { remove_children(node); _release(node); } /** remove all the node's children, but keep the node itself */ - void remove_children(size_t node); + void remove_children(id_type node); /** change the @p type of the node to one of MAP, SEQ or VAL. @p * type must have one and only one of MAP,SEQ,VAL; @p type may @@ -898,9 +730,9 @@ class RYML_EXPORT Tree * initialize with a null scalar (~), changing to MAP will * initialize with an empty map ({}), and changing to SEQ will * initialize with an empty seq ([]). 
*/ - bool change_type(size_t node, NodeType type); + bool change_type(id_type node, NodeType type); - bool change_type(size_t node, type_bits type) + bool change_type(id_type node, type_bits type) { return change_type(node, (NodeType)type); } @@ -914,14 +746,14 @@ class RYML_EXPORT Tree public: /** change the node's position in the parent */ - void move(size_t node, size_t after); + void move(id_type node, id_type after); /** change the node's parent and position */ - void move(size_t node, size_t new_parent, size_t after); + void move(id_type node, id_type new_parent, id_type after); /** change the node's parent and position to a different tree * @return the index of the new node in the destination tree */ - size_t move(Tree * src, size_t node, size_t new_parent, size_t after); + id_type move(Tree * src, id_type node, id_type new_parent, id_type after); /** ensure the first node is a stream. Eg, change this tree * @@ -951,34 +783,34 @@ class RYML_EXPORT Tree /** recursively duplicate a node from this tree into a new parent, * placing it after one of its children * @return the index of the copy */ - size_t duplicate(size_t node, size_t new_parent, size_t after); + id_type duplicate(id_type node, id_type new_parent, id_type after); /** recursively duplicate a node from a different tree into a new parent, * placing it after one of its children * @return the index of the copy */ - size_t duplicate(Tree const* src, size_t node, size_t new_parent, size_t after); + id_type duplicate(Tree const* src, id_type node, id_type new_parent, id_type after); /** recursively duplicate the node's children (but not the node) * @return the index of the last duplicated child */ - size_t duplicate_children(size_t node, size_t parent, size_t after); + id_type duplicate_children(id_type node, id_type parent, id_type after); /** recursively duplicate the node's children (but not the node), where * the node is from a different tree * @return the index of the last duplicated child */ - 
size_t duplicate_children(Tree const* src, size_t node, size_t parent, size_t after); + id_type duplicate_children(Tree const* src, id_type node, id_type parent, id_type after); - void duplicate_contents(size_t node, size_t where); - void duplicate_contents(Tree const* src, size_t node, size_t where); + void duplicate_contents(id_type node, id_type where); + void duplicate_contents(Tree const* src, id_type node, id_type where); /** duplicate the node's children (but not the node) in a new parent, but * omit repetitions where a duplicated node has the same key (in maps) or * value (in seqs). If one of the duplicated children has the same key * (in maps) or value (in seqs) as one of the parent's children, the one * that is placed closest to the end will prevail. */ - size_t duplicate_children_no_rep(size_t node, size_t parent, size_t after); - size_t duplicate_children_no_rep(Tree const* src, size_t node, size_t parent, size_t after); + id_type duplicate_children_no_rep(id_type node, id_type parent, id_type after); + id_type duplicate_children_no_rep(Tree const* src, id_type node, id_type parent, id_type after); public: - void merge_with(Tree const* src, size_t src_node=NONE, size_t dst_root=NONE); + void merge_with(Tree const* src, id_type src_node=NONE, id_type dst_root=NONE); /** @} */ @@ -1156,8 +988,8 @@ class RYML_EXPORT Tree } /** ensure the tree's internal string arena is at least the given capacity - * @note This operation has a potential complexity of O(numNodes)+O(arenasize). - * Growing the arena may cause relocation of the entire + * @warning This operation may be expensive, with a potential complexity of O(numNodes)+O(arenasize). + * @warning Growing the arena may cause relocation of the entire * existing arena, and thus change the contents of individual nodes. 
*/ void reserve_arena(size_t arena_cap) { @@ -1191,6 +1023,7 @@ class RYML_EXPORT Tree substr _request_span(size_t sz) { + _RYML_CB_ASSERT(m_callbacks, m_arena_pos + sz <= m_arena.len); substr s; s = m_arena.sub(m_arena_pos, sz); m_arena_pos += sz; @@ -1201,7 +1034,7 @@ class RYML_EXPORT Tree { _RYML_CB_ASSERT(m_callbacks, m_arena.is_super(s)); _RYML_CB_ASSERT(m_callbacks, m_arena.sub(0, m_arena_pos).is_super(s)); - auto pos = (s.str - m_arena.str); + auto pos = (s.str - m_arena.str); // this is larger than 0 based on the assertions above substr r(next_arena.str + pos, s.len); _RYML_CB_ASSERT(m_callbacks, r.str - next_arena.str == pos); _RYML_CB_ASSERT(m_callbacks, next_arena.sub(0, m_arena_pos).is_super(r)); @@ -1215,15 +1048,15 @@ class RYML_EXPORT Tree struct lookup_result { - size_t target; - size_t closest; + id_type target; + id_type closest; size_t path_pos; csubstr path; inline operator bool() const { return target != NONE; } lookup_result() : target(NONE), closest(NONE), path_pos(0), path() {} - lookup_result(csubstr path_, size_t start) : target(NONE), closest(start), path_pos(0), path(path_) {} + lookup_result(csubstr path_, id_type start) : target(NONE), closest(start), path_pos(0), path(path_) {} /** get the part ot the input path that was resolved */ csubstr resolved() const; @@ -1232,19 +1065,19 @@ class RYML_EXPORT Tree }; /** for example foo.bar[0].baz */ - lookup_result lookup_path(csubstr path, size_t start=NONE) const; + lookup_result lookup_path(csubstr path, id_type start=NONE) const; /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify * the tree so that the corresponding lookup_path() would return the * default value. 
* @see lookup_path() */ - size_t lookup_path_or_modify(csubstr default_value, csubstr path, size_t start=NONE); + id_type lookup_path_or_modify(csubstr default_value, csubstr path, id_type start=NONE); /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify * the tree so that the corresponding lookup_path() would return the * branch @p src_node (from the tree @p src). * @see lookup_path() */ - size_t lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start=NONE); + id_type lookup_path_or_modify(Tree const *src, id_type src_node, csubstr path, id_type start=NONE); /** @} */ @@ -1260,13 +1093,13 @@ class RYML_EXPORT Tree bool is_index() const { return value.begins_with('[') && value.ends_with(']'); } }; - size_t _lookup_path_or_create(csubstr path, size_t start); + id_type _lookup_path_or_create(csubstr path, id_type start); void _lookup_path (lookup_result *r) const; void _lookup_path_modify(lookup_result *r); - size_t _next_node (lookup_result *r, _lookup_path_token *parent) const; - size_t _next_node_modify(lookup_result *r, _lookup_path_token *parent); + id_type _next_node (lookup_result *r, _lookup_path_token *parent) const; + id_type _next_node_modify(lookup_result *r, _lookup_path_token *parent); void _advance(lookup_result *r, size_t more) const; @@ -1277,7 +1110,7 @@ class RYML_EXPORT Tree void _clear(); void _free(); void _copy(Tree const& that); - void _move(Tree & that); + void _move(Tree & that) noexcept; void _relocate(substr next_arena); @@ -1286,9 +1119,9 @@ class RYML_EXPORT Tree /** @cond dev*/ #if ! 
RYML_USE_ASSERT - C4_ALWAYS_INLINE void _check_next_flags(size_t, type_bits) {} + C4_ALWAYS_INLINE void _check_next_flags(id_type, type_bits) {} #else - void _check_next_flags(size_t node, type_bits f) + void _check_next_flags(id_type node, type_bits f) { auto n = _p(node); type_bits o = n->m_type; // old @@ -1321,34 +1154,34 @@ class RYML_EXPORT Tree } #endif - inline void _set_flags(size_t node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; } - inline void _set_flags(size_t node, type_bits f) { _check_next_flags(node, f); _p(node)->m_type = f; } + inline void _set_flags(id_type node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; } + inline void _set_flags(id_type node, type_bits f) { _check_next_flags(node, f); _p(node)->m_type = f; } - inline void _add_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f | d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } - inline void _add_flags(size_t node, type_bits f) { NodeData *d = _p(node); f |= d->m_type; _check_next_flags(node, f); d->m_type = f; } + inline void _add_flags(id_type node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f | d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _add_flags(id_type node, type_bits f) { NodeData *d = _p(node); f |= d->m_type; _check_next_flags(node, f); d->m_type = f; } - inline void _rem_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } - inline void _rem_flags(size_t node, type_bits f) { NodeData *d = _p(node); f = d->m_type & ~f; _check_next_flags(node, f); d->m_type = f; } + inline void _rem_flags(id_type node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _rem_flags(id_type node, type_bits f) { NodeData *d = _p(node); f = d->m_type & ~f; 
_check_next_flags(node, f); d->m_type = f; } - void _set_key(size_t node, csubstr key, type_bits more_flags=0) + void _set_key(id_type node, csubstr key, type_bits more_flags=0) { _p(node)->m_key.scalar = key; _add_flags(node, KEY|more_flags); } - void _set_key(size_t node, NodeScalar const& key, type_bits more_flags=0) + void _set_key(id_type node, NodeScalar const& key, type_bits more_flags=0) { _p(node)->m_key = key; _add_flags(node, KEY|more_flags); } - void _set_val(size_t node, csubstr val, type_bits more_flags=0) + void _set_val(id_type node, csubstr val, type_bits more_flags=0) { _RYML_CB_ASSERT(m_callbacks, num_children(node) == 0); _RYML_CB_ASSERT(m_callbacks, !is_seq(node) && !is_map(node)); _p(node)->m_val.scalar = val; _add_flags(node, VAL|more_flags); } - void _set_val(size_t node, NodeScalar const& val, type_bits more_flags=0) + void _set_val(id_type node, NodeScalar const& val, type_bits more_flags=0) { _RYML_CB_ASSERT(m_callbacks, num_children(node) == 0); _RYML_CB_ASSERT(m_callbacks, ! is_container(node)); @@ -1356,7 +1189,7 @@ class RYML_EXPORT Tree _add_flags(node, VAL|more_flags); } - void _set(size_t node, NodeInit const& i) + void _set(id_type node, NodeInit const& i) { _RYML_CB_ASSERT(m_callbacks, i._check()); NodeData *n = _p(node); @@ -1373,10 +1206,10 @@ class RYML_EXPORT Tree n->m_val = i.val; } - void _set_parent_as_container_if_needed(size_t in) + void _set_parent_as_container_if_needed(id_type in) { NodeData const* n = _p(in); - size_t ip = parent(in); + id_type ip = parent(in); if(ip != NONE) { if( ! 
(is_seq(ip) || is_map(ip))) @@ -1396,10 +1229,10 @@ class RYML_EXPORT Tree } } - void _seq2map(size_t node) + void _seq2map(id_type node) { _RYML_CB_ASSERT(m_callbacks, is_seq(node)); - for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) { NodeData *C4_RESTRICT ch = _p(i); if(ch->m_type.is_keyval()) @@ -1412,24 +1245,24 @@ class RYML_EXPORT Tree n->m_type.add(MAP); } - size_t _do_reorder(size_t *node, size_t count); + id_type _do_reorder(id_type *node, id_type count); - void _swap(size_t n_, size_t m_); - void _swap_props(size_t n_, size_t m_); - void _swap_hierarchy(size_t n_, size_t m_); - void _copy_hierarchy(size_t dst_, size_t src_); + void _swap(id_type n_, id_type m_); + void _swap_props(id_type n_, id_type m_); + void _swap_hierarchy(id_type n_, id_type m_); + void _copy_hierarchy(id_type dst_, id_type src_); - inline void _copy_props(size_t dst_, size_t src_) + inline void _copy_props(id_type dst_, id_type src_) { _copy_props(dst_, this, src_); } - inline void _copy_props_wo_key(size_t dst_, size_t src_) + inline void _copy_props_wo_key(id_type dst_, id_type src_) { _copy_props_wo_key(dst_, this, src_); } - void _copy_props(size_t dst_, Tree const* that_tree, size_t src_) + void _copy_props(id_type dst_, Tree const* that_tree, id_type src_) { auto & C4_RESTRICT dst = *_p(dst_); auto const& C4_RESTRICT src = *that_tree->_p(src_); @@ -1438,7 +1271,16 @@ class RYML_EXPORT Tree dst.m_val = src.m_val; } - void _copy_props_wo_key(size_t dst_, Tree const* that_tree, size_t src_) + void _copy_props(id_type dst_, Tree const* that_tree, id_type src_, type_bits src_mask) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = (src.m_type & src_mask) | (dst.m_type & ~src_mask); + dst.m_key = src.m_key; + dst.m_val = src.m_val; + } + + void _copy_props_wo_key(id_type dst_, Tree const* that_tree, id_type src_) { auto & C4_RESTRICT dst 
= *_p(dst_); auto const& C4_RESTRICT src = *that_tree->_p(src_); @@ -1446,12 +1288,20 @@ class RYML_EXPORT Tree dst.m_val = src.m_val; } - inline void _clear_type(size_t node) + void _copy_props_wo_key(id_type dst_, Tree const* that_tree, id_type src_, type_bits src_mask) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = (src.m_type & ((~_KEYMASK)|src_mask)) | (dst.m_type & (_KEYMASK|~src_mask)); + dst.m_val = src.m_val; + } + + inline void _clear_type(id_type node) { _p(node)->m_type = NOTYPE; } - inline void _clear(size_t node) + inline void _clear(id_type node) { auto *C4_RESTRICT n = _p(node); n->m_type = NOTYPE; @@ -1462,13 +1312,13 @@ class RYML_EXPORT Tree n->m_last_child = NONE; } - inline void _clear_key(size_t node) + inline void _clear_key(id_type node) { _p(node)->m_key.clear(); _rem_flags(node, KEY); } - inline void _clear_val(size_t node) + inline void _clear_val(id_type node) { _p(node)->m_val.clear(); _rem_flags(node, VAL); @@ -1478,28 +1328,30 @@ class RYML_EXPORT Tree private: - void _clear_range(size_t first, size_t num); + void _clear_range(id_type first, id_type num); - size_t _claim(); +public: + id_type _claim(); +private: void _claim_root(); - void _release(size_t node); - void _free_list_add(size_t node); - void _free_list_rem(size_t node); + void _release(id_type node); + void _free_list_add(id_type node); + void _free_list_rem(id_type node); - void _set_hierarchy(size_t node, size_t parent, size_t after_sibling); - void _rem_hierarchy(size_t node); + void _set_hierarchy(id_type node, id_type parent, id_type after_sibling); + void _rem_hierarchy(id_type node); public: // members are exposed, but you should NOT access them directly - NodeData * m_buf; - size_t m_cap; + NodeData *m_buf; + id_type m_cap; - size_t m_size; + id_type m_size; - size_t m_free_head; - size_t m_free_tail; + id_type m_free_head; + id_type m_free_tail; substr m_arena; size_t m_arena_pos; diff --git 
a/3rdparty/rapidyaml/include/c4/yml/writer.hpp b/3rdparty/rapidyaml/include/c4/yml/writer.hpp index c23ab72be238d9..a77be91f755d0f 100644 --- a/3rdparty/rapidyaml/include/c4/yml/writer.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/writer.hpp @@ -23,18 +23,6 @@ namespace yml { */ -/** Repeat-Character: a character to be written a number of times. */ -struct RepC -{ - char c; - size_t num_times; -}; -inline RepC indent_to(size_t num_levels) -{ - return {' ', size_t(2) * num_levels}; -} - - //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -86,13 +74,11 @@ struct WriterFile ++m_pos; } - inline void _do_write(RepC const rc) + inline void _do_write(const char c, size_t num_times) { - for(size_t i = 0; i < rc.num_times; ++i) - { - fputc(rc.c, m_file); - } - m_pos += rc.num_times; + for(size_t i = 0; i < num_times; ++i) + fputc(c, m_file); + m_pos += num_times; } }; @@ -149,13 +135,11 @@ struct WriterOStream ++m_pos; } - inline void _do_write(RepC const rc) + inline void _do_write(const char c, size_t num_times) { - for(size_t i = 0; i < rc.num_times; ++i) - { - m_stream.put(rc.c); - } - m_pos += rc.num_times; + for(size_t i = 0; i < num_times; ++i) + m_stream.put(c); + m_pos += num_times; } }; @@ -212,22 +196,16 @@ struct WriterBuf inline void _do_write(const char c) { if(m_pos + 1 <= m_buf.len) - { m_buf[m_pos] = c; - } ++m_pos; } - inline void _do_write(RepC const rc) + inline void _do_write(const char c, size_t num_times) { - if(m_pos + rc.num_times <= m_buf.len) - { - for(size_t i = 0; i < rc.num_times; ++i) - { - m_buf[m_pos + i] = rc.c; - } - } - m_pos += rc.num_times; + if(m_pos + num_times <= m_buf.len) + for(size_t i = 0; i < num_times; ++i) + m_buf[m_pos + i] = c; + m_pos += num_times; } }; diff --git a/3rdparty/rapidyaml/include/c4/yml/yml.hpp 
b/3rdparty/rapidyaml/include/c4/yml/yml.hpp index 36f78fe827611e..dcd7c18d038707 100644 --- a/3rdparty/rapidyaml/include/c4/yml/yml.hpp +++ b/3rdparty/rapidyaml/include/c4/yml/yml.hpp @@ -4,7 +4,12 @@ #include "c4/yml/tree.hpp" #include "c4/yml/node.hpp" #include "c4/yml/emit.hpp" +#include "c4/yml/event_handler_tree.hpp" +#include "c4/yml/parse_engine.hpp" +#include "c4/yml/filter_processor.hpp" #include "c4/yml/parse.hpp" #include "c4/yml/preprocess.hpp" +#include "c4/yml/reference_resolver.hpp" +#include "c4/yml/tag.hpp" #endif // _C4_YML_YML_HPP_ diff --git a/3rdparty/rapidyaml/include/ryml.natvis b/3rdparty/rapidyaml/include/ryml.natvis index 5cd67b13e6bc18..cb0827546ada20 100644 --- a/3rdparty/rapidyaml/include/ryml.natvis +++ b/3rdparty/rapidyaml/include/ryml.natvis @@ -25,12 +25,24 @@ See also: - {type} + [KEYVAL] + [KEYSEQ] + [KEYMAP] + [DOCVAL] + [DOCSEQ] + [DOCMAP] + [VAL] + [KEY] + [SEQ] + [MAP] + [DOC] + [STREAM] + [NOTYPE] - + - c4::yml::VAL - c4::yml::KEY + c4::yml::KEY + c4::yml::VAL c4::yml::MAP c4::yml::SEQ c4::yml::DOC @@ -41,8 +53,25 @@ See also: c4::yml::VALANCH c4::yml::KEYTAG c4::yml::VALTAG - c4::yml::VALQUO - c4::yml::KEYQUO + + + + + c4::yml::_WIP_KEY_UNFILT + c4::yml::_WIP_VAL_UNFILT + c4::yml::_WIP_STYLE_FLOW + c4::yml::_WIP_STYLE_FLOW + c4::yml::_WIP_STYLE_BLOCK + c4::yml::_WIP_KEY_LITERAL + c4::yml::_WIP_VAL_LITERAL + c4::yml::_WIP_KEY_FOLDED + c4::yml::_WIP_VAL_FOLDED + c4::yml::_WIP_KEY_SQUO + c4::yml::_WIP_VAL_SQUO + c4::yml::_WIP_KEY_DQUO + c4::yml::_WIP_VAL_DQUO + c4::yml::_WIP_KEY_PLAIN + c4::yml::_WIP_VAL_PLAIN @@ -216,4 +245,60 @@ See also: + + src={src.str,[rpos]} dst={dst.str,[wpos]} + + src + dst + rpos + wpos + + src.str,[rpos] + + + rpos + src.str + + + + + + + + src={src.str,[rpos]} dst={src.str,[wpos]} + + rpos + wpos + wcap + + src.str,[wcap] + + + wcap + src.str + + + + src + + src.str+rpos,[src.len-rpos] + + src.len-rpossrc.str+rpos + + + + src.str,[rpos] + + rpossrc.str + + + + src.str,[wpos] + + wpossrc.str + + + + 
+ diff --git a/3rdparty/rapidyaml/src/c4/error.cpp b/3rdparty/rapidyaml/src/c4/error.cpp index c8e3b7ad44cfb4..32a266d3e14c8b 100644 --- a/3rdparty/rapidyaml/src/c4/error.cpp +++ b/3rdparty/rapidyaml/src/c4/error.cpp @@ -49,6 +49,7 @@ namespace c4 { static error_flags s_error_flags = ON_ERROR_DEFAULTS; static error_callback_type s_error_callback = nullptr; + //----------------------------------------------------------------------------- error_flags get_error_flags() @@ -70,6 +71,7 @@ void set_error_callback(error_callback_type cb) s_error_callback = cb; } + //----------------------------------------------------------------------------- void handle_error(srcloc where, const char *fmt, ...) @@ -102,23 +104,24 @@ void handle_error(srcloc where, const char *fmt, ...) { if(s_error_callback) { - s_error_callback(buf, msglen/*ss.c_strp(), ss.tellp()*/); + s_error_callback(buf, msglen); } } - if(s_error_flags & ON_ERROR_ABORT) - { - abort(); - } - if(s_error_flags & ON_ERROR_THROW) { #if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION) throw std::runtime_error(buf); -#else - abort(); #endif } + + if(s_error_flags & ON_ERROR_ABORT) + { + abort(); + } + + abort(); // abort anyway, in case nothing was set + C4_UNREACHABLE_AFTER_ERR(); } //----------------------------------------------------------------------------- @@ -126,20 +129,19 @@ void handle_error(srcloc where, const char *fmt, ...) void handle_warning(srcloc where, const char *fmt, ...) 
{ va_list args; - char buf[1024]; //sstream ss; + char buf[1024]; va_start(args, fmt); vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); C4_LOGF_WARN("\n"); #if defined(C4_ERROR_SHOWS_FILELINE) && defined(C4_ERROR_SHOWS_FUNC) - C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/); + C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf); C4_LOGF_WARN("%s:%d: WARNING: here: %s\n", where.file, where.line, where.func); #elif defined(C4_ERROR_SHOWS_FILELINE) - C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/); + C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf); #elif ! defined(C4_ERROR_SHOWS_FUNC) - C4_LOGF_WARN("WARNING: %s\n", buf/*ss.c_strp()*/); + C4_LOGF_WARN("WARNING: %s\n", buf); #endif - //c4::log.flush(); } //----------------------------------------------------------------------------- @@ -216,6 +218,7 @@ bool is_debugger_attached() size = sizeof(info); junk = sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, NULL, 0); assert(junk == 0); + (void)junk; // We're being debugged if the P_TRACED flag is set. 
return ((info.kp_proc.p_flag & P_TRACED) != 0); diff --git a/3rdparty/rapidyaml/src/c4/yml/common.cpp b/3rdparty/rapidyaml/src/c4/yml/common.cpp index cdfdb81b8f537c..f8defaee745cbe 100644 --- a/3rdparty/rapidyaml/src/c4/yml/common.cpp +++ b/3rdparty/rapidyaml/src/c4/yml/common.cpp @@ -28,6 +28,8 @@ void report_error_impl(const char* msg, size_t length, Location loc, FILE *f) { if(!loc.name.empty()) { + // this is more portable than using fprintf("%.*s:") which + // is not available in some embedded platforms fwrite(loc.name.str, 1, loc.name.len, f); fputc(':', f); } @@ -36,13 +38,17 @@ void report_error_impl(const char* msg, size_t length, Location loc, FILE *f) fprintf(f, "%zu:", loc.col); if(loc.offset) fprintf(f, " (%zuB):", loc.offset); + fputc(' ', f); } - fprintf(f, "%.*s\n", (int)length, msg); + RYML_ASSERT(!csubstr(msg, length).ends_with('\0')); + fwrite(msg, 1, length, f); + fputc('\n', f); fflush(f); } [[noreturn]] void error_impl(const char* msg, size_t length, Location loc, void * /*user_data*/) { + RYML_ASSERT(!csubstr(msg, length).ends_with('\0')); report_error_impl(msg, length, loc, nullptr); #ifdef RYML_DEFAULT_CALLBACK_USES_EXCEPTIONS throw std::runtime_error(std::string(msg, length)); @@ -98,9 +104,9 @@ Callbacks::Callbacks(void *user_data, pfn_allocate alloc_, pfn_free free_, pfn_e m_error(error_) #endif { - C4_CHECK(m_allocate); - C4_CHECK(m_free); - C4_CHECK(m_error); + RYML_CHECK(m_allocate); + RYML_CHECK(m_free); + RYML_CHECK(m_error); } diff --git a/3rdparty/rapidyaml/src/c4/yml/parse.cpp b/3rdparty/rapidyaml/src/c4/yml/parse.cpp index ca2263445574a5..6651fa9f3d5c35 100644 --- a/3rdparty/rapidyaml/src/c4/yml/parse.cpp +++ b/3rdparty/rapidyaml/src/c4/yml/parse.cpp @@ -1,5745 +1,164 @@ #include "c4/yml/parse.hpp" -#include "c4/error.hpp" -#include "c4/utf.hpp" -#include -#include -#include -#include - -#include "c4/yml/detail/parser_dbg.hpp" -#ifdef RYML_DBG -#include "c4/yml/detail/print.hpp" -#endif - -#ifndef RYML_ERRMSG_SIZE - #define 
RYML_ERRMSG_SIZE 1024 -#endif - -//#define RYML_WITH_TAB_TOKENS -#ifdef RYML_WITH_TAB_TOKENS -#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__ -#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with -#else -#define _RYML_WITH_TAB_TOKENS(...) -#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without -#endif - - -#if defined(_MSC_VER) -# pragma warning(push) -# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) -#elif defined(__clang__) -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. -# pragma clang diagnostic ignored "-Wformat-nonliteral" -# pragma clang diagnostic ignored "-Wold-style-cast" -#elif defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. -# pragma GCC diagnostic ignored "-Wformat-nonliteral" -# pragma GCC diagnostic ignored "-Wold-style-cast" -# if __GNUC__ >= 7 -# pragma GCC diagnostic ignored "-Wduplicated-branches" -# endif -#endif - -namespace c4 { -namespace yml { - -namespace { - -template -void _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args) -{ - char writebuf[256]; - auto results = c4::format_dump_resume(dumpfn, writebuf, fmt, std::forward(args)...); - // resume writing if the results failed to fit the buffer - if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte. 
- { - results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward(args)...); - if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) - { - results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward(args)...); - } - } -} - -bool _is_scalar_next__runk(csubstr s) -{ - return !(s.begins_with(": ") || s.begins_with_any("#,{}[]%&") || s.begins_with("? ") || s == "-" || s.begins_with("- ") || s.begins_with(":\"") || s.begins_with(":'")); -} - -bool _is_scalar_next__rseq_rval(csubstr s) -{ - return !(s.begins_with_any("[{!&") || s.begins_with("? ") || s.begins_with("- ") || s == "-"); -} - -bool _is_scalar_next__rmap(csubstr s) -{ - return !(s.begins_with(": ") || s.begins_with_any("#,!&") || s.begins_with("? ") _RYML_WITH_TAB_TOKENS(|| s.begins_with(":\t"))); -} - -bool _is_scalar_next__rmap_val(csubstr s) -{ - return !(s.begins_with("- ") || s.begins_with_any("{[") || s == "-"); -} - -bool _is_doc_sep(csubstr s) -{ - constexpr const csubstr dashes = "---"; - constexpr const csubstr ellipsis = "..."; - constexpr const csubstr whitesp = " \t"; - if(s.begins_with(dashes)) - return s == dashes || s.sub(3).begins_with_any(whitesp); - else if(s.begins_with(ellipsis)) - return s == ellipsis || s.sub(3).begins_with_any(whitesp); - return false; -} - -/** @p i is set to the first non whitespace character after the line - * @return the number of empty lines after the initial position */ -size_t count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation) -{ - RYML_ASSERT(r[*i] == '\n'); - size_t numnl_following = 0; - ++(*i); - for( ; *i < r.len; ++(*i)) - { - if(r.str[*i] == '\n') - { - ++numnl_following; - if(indentation) // skip the indentation after the newline - { - size_t stop = *i + indentation; - for( ; *i < r.len; ++(*i)) - { - if(r.str[*i] != ' ' && r.str[*i] != '\r') - break; - RYML_ASSERT(*i < stop); - } - C4_UNUSED(stop); - } - } - else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') // skip leading 
whitespace - ; - else - break; - } - return numnl_following; -} - -} // anon namespace - - -//----------------------------------------------------------------------------- - -Parser::~Parser() -{ - _free(); - _clr(); -} - -Parser::Parser(Callbacks const& cb, ParserOptions opts) - : m_options(opts) - , m_file() - , m_buf() - , m_root_id(NONE) - , m_tree() - , m_stack(cb) - , m_state() - , m_key_tag_indentation(0) - , m_key_tag2_indentation(0) - , m_key_tag() - , m_key_tag2() - , m_val_tag_indentation(0) - , m_val_tag() - , m_key_anchor_was_before(false) - , m_key_anchor_indentation(0) - , m_key_anchor() - , m_val_anchor_indentation(0) - , m_val_anchor() - , m_filter_arena() - , m_newline_offsets() - , m_newline_offsets_size(0) - , m_newline_offsets_capacity(0) - , m_newline_offsets_buf() -{ - m_stack.push(State{}); - m_state = &m_stack.top(); -} - -Parser::Parser(Parser &&that) - : m_options(that.m_options) - , m_file(that.m_file) - , m_buf(that.m_buf) - , m_root_id(that.m_root_id) - , m_tree(that.m_tree) - , m_stack(std::move(that.m_stack)) - , m_state(&m_stack.top()) - , m_key_tag_indentation(that.m_key_tag_indentation) - , m_key_tag2_indentation(that.m_key_tag2_indentation) - , m_key_tag(that.m_key_tag) - , m_key_tag2(that.m_key_tag2) - , m_val_tag_indentation(that.m_val_tag_indentation) - , m_val_tag(that.m_val_tag) - , m_key_anchor_was_before(that.m_key_anchor_was_before) - , m_key_anchor_indentation(that.m_key_anchor_indentation) - , m_key_anchor(that.m_key_anchor) - , m_val_anchor_indentation(that.m_val_anchor_indentation) - , m_val_anchor(that.m_val_anchor) - , m_filter_arena(that.m_filter_arena) - , m_newline_offsets(that.m_newline_offsets) - , m_newline_offsets_size(that.m_newline_offsets_size) - , m_newline_offsets_capacity(that.m_newline_offsets_capacity) - , m_newline_offsets_buf(that.m_newline_offsets_buf) -{ - that._clr(); -} - -Parser::Parser(Parser const& that) - : m_options(that.m_options) - , m_file(that.m_file) - , m_buf(that.m_buf) - , 
m_root_id(that.m_root_id) - , m_tree(that.m_tree) - , m_stack(that.m_stack) - , m_state(&m_stack.top()) - , m_key_tag_indentation(that.m_key_tag_indentation) - , m_key_tag2_indentation(that.m_key_tag2_indentation) - , m_key_tag(that.m_key_tag) - , m_key_tag2(that.m_key_tag2) - , m_val_tag_indentation(that.m_val_tag_indentation) - , m_val_tag(that.m_val_tag) - , m_key_anchor_was_before(that.m_key_anchor_was_before) - , m_key_anchor_indentation(that.m_key_anchor_indentation) - , m_key_anchor(that.m_key_anchor) - , m_val_anchor_indentation(that.m_val_anchor_indentation) - , m_val_anchor(that.m_val_anchor) - , m_filter_arena() - , m_newline_offsets() - , m_newline_offsets_size() - , m_newline_offsets_capacity() - , m_newline_offsets_buf() -{ - if(that.m_newline_offsets_capacity) - { - _resize_locations(that.m_newline_offsets_capacity); - _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity); - memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); - m_newline_offsets_size = that.m_newline_offsets_size; - } - if(that.m_filter_arena.len) - { - _resize_filter_arena(that.m_filter_arena.len); - } -} - -Parser& Parser::operator=(Parser &&that) -{ - _free(); - m_options = (that.m_options); - m_file = (that.m_file); - m_buf = (that.m_buf); - m_root_id = (that.m_root_id); - m_tree = (that.m_tree); - m_stack = std::move(that.m_stack); - m_state = (&m_stack.top()); - m_key_tag_indentation = (that.m_key_tag_indentation); - m_key_tag2_indentation = (that.m_key_tag2_indentation); - m_key_tag = (that.m_key_tag); - m_key_tag2 = (that.m_key_tag2); - m_val_tag_indentation = (that.m_val_tag_indentation); - m_val_tag = (that.m_val_tag); - m_key_anchor_was_before = (that.m_key_anchor_was_before); - m_key_anchor_indentation = (that.m_key_anchor_indentation); - m_key_anchor = (that.m_key_anchor); - m_val_anchor_indentation = (that.m_val_anchor_indentation); - m_val_anchor = (that.m_val_anchor); - 
m_filter_arena = that.m_filter_arena; - m_newline_offsets = (that.m_newline_offsets); - m_newline_offsets_size = (that.m_newline_offsets_size); - m_newline_offsets_capacity = (that.m_newline_offsets_capacity); - m_newline_offsets_buf = (that.m_newline_offsets_buf); - that._clr(); - return *this; -} - -Parser& Parser::operator=(Parser const& that) -{ - _free(); - m_options = (that.m_options); - m_file = (that.m_file); - m_buf = (that.m_buf); - m_root_id = (that.m_root_id); - m_tree = (that.m_tree); - m_stack = that.m_stack; - m_state = &m_stack.top(); - m_key_tag_indentation = (that.m_key_tag_indentation); - m_key_tag2_indentation = (that.m_key_tag2_indentation); - m_key_tag = (that.m_key_tag); - m_key_tag2 = (that.m_key_tag2); - m_val_tag_indentation = (that.m_val_tag_indentation); - m_val_tag = (that.m_val_tag); - m_key_anchor_was_before = (that.m_key_anchor_was_before); - m_key_anchor_indentation = (that.m_key_anchor_indentation); - m_key_anchor = (that.m_key_anchor); - m_val_anchor_indentation = (that.m_val_anchor_indentation); - m_val_anchor = (that.m_val_anchor); - if(that.m_filter_arena.len > 0) - _resize_filter_arena(that.m_filter_arena.len); - if(that.m_newline_offsets_capacity > m_newline_offsets_capacity) - _resize_locations(that.m_newline_offsets_capacity); - _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity); - _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size); - memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); - m_newline_offsets_size = that.m_newline_offsets_size; - m_newline_offsets_buf = that.m_newline_offsets_buf; - return *this; -} - -void Parser::_clr() -{ - m_options = {}; - m_file = {}; - m_buf = {}; - m_root_id = {}; - m_tree = {}; - m_stack.clear(); - m_state = {}; - m_key_tag_indentation = {}; - m_key_tag2_indentation = {}; - m_key_tag = {}; - m_key_tag2 = {}; - m_val_tag_indentation = {}; - m_val_tag = 
{}; - m_key_anchor_was_before = {}; - m_key_anchor_indentation = {}; - m_key_anchor = {}; - m_val_anchor_indentation = {}; - m_val_anchor = {}; - m_filter_arena = {}; - m_newline_offsets = {}; - m_newline_offsets_size = {}; - m_newline_offsets_capacity = {}; - m_newline_offsets_buf = {}; -} - -void Parser::_free() -{ - if(m_newline_offsets) - { - _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); - m_newline_offsets = nullptr; - m_newline_offsets_size = 0u; - m_newline_offsets_capacity = 0u; - m_newline_offsets_buf = 0u; - } - if(m_filter_arena.len) - { - _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); - m_filter_arena = {}; - } - m_stack._free(); -} - - -//----------------------------------------------------------------------------- -void Parser::_reset() -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() == 1); - m_stack.clear(); - m_stack.push({}); - m_state = &m_stack.top(); - m_state->reset(m_file.str, m_root_id); - - m_key_tag_indentation = 0; - m_key_tag2_indentation = 0; - m_key_tag.clear(); - m_key_tag2.clear(); - m_val_tag_indentation = 0; - m_val_tag.clear(); - m_key_anchor_was_before = false; - m_key_anchor_indentation = 0; - m_key_anchor.clear(); - m_val_anchor_indentation = 0; - m_val_anchor.clear(); - - if(m_options.locations()) - { - _prepare_locations(); - } -} - -//----------------------------------------------------------------------------- -template -void Parser::_fmt_msg(DumpFn &&dumpfn) const -{ - auto const& lc = m_state->line_contents; - csubstr contents = lc.stripped; - if(contents.len) - { - // print the yaml src line - size_t offs = 3u + to_chars(substr{}, m_state->pos.line) + to_chars(substr{}, m_state->pos.col); - if(m_file.len) - { - _parse_dump(dumpfn, "{}:", m_file); - offs += m_file.len + 1; - } - _parse_dump(dumpfn, "{}:{}: ", m_state->pos.line, m_state->pos.col); - csubstr maybe_full_content = (contents.len < 80u ? 
contents : contents.first(80u)); - csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("...")); - _parse_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len); - // highlight the remaining portion of the previous line - size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin()); - size_t lastcol = firstcol + lc.rem.len; - for(size_t i = 0; i < offs + firstcol; ++i) - dumpfn(" "); - dumpfn("^"); - for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i) - dumpfn("~"); - _parse_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1); - } - else - { - dumpfn("\n"); - } - -#ifdef RYML_DBG - // next line: print the state flags - { - char flagbuf_[64]; - _parse_dump(dumpfn, "top state: {}\n", _prfl(flagbuf_, m_state->flags)); - } -#endif -} - - -//----------------------------------------------------------------------------- -template -void Parser::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const -{ - char errmsg[RYML_ERRMSG_SIZE]; - detail::_SubstrWriter writer(errmsg); - auto dumpfn = [&writer](csubstr s){ writer.append(s); }; - _parse_dump(dumpfn, fmt, args...); - writer.append('\n'); - _fmt_msg(dumpfn); - size_t len = writer.pos < RYML_ERRMSG_SIZE ? 
writer.pos : RYML_ERRMSG_SIZE; - c4::yml::error(m_tree->m_callbacks, errmsg, len, m_state->pos); - C4_UNREACHABLE_AFTER_ERR(); -} - -//----------------------------------------------------------------------------- -#ifdef RYML_DBG -template -void Parser::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const -{ - auto dumpfn = [](csubstr s){ fwrite(s.str, 1, s.len, stdout); }; - _parse_dump(dumpfn, fmt, args...); - dumpfn("\n"); - _fmt_msg(dumpfn); -} -#endif - -//----------------------------------------------------------------------------- -bool Parser::_finished_file() const -{ - bool ret = m_state->pos.offset >= m_buf.len; - if(ret) - { - _c4dbgp("finished file!!!"); - } - return ret; -} - -//----------------------------------------------------------------------------- -bool Parser::_finished_line() const -{ - return m_state->line_contents.rem.empty(); -} - -//----------------------------------------------------------------------------- -void Parser::parse_in_place(csubstr file, substr buf, Tree *t, size_t node_id) -{ - m_file = file; - m_buf = buf; - m_root_id = node_id; - m_tree = t; - _reset(); - while( ! _finished_file()) - { - _scan_line(); - while( ! _finished_line()) - _handle_line(); - if(_finished_file()) - break; // it may have finished because of multiline blocks - _line_ended(); - } - _handle_finished_file(); -} - -//----------------------------------------------------------------------------- -void Parser::_handle_finished_file() -{ - _end_stream(); -} - -//----------------------------------------------------------------------------- -void Parser::_handle_line() -{ - _c4dbgq("\n-----------"); - _c4dbgt("handling line={}, offset={}B", m_state->pos.line, m_state->pos.offset); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
m_state->line_contents.rem.empty()); - if(has_any(RSEQ)) - { - if(has_any(FLOW)) - { - if(_handle_seq_flow()) - return; - } - else - { - if(_handle_seq_blck()) - return; - } - } - else if(has_any(RMAP)) - { - if(has_any(FLOW)) - { - if(_handle_map_flow()) - return; - } - else - { - if(_handle_map_blck()) - return; - } - } - else if(has_any(RUNK)) - { - if(_handle_unk()) - return; - } - - if(_handle_top()) - return; -} - - -//----------------------------------------------------------------------------- -bool Parser::_handle_unk() -{ - _c4dbgp("handle_unk"); - - csubstr rem = m_state->line_contents.rem; - const bool start_as_child = (node(m_state) == nullptr); - - if(C4_UNLIKELY(has_any(NDOC))) - { - if(rem == "---" || rem.begins_with("--- ")) - { - _start_new_doc(rem); - return true; - } - auto trimmed = rem.triml(' '); - if(trimmed == "---" || trimmed.begins_with("--- ")) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len >= trimmed.len); - _line_progressed(rem.len - trimmed.len); - _start_new_doc(trimmed); - _save_indentation(); - return true; - } - else if(trimmed.begins_with("...")) - { - _end_stream(); - } - else if(trimmed.first_of("#%") == csubstr::npos) // neither a doc nor a tag - { - _c4dbgpf("starting implicit doc to accomodate unexpected tokens: '{}'", rem); - size_t indref = m_state->indref; - _push_level(); - _start_doc(); - _set_indentation(indref); - } - _RYML_CB_ASSERT(m_stack.m_callbacks, !trimmed.empty()); - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP)); - if(m_state->indref > 0) - { - csubstr ws = rem.left_of(rem.first_not_of(' ')); - if(m_state->indref <= ws.len) - { - _c4dbgpf("skipping base indentation of {}", m_state->indref); - _line_progressed(m_state->indref); - rem = rem.sub(m_state->indref); - } - } - - if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) - { - _c4dbgpf("it's a seq (as_child={})", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - 
_push_level(); - _start_seq(start_as_child); - _save_indentation(); - _line_progressed(2); - return true; - } - else if(rem == '-') - { - _c4dbgpf("it's a seq (as_child={})", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(); - _start_seq(start_as_child); - _save_indentation(); - _line_progressed(1); - return true; - } - else if(rem.begins_with('[')) - { - _c4dbgpf("it's a seq, flow (as_child={})", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(/*explicit flow*/true); - _start_seq(start_as_child); - add_flags(FLOW); - _line_progressed(1); - return true; - } - else if(rem.begins_with('{')) - { - _c4dbgpf("it's a map, flow (as_child={})", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(/*explicit flow*/true); - _start_map(start_as_child); - addrem_flags(FLOW|RKEY, RVAL); - _line_progressed(1); - return true; - } - else if(rem.begins_with("? ")) - { - _c4dbgpf("it's a map (as_child={}) + this key is complex", start_as_child); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(); - _start_map(start_as_child); - addrem_flags(RKEY|QMRK, RVAL); - _save_indentation(); - _line_progressed(2); - return true; - } - else if(rem.begins_with(": ") && !has_any(SSCL)) - { - _c4dbgp("it's a map with an empty key"); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(); - _start_map(start_as_child); - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY); - _save_indentation(); - _line_progressed(2); - return true; - } - else if(rem == ':' && !has_any(SSCL)) - { - _c4dbgp("it's a map with an empty key"); - _move_key_anchor_to_val_anchor(); - _move_key_tag_to_val_tag(); - _push_level(); - _start_map(start_as_child); - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY); - _save_indentation(); - _line_progressed(1); - return true; - } - else if(_handle_types()) - { - return 
true; - } - else if(!rem.begins_with('*') && _handle_key_anchors_and_refs()) - { - return true; - } - else if(has_any(SSCL)) - { - _c4dbgpf("there's a stored scalar: '{}'", m_state->scalar); - - csubstr saved_scalar; - bool is_quoted = false; - if(_scan_scalar_unk(&saved_scalar, &is_quoted)) - { - rem = m_state->line_contents.rem; - _c4dbgpf("... and there's also a scalar next! '{}'", saved_scalar); - if(rem.begins_with_any(" \t")) - { - size_t n = rem.first_not_of(" \t"); - _c4dbgpf("skipping {} spaces/tabs", n); - rem = rem.sub(n); - _line_progressed(n); - } - } - - _c4dbgpf("rem='{}'", rem); - - if(rem.begins_with(", ")) - { - _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); - _start_seq(start_as_child); - add_flags(FLOW); - _append_val(_consume_scalar()); - _line_progressed(2); - } - else if(rem.begins_with(',')) - { - _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); - _start_seq(start_as_child); - add_flags(FLOW); - _append_val(_consume_scalar()); - _line_progressed(1); - } - else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgpf("got a ': ' -- it's a map (as_child={})", start_as_child); - _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair - _line_progressed(2); - } - else if(rem == ":" || rem.begins_with(":\"") || rem.begins_with(":'")) - { - if(rem == ":") { _c4dbgpf("got a ':' -- it's a map (as_child={})", start_as_child); } - else { _c4dbgpf("got a '{}' -- it's a map (as_child={})", rem.first(2), start_as_child); } - _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair - _line_progressed(1); // advance only 1 - } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - else if(rem.begins_with('}')) - { - if(!has_all(RMAP|FLOW)) - { - _c4err("invalid token: not reading a map"); - } - if(!has_all(SSCL)) - { - _c4err("no scalar stored"); - } - _append_key_val(saved_scalar, is_quoted); - _stop_map(); - _line_progressed(1); - 
saved_scalar.clear(); - is_quoted = false; - } - #endif - else if(rem.begins_with("...")) - { - _c4dbgp("got stream end '...'"); - _end_stream(); - _line_progressed(3); - } - else if(rem.begins_with('#')) - { - _c4dbgpf("it's a comment: '{}'", rem); - _scan_comment(); - return true; - } - else if(_handle_key_anchors_and_refs()) - { - return true; - } - else if(rem.begins_with(" ") || rem.begins_with("\t")) - { - size_t n = rem.first_not_of(" \t"); - if(n == npos) - n = rem.len; - _c4dbgpf("has {} spaces/tabs, skip...", n); - _line_progressed(n); - return true; - } - else if(rem.empty()) - { - // nothing to do - } - else if(rem == "---" || rem.begins_with("--- ")) - { - _c4dbgp("caught ---: starting doc"); - _start_new_doc(rem); - return true; - } - else if(rem.begins_with('%')) - { - _c4dbgp("caught a directive: ignoring..."); - _line_progressed(rem.len); - return true; - } - else - { - _c4err("parse error"); - } - - if(is_quoted || (! saved_scalar.empty())) - { - _store_scalar(saved_scalar, is_quoted); - } - - return true; - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL)); - csubstr scalar; - size_t indentation = m_state->line_contents.indentation; // save - bool is_quoted; - if(_scan_scalar_unk(&scalar, &is_quoted)) - { - _c4dbgpf("got a {} scalar", is_quoted ? 
"quoted" : ""); - rem = m_state->line_contents.rem; - { - size_t first = rem.first_not_of(" \t"); - if(first && first != npos) - { - _c4dbgpf("skip {} whitespace characters", first); - _line_progressed(first); - rem = rem.sub(first); - } - } - _store_scalar(scalar, is_quoted); - if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgpf("got a ': ' next -- it's a map (as_child={})", start_as_child); - _push_level(); - _start_map(start_as_child); // wait for the val scalar to append the key-val pair - _set_indentation(indentation); - _line_progressed(2); // call this AFTER saving the indentation - } - else if(rem.begins_with(':')) - { - _c4dbgpf("got a ':' next -- it's a map (as_child={})", start_as_child); - _push_level(); - _start_map(start_as_child); // wait for the val scalar to append the key-val pair - _set_indentation(indentation); - _line_progressed(1); // call this AFTER saving the indentation - } - else - { - // we still don't know whether it's a seq or a map - // so just store the scalar - } - return true; - } - else if(rem.begins_with_any(" \t")) - { - csubstr ws = rem.left_of(rem.first_not_of(" \t")); - rem = rem.right_of(ws); - if(has_all(RTOP) && rem.begins_with("---")) - { - _c4dbgp("there's a doc starting, and it's indented"); - _set_indentation(ws.len); - } - _c4dbgpf("skipping {} spaces/tabs", ws.len); - _line_progressed(ws.len); - return true; - } - } - - return false; -} - - -//----------------------------------------------------------------------------- -C4_ALWAYS_INLINE void Parser::_skipchars(char c) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with(c)); - size_t pos = m_state->line_contents.rem.first_not_of(c); - if(pos == npos) - pos = m_state->line_contents.rem.len; // maybe the line is just whitespace - _c4dbgpf("skip {} '{}'", pos, c); - _line_progressed(pos); -} - -template -C4_ALWAYS_INLINE void Parser::_skipchars(const char (&chars)[N]) -{ - 
_RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with_any(chars)); - size_t pos = m_state->line_contents.rem.first_not_of(chars); - if(pos == npos) - pos = m_state->line_contents.rem.len; // maybe the line is just whitespace - _c4dbgpf("skip {} characters", pos); - _line_progressed(pos); -} - - -//----------------------------------------------------------------------------- -bool Parser::_handle_seq_flow() -{ - _c4dbgpf("handle_seq_flow: node_id={} level={}", m_state->node_id, m_state->level); - csubstr rem = m_state->line_contents.rem; - - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW)); - - if(rem.begins_with(' ')) - { - // with explicit flow, indentation does not matter - _c4dbgp("starts with spaces"); - _skipchars(' '); - return true; - } - _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) - { - _c4dbgp("starts with tabs"); - _skipchars('\t'); - return true; - }) - else if(rem.begins_with('#')) - { - _c4dbgp("it's a comment"); - rem = _scan_comment(); // also progresses the line - return true; - } - else if(rem.begins_with(']')) - { - _c4dbgp("end the sequence"); - _pop_level(); - _line_progressed(1); - if(has_all(RSEQIMAP)) - { - _stop_seqimap(); - _pop_level(); - } - return true; - } - - if(has_any(RVAL)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); - bool is_quoted; - if(_scan_scalar_seq_flow(&rem, &is_quoted)) - { - _c4dbgp("it's a scalar"); - addrem_flags(RNXT, RVAL); - _append_val(rem, is_quoted); - return true; - } - else if(rem.begins_with('[')) - { - _c4dbgp("val is a child seq"); - addrem_flags(RNXT, RVAL); // before _push_level! - _push_level(/*explicit flow*/true); - _start_seq(); - add_flags(FLOW); - _line_progressed(1); - return true; - } - else if(rem.begins_with('{')) - { - _c4dbgp("val is a child map"); - addrem_flags(RNXT, RVAL); // before _push_level! 
- _push_level(/*explicit flow*/true); - _start_map(); - addrem_flags(FLOW|RKEY, RVAL); - _line_progressed(1); - return true; - } - else if(rem == ':') - { - _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id); - _start_seqimap(); - _line_progressed(1); - return true; - } - else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); - _start_seqimap(); - _line_progressed(2); - return true; - } - else if(rem.begins_with("? ")) - { - _c4dbgpf("found '? ' -- there's an implicit map in the seq node[{}]", m_state->node_id); - _start_seqimap(); - _line_progressed(2); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(SSCL) && m_state->scalar == ""); - addrem_flags(QMRK|RKEY, RVAL|SSCL); - return true; - } - else if(_handle_types()) - { - return true; - } - else if(_handle_val_anchors_and_refs()) - { - return true; - } - else if(rem.begins_with(", ")) - { - _c4dbgp("found ',' -- the value was null"); - _append_val_null(rem.str - 1); - _line_progressed(2); - return true; - } - else if(rem.begins_with(',')) - { - _c4dbgp("found ',' -- the value was null"); - _append_val_null(rem.str - 1); - _line_progressed(1); - return true; - } - else if(rem.begins_with('\t')) - { - _skipchars('\t'); - return true; - } - else - { - _c4err("parse error"); - } - } - else if(has_any(RNXT)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - if(rem.begins_with(", ")) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); - _c4dbgp("seq: expect next val"); - addrem_flags(RVAL, RNXT); - _line_progressed(2); - return true; - } - else if(rem.begins_with(',')) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); - _c4dbgp("seq: expect next val"); - addrem_flags(RVAL, RNXT); - _line_progressed(1); - return true; - } - else if(rem == ':') - { - _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id); - 
_start_seqimap(); - _line_progressed(1); - return true; - } - else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); - _start_seqimap(); - _line_progressed(2); - return true; - } - else - { - _c4err("was expecting a comma"); - } - } - else - { - _c4err("internal error"); - } - C4_UNREACHABLE(); -} - -//----------------------------------------------------------------------------- -bool Parser::_handle_seq_blck() -{ - _c4dbgpf("handle_seq_impl: node_id={} level={}", m_state->node_id, m_state->level); - csubstr rem = m_state->line_contents.rem; - - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); - - if(rem.begins_with('#')) - { - _c4dbgp("it's a comment"); - rem = _scan_comment(); - return true; - } - if(has_any(RNXT)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - - if(_handle_indentation()) - return true; - - if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) - { - _c4dbgp("expect another val"); - addrem_flags(RVAL, RNXT); - _line_progressed(2); - return true; - } - else if(rem == '-') - { - _c4dbgp("expect another val"); - addrem_flags(RVAL, RNXT); - _line_progressed(1); - return true; - } - else if(rem.begins_with_any(" \t")) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
_at_line_begin()); - _skipchars(" \t"); - return true; - } - else if(rem.begins_with("...")) - { - _c4dbgp("got stream end '...'"); - _end_stream(); - _line_progressed(3); - return true; - } - else if(rem.begins_with("---")) - { - _c4dbgp("got document start '---'"); - _start_new_doc(rem); - return true; - } - else - { - _c4err("parse error"); - } - } - else if(has_any(RVAL)) - { - // there can be empty values - if(_handle_indentation()) - return true; - - csubstr s; - bool is_quoted; - if(_scan_scalar_seq_blck(&s, &is_quoted)) // this also progresses the line - { - _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); - - rem = m_state->line_contents.rem; - if(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(rem.begins_with_any(" \t"), rem.begins_with(' '))) - { - _c4dbgp("skipping whitespace..."); - size_t skip = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - if(skip == csubstr::npos) - skip = rem.len; // maybe the line is just whitespace - _line_progressed(skip); - rem = rem.sub(skip); - } - - _c4dbgpf("rem=[{}]~~~{}~~~", rem.len, rem); - if(!rem.begins_with('#') && (rem.ends_with(':') || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) - { - _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); - if(m_key_anchor.empty()) - _move_val_anchor_to_key_anchor(); - if(m_key_tag.empty()) - _move_val_tag_to_key_tag(); - addrem_flags(RNXT, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT - _push_level(); - _start_map(); - _store_scalar(s, is_quoted); - if( ! 
_maybe_set_indentation_from_anchor_or_tag()) - { - _c4dbgpf("set indentation from scalar: {}", m_state->scalar_col); - _set_indentation(m_state->scalar_col); // this is the column where the scalar starts - } - _move_key_tag2_to_key_tag(); - addrem_flags(RVAL, RKEY); - _line_progressed(1); - } - else - { - _c4dbgp("appending val to current seq"); - _append_val(s, is_quoted); - addrem_flags(RNXT, RVAL); - } - return true; - } - else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) - { - if(_rval_dash_start_or_continue_seq()) - _line_progressed(2); - return true; - } - else if(rem == '-') - { - if(_rval_dash_start_or_continue_seq()) - _line_progressed(1); - return true; - } - else if(rem.begins_with('[')) - { - _c4dbgp("val is a child seq, flow"); - addrem_flags(RNXT, RVAL); // before _push_level! - _push_level(/*explicit flow*/true); - _start_seq(); - add_flags(FLOW); - _line_progressed(1); - return true; - } - else if(rem.begins_with('{')) - { - _c4dbgp("val is a child map, flow"); - addrem_flags(RNXT, RVAL); // before _push_level! - _push_level(/*explicit flow*/true); - _start_map(); - addrem_flags(FLOW|RKEY, RVAL); - _line_progressed(1); - return true; - } - else if(rem.begins_with("? ")) - { - _c4dbgp("val is a child map + this key is complex"); - addrem_flags(RNXT, RVAL); // before _push_level! 
- _push_level(); - _start_map(); - addrem_flags(QMRK|RKEY, RVAL); - _save_indentation(); - _line_progressed(2); - return true; - } - else if(rem.begins_with(' ')) - { - csubstr spc = rem.left_of(rem.first_not_of(' ')); - if(_at_line_begin()) - { - _c4dbgpf("skipping value indentation: {} spaces", spc.len); - _line_progressed(spc.len); - return true; - } - else - { - _c4dbgpf("skipping {} spaces", spc.len); - _line_progressed(spc.len); - return true; - } - } - else if(_handle_types()) - { - return true; - } - else if(_handle_val_anchors_and_refs()) - { - return true; - } - /* pathological case: - * - &key : val - * - &key : - * - : val - */ - else if((!has_all(SSCL)) && - (rem.begins_with(": ") || rem.left_of(rem.find("#")).trimr("\t") == ":")) - { - if(!m_val_anchor.empty() || !m_val_tag.empty()) - { - _c4dbgp("val is a child map + this key is empty, with anchors or tags"); - addrem_flags(RNXT, RVAL); // before _push_level! - _move_val_tag_to_key_tag(); - _move_val_anchor_to_key_anchor(); - _push_level(); - _start_map(); - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY); - RYML_CHECK(_maybe_set_indentation_from_anchor_or_tag()); // one of them must exist - _line_progressed(rem.begins_with(": ") ? 2u : 1u); - return true; - } - else - { - _c4dbgp("val is a child map + this key is empty, no anchors or tags"); - addrem_flags(RNXT, RVAL); // before _push_level! - size_t ind = m_state->indref; - _push_level(); - _start_map(); - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY); - _c4dbgpf("set indentation from map anchor: {}", ind + 2); - _set_indentation(ind + 2); // this is the column where the map starts - _line_progressed(rem.begins_with(": ") ? 
2u : 1u); - return true; - } - } - else - { - _c4err("parse error"); - } - } - - return false; -} - -//----------------------------------------------------------------------------- - -bool Parser::_rval_dash_start_or_continue_seq() -{ - size_t ind = m_state->line_contents.current_col(); - _RYML_CB_ASSERT(m_stack.m_callbacks, ind >= m_state->indref); - size_t delta_ind = ind - m_state->indref; - if( ! delta_ind) - { - _c4dbgp("prev val was empty"); - addrem_flags(RNXT, RVAL); - _append_val_null(&m_state->line_contents.full[ind]); - return false; - } - _c4dbgp("val is a nested seq, indented"); - addrem_flags(RNXT, RVAL); // before _push_level! - _push_level(); - _start_seq(); - _save_indentation(); - return true; -} - -//----------------------------------------------------------------------------- -bool Parser::_handle_map_flow() -{ - // explicit flow, ie, inside {}, separated by commas - _c4dbgpf("handle_map_flow: node_id={} level={}", m_state->node_id, m_state->level); - csubstr rem = m_state->line_contents.rem; - - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP|FLOW)); - - if(rem.begins_with(' ')) - { - // with explicit flow, indentation does not matter - _c4dbgp("starts with spaces"); - _skipchars(' '); - return true; - } - _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) - { - // with explicit flow, indentation does not matter - _c4dbgp("starts with tabs"); - _skipchars('\t'); - return true; - }) - else if(rem.begins_with('#')) - { - _c4dbgp("it's a comment"); - rem = _scan_comment(); // also progresses the line - return true; - } - else if(rem.begins_with('}')) - { - _c4dbgp("end the map"); - if(has_all(SSCL)) - { - _c4dbgp("the last val was null"); - _append_key_val_null(rem.str - 1); - rem_flags(RVAL); - } - _pop_level(); - _line_progressed(1); - if(has_all(RSEQIMAP)) - { - _c4dbgp("stopping implicitly nested 1x map"); - _stop_seqimap(); - _pop_level(); - } - return true; - } - - if(has_any(RNXT)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, 
has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RSEQIMAP)); - - if(rem.begins_with(", ")) - { - _c4dbgp("seq: expect next keyval"); - addrem_flags(RKEY, RNXT); - _line_progressed(2); - return true; - } - else if(rem.begins_with(',')) - { - _c4dbgp("seq: expect next keyval"); - addrem_flags(RKEY, RNXT); - _line_progressed(1); - return true; - } - else - { - _c4err("parse error"); - } - } - else if(has_any(RKEY)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - - bool is_quoted; - if(has_none(SSCL) && _scan_scalar_map_flow(&rem, &is_quoted)) - { - _c4dbgp("it's a scalar"); - _store_scalar(rem, is_quoted); - rem = m_state->line_contents.rem; - csubstr trimmed = rem.triml(" \t"); - if(trimmed.len && (trimmed.begins_with(": ") || trimmed.begins_with_any(":,}") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= rem.str); - size_t num = static_cast(trimmed.str - rem.str); - _c4dbgpf("trimming {} whitespace after the scalar: '{}' --> '{}'", num, rem, rem.sub(num)); - rem = rem.sub(num); - _line_progressed(num); - } - } - - if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgp("wait for val"); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(2); - if(!has_all(SSCL)) - { - _c4dbgp("no key was found, defaulting to empty key ''"); - _store_scalar_null(rem.str); - } - return true; - } - else if(rem == ':') - { - _c4dbgp("wait for val"); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(1); - if(!has_all(SSCL)) - { - _c4dbgp("no key was found, defaulting to empty key ''"); - _store_scalar_null(rem.str); - } - return true; - } - else if(rem.begins_with('?')) - { - _c4dbgp("complex key"); - add_flags(QMRK); - _line_progressed(1); - return true; - } - else if(rem.begins_with(',')) - { - _c4dbgp("prev scalar was a key with null value"); - 
_append_key_val_null(rem.str - 1); - _line_progressed(1); - return true; - } - else if(rem.begins_with('}')) - { - _c4dbgp("map terminates after a key..."); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); - _c4dbgp("the last val was null"); - _append_key_val_null(rem.str - 1); - rem_flags(RVAL); - if(has_all(RSEQIMAP)) - { - _c4dbgp("stopping implicitly nested 1x map"); - _stop_seqimap(); - _pop_level(); - } - _pop_level(); - _line_progressed(1); - return true; - } - else if(_handle_types()) - { - return true; - } - else if(_handle_key_anchors_and_refs()) - { - return true; - } - else if(rem == "") - { - return true; - } - else - { - size_t pos = rem.first_not_of(" \t"); - if(pos == csubstr::npos) - pos = 0; - rem = rem.sub(pos); - if(rem.begins_with(':')) - { - _c4dbgp("wait for val"); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(pos + 1); - if(!has_all(SSCL)) - { - _c4dbgp("no key was found, defaulting to empty key ''"); - _store_scalar_null(rem.str); - } - return true; - } - else if(rem.begins_with('#')) - { - _c4dbgp("it's a comment"); - _line_progressed(pos); - rem = _scan_comment(); // also progresses the line - return true; - } - else - { - _c4err("parse error"); - } - } - } - else if(has_any(RVAL)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); - bool is_quoted; - if(_scan_scalar_map_flow(&rem, &is_quoted)) - { - _c4dbgp("it's a scalar"); - addrem_flags(RNXT, RVAL|RKEY); - _append_key_val(rem, is_quoted); - if(has_all(RSEQIMAP)) - { - _c4dbgp("stopping implicitly nested 1x map"); - _stop_seqimap(); - _pop_level(); - } - return true; - } - else if(rem.begins_with('[')) - { - _c4dbgp("val is a child seq"); - addrem_flags(RNXT, RVAL|RKEY); // before _push_level! 
- _push_level(/*explicit flow*/true); - _move_scalar_from_top(); - _start_seq(); - add_flags(FLOW); - _line_progressed(1); - return true; - } - else if(rem.begins_with('{')) - { - _c4dbgp("val is a child map"); - addrem_flags(RNXT, RVAL|RKEY); // before _push_level! - _push_level(/*explicit flow*/true); - _move_scalar_from_top(); - _start_map(); - addrem_flags(FLOW|RKEY, RNXT|RVAL); - _line_progressed(1); - return true; - } - else if(_handle_types()) - { - return true; - } - else if(_handle_val_anchors_and_refs()) - { - return true; - } - else if(rem.begins_with(',')) - { - _c4dbgp("appending empty val"); - _append_key_val_null(rem.str - 1); - addrem_flags(RKEY, RVAL); - _line_progressed(1); - if(has_any(RSEQIMAP)) - { - _c4dbgp("stopping implicitly nested 1x map"); - _stop_seqimap(); - _pop_level(); - } - return true; - } - else if(has_any(RSEQIMAP) && rem.begins_with(']')) - { - _c4dbgp("stopping implicitly nested 1x map"); - if(has_any(SSCL)) - { - _append_key_val_null(rem.str - 1); - } - _stop_seqimap(); - _pop_level(); - return true; - } - else - { - _c4err("parse error"); - } - } - else - { - _c4err("internal error"); - } - C4_UNREACHABLE(); -} - -//----------------------------------------------------------------------------- -bool Parser::_handle_map_blck() -{ - _c4dbgpf("handle_map_blck: node_id={} level={}", m_state->node_id, m_state->level); - csubstr rem = m_state->line_contents.rem; - - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); - - if(rem.begins_with('#')) - { - _c4dbgp("it's a comment"); - rem = _scan_comment(); - return true; - } - - if(has_any(RNXT)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - // actually, we don't need RNXT in indent-based maps. 
- addrem_flags(RKEY, RNXT); - } - - if(_handle_indentation()) - { - _c4dbgp("indentation token"); - return true; - } - - if(has_any(RKEY)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - - _c4dbgp("RMAP|RKEY read scalar?"); - bool is_quoted; - if(_scan_scalar_map_blck(&rem, &is_quoted)) // this also progresses the line - { - _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); - if(has_all(QMRK|SSCL)) - { - _c4dbgpf("current key is QMRK; SSCL is set. so take store scalar='{}' as key and add an empty val", m_state->scalar); - _append_key_val_null(rem.str - 1); - } - _store_scalar(rem, is_quoted); - if(has_all(QMRK|RSET)) - { - _c4dbgp("it's a complex key, so use null value '~'"); - _append_key_val_null(rem.str); - } - rem = m_state->line_contents.rem; - - if(rem.begins_with(':')) - { - _c4dbgp("wait for val"); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(1); - rem = m_state->line_contents.rem; - if(rem.begins_with_any(" \t")) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); - rem = rem.left_of(rem.first_not_of(" \t")); - _c4dbgpf("skip {} spaces/tabs", rem.len); - _line_progressed(rem.len); - } - } - return true; - } - else if(rem.begins_with_any(" \t")) - { - size_t pos = rem.first_not_of(" \t"); - if(pos == npos) - pos = rem.len; - _c4dbgpf("skip {} spaces/tabs", pos); - _line_progressed(pos); - return true; - } - else if(rem == '?' || rem.begins_with("? ")) - { - _c4dbgp("it's a complex key"); - _line_progressed(rem.begins_with("? ") ? 2u : 1u); - if(has_any(SSCL)) - _append_key_val_null(rem.str - 1); - add_flags(QMRK); - return true; - } - else if(has_all(QMRK) && rem.begins_with(':')) - { - _c4dbgp("complex key finished"); - if(!has_any(SSCL)) - _store_scalar_null(rem.str); - addrem_flags(RVAL, RKEY|QMRK); - _line_progressed(1); - rem = m_state->line_contents.rem; - if(rem.begins_with(' ')) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
_at_line_begin()); - _skipchars(' '); - } - return true; - } - else if(rem == ':' || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) - { - _c4dbgp("key finished"); - if(!has_all(SSCL)) - { - _c4dbgp("key was empty..."); - _store_scalar_null(rem.str); - rem_flags(QMRK); - } - addrem_flags(RVAL, RKEY); - _line_progressed(rem == ':' ? 1 : 2); - return true; - } - else if(rem.begins_with("...")) - { - _c4dbgp("end current document"); - _end_stream(); - _line_progressed(3); - return true; - } - else if(rem.begins_with("---")) - { - _c4dbgp("start new document '---'"); - _start_new_doc(rem); - return true; - } - else if(_handle_types()) - { - return true; - } - else if(_handle_key_anchors_and_refs()) - { - return true; - } - else - { - _c4err("parse error"); - } - } - else if(has_any(RVAL)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); - - _c4dbgp("RMAP|RVAL read scalar?"); - csubstr s; - bool is_quoted; - if(_scan_scalar_map_blck(&s, &is_quoted)) // this also progresses the line - { - _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); - - rem = m_state->line_contents.rem; - - if(rem.begins_with(": ")) - { - _c4dbgp("actually, the scalar is the first key of a map"); - addrem_flags(RKEY, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT - _push_level(); - _move_scalar_from_top(); - _move_val_anchor_to_key_anchor(); - _start_map(); - _save_indentation(m_state->scalar_col); - addrem_flags(RVAL, RKEY); - _line_progressed(2); - } - else if(rem.begins_with(':')) - { - _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); - addrem_flags(RKEY, RVAL); // before _push_level! 
This prepares the current level for popping by setting it to RNXT - _push_level(); - _move_scalar_from_top(); - _move_val_anchor_to_key_anchor(); - _start_map(); - _save_indentation(/*behind*/s.len); - addrem_flags(RVAL, RKEY); - _line_progressed(1); - } - else - { - _c4dbgp("appending keyval to current map"); - _append_key_val(s, is_quoted); - addrem_flags(RKEY, RVAL); - } - return true; - } - else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) - { - _c4dbgp("val is a nested seq, indented"); - addrem_flags(RKEY, RVAL); // before _push_level! - _push_level(); - _move_scalar_from_top(); - _start_seq(); - _save_indentation(); - _line_progressed(2); - return true; - } - else if(rem == '-') - { - _c4dbgp("maybe a seq. start unknown, indented"); - _start_unk(); - _save_indentation(); - _line_progressed(1); - return true; - } - else if(rem.begins_with('[')) - { - _c4dbgp("val is a child seq, flow"); - addrem_flags(RKEY, RVAL); // before _push_level! - _push_level(/*explicit flow*/true); - _move_scalar_from_top(); - _start_seq(); - add_flags(FLOW); - _line_progressed(1); - return true; - } - else if(rem.begins_with('{')) - { - _c4dbgp("val is a child map, flow"); - addrem_flags(RKEY, RVAL); // before _push_level! 
- _push_level(/*explicit flow*/true); - _move_scalar_from_top(); - _start_map(); - addrem_flags(FLOW|RKEY, RVAL); - _line_progressed(1); - return true; - } - else if(rem.begins_with(' ')) - { - csubstr spc = rem.left_of(rem.first_not_of(' ')); - if(_at_line_begin()) - { - _c4dbgpf("skipping value indentation: {} spaces", spc.len); - _line_progressed(spc.len); - return true; - } - else - { - _c4dbgpf("skipping {} spaces", spc.len); - _line_progressed(spc.len); - return true; - } - } - else if(_handle_types()) - { - return true; - } - else if(_handle_val_anchors_and_refs()) - { - return true; - } - else if(rem.begins_with("--- ") || rem == "---" || rem.begins_with("---\t")) - { - _start_new_doc(rem); - return true; - } - else if(rem.begins_with("...")) - { - _c4dbgp("end current document"); - _end_stream(); - _line_progressed(3); - return true; - } - else - { - _c4err("parse error"); - } - } - else - { - _c4err("internal error"); - } - C4_UNREACHABLE(); -} - - -//----------------------------------------------------------------------------- -bool Parser::_handle_top() -{ - _c4dbgp("handle_top"); - csubstr rem = m_state->line_contents.rem; - - if(rem.begins_with('#')) - { - _c4dbgp("a comment line"); - _scan_comment(); - return true; - } - - csubstr trimmed = rem.triml(' '); - - if(trimmed.begins_with('%')) - { - _handle_directive(trimmed); - _line_progressed(rem.len); - return true; - } - else if(trimmed.begins_with("--- ") || trimmed == "---" || trimmed.begins_with("---\t")) - { - _start_new_doc(rem); - if(trimmed.len < rem.len) - { - _line_progressed(rem.len - trimmed.len); - _save_indentation(); - } - return true; - } - else if(trimmed.begins_with("...")) - { - _c4dbgp("end current document"); - _end_stream(); - if(trimmed.len < rem.len) - { - _line_progressed(rem.len - trimmed.len); - } - _line_progressed(3); - return true; - } - else - { - _c4err("parse error"); - } - C4_UNREACHABLE(); -} - - 
-//----------------------------------------------------------------------------- - -bool Parser::_handle_key_anchors_and_refs() -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RVAL)); - const csubstr rem = m_state->line_contents.rem; - if(rem.begins_with('&')) - { - _c4dbgp("found a key anchor!!!"); - if(has_all(QMRK|SSCL)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); - _c4dbgp("there is a stored key, so this anchor is for the next element"); - _append_key_val_null(rem.str - 1); - rem_flags(QMRK); - return true; - } - csubstr anchor = rem.left_of(rem.first_of(' ')); - _line_progressed(anchor.len); - anchor = anchor.sub(1); // skip the first character - _move_key_anchor_to_val_anchor(); - _c4dbgpf("key anchor value: '{}'", anchor); - m_key_anchor = anchor; - m_key_anchor_indentation = m_state->line_contents.current_col(rem); - return true; - } - else if(C4_UNLIKELY(rem.begins_with('*'))) - { - _c4err("not implemented - this should have been catched elsewhere"); - C4_UNREACHABLE(); - } - return false; -} - -bool Parser::_handle_val_anchors_and_refs() -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RKEY)); - const csubstr rem = m_state->line_contents.rem; - if(rem.begins_with('&')) - { - csubstr anchor = rem.left_of(rem.first_of(' ')); - _line_progressed(anchor.len); - anchor = anchor.sub(1); // skip the first character - _c4dbgpf("val: found an anchor: '{}', indentation={}!!!", anchor, m_state->line_contents.current_col(rem)); - if(m_val_anchor.empty()) - { - _c4dbgpf("save val anchor: '{}'", anchor); - m_val_anchor = anchor; - m_val_anchor_indentation = m_state->line_contents.current_col(rem); - } - else - { - _c4dbgpf("there is a pending val anchor '{}'", m_val_anchor); - if(m_tree->is_seq(m_state->node_id)) - { - if(m_tree->has_children(m_state->node_id)) - { - _c4dbgpf("current node={} is a seq, has {} children", m_state->node_id, m_tree->num_children(m_state->node_id)); - _c4dbgpf("... 
so take the new one as a key anchor '{}'", anchor); - m_key_anchor = anchor; - m_key_anchor_indentation = m_state->line_contents.current_col(rem); - } - else - { - _c4dbgpf("current node={} is a seq, has no children", m_state->node_id); - if(m_tree->has_val_anchor(m_state->node_id)) - { - _c4dbgpf("... node={} already has val anchor: '{}'", m_state->node_id, m_tree->val_anchor(m_state->node_id)); - _c4dbgpf("... so take the new one as a key anchor '{}'", anchor); - m_key_anchor = anchor; - m_key_anchor_indentation = m_state->line_contents.current_col(rem); - } - else - { - _c4dbgpf("... so set pending val anchor: '{}' on current node {}", m_val_anchor, m_state->node_id); - m_tree->set_val_anchor(m_state->node_id, m_val_anchor); - m_val_anchor = anchor; - m_val_anchor_indentation = m_state->line_contents.current_col(rem); - } - } - } - } - return true; - } - else if(C4_UNLIKELY(rem.begins_with('*'))) - { - _c4err("not implemented - this should have been catched elsewhere"); - C4_UNREACHABLE(); - } - return false; -} - -void Parser::_move_key_anchor_to_val_anchor() -{ - if(m_key_anchor.empty()) - return; - _c4dbgpf("move current key anchor to val slot: key='{}' -> val='{}'", m_key_anchor, m_val_anchor); - if(!m_val_anchor.empty()) - _c4err("triple-pending anchor"); - m_val_anchor = m_key_anchor; - m_val_anchor_indentation = m_key_anchor_indentation; - m_key_anchor = {}; - m_key_anchor_indentation = {}; -} - -void Parser::_move_val_anchor_to_key_anchor() -{ - if(m_val_anchor.empty()) - return; - if(!_token_is_from_this_line(m_val_anchor)) - return; - _c4dbgpf("move current val anchor to key slot: key='{}' <- val='{}'", m_key_anchor, m_val_anchor); - if(!m_key_anchor.empty()) - _c4err("triple-pending anchor"); - m_key_anchor = m_val_anchor; - m_key_anchor_indentation = m_val_anchor_indentation; - m_val_anchor = {}; - m_val_anchor_indentation = {}; -} - -void Parser::_move_key_tag_to_val_tag() -{ - if(m_key_tag.empty()) - return; - _c4dbgpf("move key tag to val tag: 
key='{}' -> val='{}'", m_key_tag, m_val_tag); - m_val_tag = m_key_tag; - m_val_tag_indentation = m_key_tag_indentation; - m_key_tag.clear(); - m_key_tag_indentation = 0; -} - -void Parser::_move_val_tag_to_key_tag() -{ - if(m_val_tag.empty()) - return; - if(!_token_is_from_this_line(m_val_tag)) - return; - _c4dbgpf("move val tag to key tag: key='{}' <- val='{}'", m_key_tag, m_val_tag); - m_key_tag = m_val_tag; - m_key_tag_indentation = m_val_tag_indentation; - m_val_tag.clear(); - m_val_tag_indentation = 0; -} - -void Parser::_move_key_tag2_to_key_tag() -{ - if(m_key_tag2.empty()) - return; - _c4dbgpf("move key tag2 to key tag: key='{}' <- key2='{}'", m_key_tag, m_key_tag2); - m_key_tag = m_key_tag2; - m_key_tag_indentation = m_key_tag2_indentation; - m_key_tag2.clear(); - m_key_tag2_indentation = 0; -} - - -//----------------------------------------------------------------------------- - -bool Parser::_handle_types() -{ - csubstr rem = m_state->line_contents.rem.triml(' '); - csubstr t; - - if(rem.begins_with("!!")) - { - _c4dbgp("begins with '!!'"); - t = rem.left_of(rem.first_of(" ,")); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); - //t = t.sub(2); - if(t == "!!set") - add_flags(RSET); - } - else if(rem.begins_with("!<")) - { - _c4dbgp("begins with '!<'"); - t = rem.left_of(rem.first_of('>'), true); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); - //t = t.sub(2, t.len-1); - } - else if(rem.begins_with("!h!")) - { - _c4dbgp("begins with '!h!'"); - t = rem.left_of(rem.first_of(' ')); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 3); - //t = t.sub(3); - } - else if(rem.begins_with('!')) - { - _c4dbgp("begins with '!'"); - t = rem.left_of(rem.first_of(' ')); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); - //t = t.sub(1); - } - - if(t.empty()) - return false; - - if(has_all(QMRK|SSCL)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); - _c4dbgp("there is a stored key, so this tag is for the next element"); - _append_key_val_null(rem.str - 
1); - rem_flags(QMRK); - } - - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - const char *tag_beginning = rem.str; - #endif - size_t tag_indentation = m_state->line_contents.current_col(t); - _c4dbgpf("there was a tag: '{}', indentation={}", t, tag_indentation); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.end() > m_state->line_contents.rem.begin()); - _line_progressed(static_cast(t.end() - m_state->line_contents.rem.begin())); - { - size_t pos = m_state->line_contents.rem.first_not_of(" \t"); - if(pos != csubstr::npos) - _line_progressed(pos); - } - - if(has_all(RMAP|RKEY)) - { - _c4dbgpf("saving map key tag '{}'", t); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_key_tag.empty()); - m_key_tag = t; - m_key_tag_indentation = tag_indentation; - } - else if(has_all(RMAP|RVAL)) - { - /* foo: !!str - * !!str : bar */ - rem = m_state->line_contents.rem; - rem = rem.left_of(rem.find("#")); - rem = rem.trimr(" \t"); - _c4dbgpf("rem='{}'", rem); - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - if(rem == ':' || rem.begins_with(": ")) - { - _c4dbgp("the last val was null, and this is a tag from a null key"); - _append_key_val_null(tag_beginning - 1); - _store_scalar_null(rem.str - 1); - // do not change the flag to key, it is ~ - _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begin() > m_state->line_contents.rem.begin()); - size_t token_len = rem == ':' ? 
1 : 2; - _line_progressed(static_cast(token_len + rem.begin() - m_state->line_contents.rem.begin())); - } - #endif - _c4dbgpf("saving map val tag '{}'", t); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); - m_val_tag = t; - m_val_tag_indentation = tag_indentation; - } - else if(has_all(RSEQ|RVAL) || has_all(RTOP|RUNK|NDOC)) - { - if(m_val_tag.empty()) - { - _c4dbgpf("saving seq/doc val tag '{}'", t); - m_val_tag = t; - m_val_tag_indentation = tag_indentation; - } - else - { - _c4dbgpf("saving seq/doc key tag '{}'", t); - m_key_tag = t; - m_key_tag_indentation = tag_indentation; - } - } - else if(has_all(RTOP|RUNK) || has_any(RUNK)) - { - rem = m_state->line_contents.rem; - rem = rem.left_of(rem.find("#")); - rem = rem.trimr(" \t"); - if(rem.empty()) - { - _c4dbgpf("saving val tag '{}'", t); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); - m_val_tag = t; - m_val_tag_indentation = tag_indentation; - } - else - { - _c4dbgpf("saving key tag '{}'", t); - if(m_key_tag.empty()) - { - m_key_tag = t; - m_key_tag_indentation = tag_indentation; - } - else - { - /* handle this case: - * !!str foo: !!map - * !!int 1: !!float 20.0 - * !!int 3: !!float 40.0 - * - * (m_key_tag would be !!str and m_key_tag2 would be !!int) - */ - m_key_tag2 = t; - m_key_tag2_indentation = tag_indentation; - } - } - } - else - { - _c4err("internal error"); - } - - if(m_val_tag.not_empty()) - { - YamlTag_e tag = to_tag(t); - if(tag == TAG_STR) - { - _c4dbgpf("tag '{}' is a str-type tag", t); - if(has_all(RTOP|RUNK|NDOC)) - { - _c4dbgpf("docval. slurping the string. pos={}", m_state->pos.offset); - csubstr scalar = _slurp_doc_scalar(); - _c4dbgpf("docval. after slurp: {}, at node {}: '{}'", m_state->pos.offset, m_state->node_id, scalar); - m_tree->to_val(m_state->node_id, scalar, DOC); - _c4dbgpf("docval. 
val tag {} -> {}", m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); - m_val_tag.clear(); - if(!m_val_anchor.empty()) - { - _c4dbgpf("setting val anchor[{}]='{}'", m_state->node_id, m_val_anchor); - m_tree->set_val_anchor(m_state->node_id, m_val_anchor); - m_val_anchor.clear(); - } - _end_stream(); - } - } - } - return true; -} - -//----------------------------------------------------------------------------- -csubstr Parser::_slurp_doc_scalar() -{ - csubstr s = m_state->line_contents.rem; - size_t pos = m_state->pos.offset; - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.find("---") != csubstr::npos); - _c4dbgpf("slurp 0 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); - if(s.len == 0) - { - _line_ended(); - _scan_line(); - s = m_state->line_contents.rem; - pos = m_state->pos.offset; - } - - size_t skipws = s.first_not_of(" \t"); - _c4dbgpf("slurp 1 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); - if(skipws != npos) - { - _line_progressed(skipws); - s = m_state->line_contents.rem; - pos = m_state->pos.offset; - _c4dbgpf("slurp 2 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_anchor.empty()); - _handle_val_anchors_and_refs(); - if(!m_val_anchor.empty()) - { - s = m_state->line_contents.rem; - skipws = s.first_not_of(" \t"); - if(skipws != npos) - { - _line_progressed(skipws); - } - s = m_state->line_contents.rem; - pos = m_state->pos.offset; - _c4dbgpf("slurp 3 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); - } - - if(s.begins_with('\'')) - { - m_state->scalar_col = m_state->line_contents.current_col(s); - return _scan_squot_scalar(); - } - else if(s.begins_with('"')) - { - m_state->scalar_col = m_state->line_contents.current_col(s); - return _scan_dquot_scalar(); - } - else if(s.begins_with('|') || s.begins_with('>')) - { - return _scan_block(); - } - - _c4dbgpf("slurp 4 '{}'. 
REM='{}'", s, m_buf.sub(m_state->pos.offset)); - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() + pos); - _line_progressed(static_cast(s.end() - (m_buf.begin() + pos))); - - _c4dbgpf("slurp 5 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); - - if(_at_line_end()) - { - _c4dbgpf("at line end. curr='{}'", s); - s = _extend_scanned_scalar(s); - } - - _c4dbgpf("scalar was '{}'", s); - - return s; -} - - -//----------------------------------------------------------------------------- - -bool Parser::_scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY)); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(FLOW)); - - csubstr s = m_state->line_contents.rem; - if(s.len == 0) - return false; - s = s.trim(" \t"); - if(s.len == 0) - return false; - - if(s.begins_with('\'')) - { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('"')) - { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('|') || s.begins_with('>')) - { - *scalar = _scan_block(); - *quoted = true; - return true; - } - else if(has_any(RTOP) && _is_doc_sep(s)) - { - return false; - } - - _c4dbgp("RSEQ|RVAL"); - if( ! 
_is_scalar_next__rseq_rval(s)) - return false; - _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) - return false; - ) - - if(s.ends_with(':')) - { - --s.len; - } - else - { - auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); - if(first) - s.len = first.pos; - } - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - - if(_at_line_end() && s != '~') - { - _c4dbgpf("at line end. curr='{}'", s); - s = _extend_scanned_scalar(s); - } - - _c4dbgpf("scalar was '{}'", s); - - *scalar = s; - *quoted = false; - return true; -} - -bool Parser::_scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) -{ - _c4dbgp("_scan_scalar_map_blck"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(FLOW)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); - - csubstr s = m_state->line_contents.rem; - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED - if(s.len == 0) - return false; - #endif - s = s.trim(" \t"); - if(s.len == 0) - return false; - - if(s.begins_with('\'')) - { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('"')) - { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('|') || s.begins_with('>')) - { - *scalar = _scan_block(); - *quoted = true; - return true; - } - else if(has_any(RTOP) && _is_doc_sep(s)) - { - return false; - } - - if( ! 
_is_scalar_next__rmap(s)) - return false; - - size_t colon_token = s.find(": "); - if(colon_token == npos) - { - _RYML_WITH_OR_WITHOUT_TAB_TOKENS( - // with tab tokens - colon_token = s.find(":\t"); - if(colon_token == npos) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - colon_token = s.find(':'); - if(colon_token != s.len-1) - colon_token = npos; - } - , - // without tab tokens - colon_token = s.find(':'); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - if(colon_token != s.len-1) - colon_token = npos; - ) - } - - if(has_all(RKEY)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); - if(has_any(QMRK)) - { - _c4dbgp("RMAP|RKEY|CPLX"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); - if(s.begins_with("? ") || s == '?') - return false; - s = s.left_of(colon_token); - s = s.left_of(s.first_of("#")); - s = s.trimr(" \t"); - if(s.begins_with("---")) - return false; - else if(s.begins_with("...")) - return false; - } - else - { - _c4dbgp("RMAP|RKEY"); - _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); - if(s.begins_with("? ") || s == '?') - return false; - s = s.left_of(colon_token); - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - if(s.begins_with("---")) - { - return false; - } - else if(s.begins_with("...")) - { - return false; - } - } - } - else if(has_all(RVAL)) - { - _c4dbgp("RMAP|RVAL"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); - if( ! _is_scalar_next__rmap_val(s)) - return false; - _RYML_WITH_TAB_TOKENS( - else if(s.begins_with("-\t")) - return false; - ) - _c4dbgp("RMAP|RVAL: scalar"); - s = s.left_of(s.find(" #")); // is there a comment? - s = s.left_of(s.find("\t#")); // is there a comment? 
- s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - if(s.begins_with("---")) - return false; - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED - else if(s.begins_with("...")) - return false; - #endif - } - - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - - if(_at_line_end() && s != '~') - { - _c4dbgpf("at line end. curr='{}'", s); - s = _extend_scanned_scalar(s); - } - - _c4dbgpf("scalar was '{}'", s); - - *scalar = s; - *quoted = false; - return true; -} - -bool Parser::_scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY)); - - csubstr s = m_state->line_contents.rem; - if(s.len == 0) - return false; - s = s.trim(" \t"); - if(s.len == 0) - return false; - - if(s.begins_with('\'')) - { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('"')) - { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; - } - - if(has_all(RVAL)) - { - _c4dbgp("RSEQ|RVAL"); - if( ! 
_is_scalar_next__rseq_rval(s)) - return false; - _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) - return false; - ) - _c4dbgp("RSEQ|RVAL|FLOW"); - s = s.left_of(s.first_of(",]")); - if(s.ends_with(':')) - { - --s.len; - } - else - { - auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); - if(first) - s.len = first.pos; - } - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - } - - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - - if(_at_line_end() && s != '~') - { - _c4dbgpf("at line end. curr='{}'", s); - s = _extend_scanned_scalar(s); - } - - _c4dbgpf("scalar was '{}'", s); - - *scalar = s; - *quoted = false; - return true; -} - -bool Parser::_scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); - - csubstr s = m_state->line_contents.rem; - if(s.len == 0) - return false; - s = s.trim(" \t"); - if(s.len == 0) - return false; - - if(s.begins_with('\'')) - { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('"')) - { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; - } - - if( ! 
_is_scalar_next__rmap(s)) - return false; - - if(has_all(RKEY)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); - size_t colon_token = s.find(": "); - if(colon_token == npos) - { - _RYML_WITH_OR_WITHOUT_TAB_TOKENS( - // with tab tokens - colon_token = s.find(":\t"); - if(colon_token == npos) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - colon_token = s.find(':'); - if(colon_token != s.len-1) - colon_token = npos; - } - , - // without tab tokens - colon_token = s.find(':'); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - if(colon_token != s.len-1) - colon_token = npos; - ) - } - if(s.begins_with("? ") || s == '?') - return false; - if(has_any(QMRK)) - { - _c4dbgp("RMAP|RKEY|CPLX"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); - s = s.left_of(colon_token); - s = s.left_of(s.first_of("#")); - s = s.left_of(s.first_of(':')); - s = s.trimr(" \t"); - if(s.begins_with("---")) - return false; - else if(s.begins_with("...")) - return false; - } - else - { - _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); - _c4dbgp("RMAP|RKEY"); - s = s.left_of(colon_token); - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - _c4dbgpf("RMAP|RKEY|FLOW: '{}'", s); - s = s.left_of(s.first_of(",}")); - if(s.ends_with(':')) - --s.len; - } - } - else if(has_all(RVAL)) - { - _c4dbgp("RMAP|RVAL"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); - if( ! _is_scalar_next__rmap_val(s)) - return false; - _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) - return false; - ) - _c4dbgp("RMAP|RVAL|FLOW"); - if(has_none(RSEQIMAP)) - s = s.left_of(s.first_of(",}")); - else - s = s.left_of(s.first_of(",]")); - s = s.left_of(s.find(" #")); // is there a comment? - s = s.left_of(s.find("\t#")); // is there a comment? 
- s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - } - - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - - if(_at_line_end() && s != '~') - { - _c4dbgpf("at line end. curr='{}'", s); - s = _extend_scanned_scalar(s); - } - - _c4dbgpf("scalar was '{}'", s); - - *scalar = s; - *quoted = false; - return true; -} - -bool Parser::_scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RUNK)); - - csubstr s = m_state->line_contents.rem; - if(s.len == 0) - return false; - s = s.trim(" \t"); - if(s.len == 0) - return false; - - if(s.begins_with('\'')) - { - _c4dbgp("got a ': scanning single-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_squot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('"')) - { - _c4dbgp("got a \": scanning double-quoted scalar"); - m_state->scalar_col = m_state->line_contents.current_col(s); - *scalar = _scan_dquot_scalar(); - *quoted = true; - return true; - } - else if(s.begins_with('|') || s.begins_with('>')) - { - *scalar = _scan_block(); - *quoted = true; - return true; - } - else if(has_any(RTOP) && _is_doc_sep(s)) - { - return false; - } - - _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s); - if( ! 
_is_scalar_next__runk(s)) - { - _c4dbgp("RUNK: no scalar next"); - return false; - } - size_t pos = s.find(" #"); - if(pos != npos) - { - _c4dbgpf("RUNK: found ' #' at {}", pos); - s = s.left_of(pos); - } - pos = s.find(": "); - if(pos != npos) - { - _c4dbgpf("RUNK: found ': ' at {}", pos); - s = s.left_of(pos); - } - else if(s.ends_with(':')) - { - _c4dbgp("RUNK: ends with ':'"); - s = s.left_of(s.len-1); - } - _RYML_WITH_TAB_TOKENS( - else if((pos = s.find(":\t")) != npos) // TABS - { - _c4dbgp("RUNK: ends with ':\\t'"); - s = s.left_of(pos); - }) - else - { - _c4dbgp("RUNK: trimming left of ,"); - s = s.left_of(s.first_of(',')); - } - s = s.trim(" \t"); - _c4dbgpf("RUNK: scalar=[{}]~~~{}~~~", s.len, s); - - if(s.empty()) - return false; - - m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); - _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - - if(_at_line_end() && s != '~') - { - _c4dbgpf("at line end. curr=[{}]~~~{}~~", s.len, s); - s = _extend_scanned_scalar(s); - } - - _c4dbgpf("scalar was [{}]~~~{}~~~", s.len, s); - - *scalar = s; - *quoted = false; - return true; -} - - -//----------------------------------------------------------------------------- - -csubstr Parser::_extend_scanned_scalar(csubstr s) -{ - if(has_all(RMAP|RKEY|QMRK)) - { - size_t scalar_indentation = has_any(FLOW) ? 0 : m_state->scalar_col; - _c4dbgpf("extend_scalar: explicit key! 
indref={} scalar_indentation={} scalar_col={}", m_state->indref, scalar_indentation, m_state->scalar_col); - csubstr n = _scan_to_next_nonempty_line(scalar_indentation); - if(!n.empty()) - { - substr full = _scan_complex_key(s, n).trimr(" \t\r\n"); - if(full != s) - s = _filter_plain_scalar(full, scalar_indentation); - } - } - // deal with plain (unquoted) scalars that continue to the next line - else if(!s.begins_with_any("*")) // cannot be a plain scalar if it starts with * (that's an anchor reference) - { - _c4dbgpf("extend_scalar: line ended, scalar='{}'", s); - if(has_none(FLOW)) - { - size_t scalar_indentation = m_state->indref + 1; - if(has_all(RUNK) && scalar_indentation == 1) - scalar_indentation = 0; - csubstr n = _scan_to_next_nonempty_line(scalar_indentation); - if(!n.empty()) - { - _c4dbgpf("rscalar[IMPL]: state_indref={} state_indentation={} scalar_indentation={}", m_state->indref, m_state->line_contents.indentation, scalar_indentation); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.is_super(n)); - substr full = _scan_plain_scalar_blck(s, n, scalar_indentation); - if(full.len >= s.len) - s = _filter_plain_scalar(full, scalar_indentation); - } - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); - csubstr n = _scan_to_next_nonempty_line(/*indentation*/0); - if(!n.empty()) - { - _c4dbgp("rscalar[FLOW]"); - substr full = _scan_plain_scalar_flow(s, n); - s = _filter_plain_scalar(full, /*indentation*/0); - } - } - } - - return s; -} - - -//----------------------------------------------------------------------------- - -substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line) -{ - static constexpr const csubstr chars = "[]{}?#,"; - size_t pos = peeked_line.first_of(chars); - bool first = true; - while(pos != 0) - { - if(has_all(RMAP|RKEY) || has_any(RUNK)) - { - csubstr tpkl = peeked_line.triml(' ').trimr("\r\n"); - if(tpkl.begins_with(": ") || tpkl == ':') - { - _c4dbgpf("rscalar[FLOW]: map value 
starts on the peeked line: '{}'", peeked_line); - peeked_line = peeked_line.first(0); - break; - } - else - { - auto colon_pos = peeked_line.first_of_any(": ", ":"); - if(colon_pos && colon_pos.pos < pos) - { - peeked_line = peeked_line.first(colon_pos.pos); - _c4dbgpf("rscalar[FLOW]: found colon at {}. peeked='{}'", colon_pos.pos, peeked_line); - _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); - _line_progressed(static_cast(peeked_line.end() - m_state->line_contents.rem.begin())); - break; - } - } - } - if(pos != npos) - { - _c4dbgpf("rscalar[FLOW]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n")); - peeked_line = peeked_line.left_of(pos); - _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); - _line_progressed(static_cast(peeked_line.end() - m_state->line_contents.rem.begin())); - break; - } - _c4dbgpf("rscalar[FLOW]: append another line, full: '{}'", peeked_line.trimr("\r\n")); - if(!first) - { - RYML_CHECK(_advance_to_peeked()); - } - peeked_line = _scan_to_next_nonempty_line(/*indentation*/0); - if(peeked_line.empty()) - { - _c4err("expected token or continuation"); - } - pos = peeked_line.first_of(chars); - first = false; - } - substr full(m_buf.str + (currscalar.str - m_buf.str), m_buf.begin() + m_state->pos.offset); - full = full.trimr("\n\r "); - return full; -} - - -//----------------------------------------------------------------------------- - -substr Parser::_scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); - // NOTE. 
there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice - // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar - _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); - size_t offs = static_cast(currscalar.end() - m_buf.begin()); - _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.begins_with(' ', indentation)); - while(true) - { - _c4dbgpf("rscalar[IMPL]: continuing... ref_indentation={}", indentation); - if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) - { - _c4dbgpf("rscalar[IMPL]: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); - break; - } - else if(( ! peeked_line.begins_with(' ', indentation))) // is the line deindented? - { - if(!peeked_line.trim(" \r\n\t").empty()) // is the line not blank? - { - _c4dbgpf("rscalar[IMPL]: deindented line, not blank -- bail now '{}'", peeked_line.trimr("\r\n")); - break; - } - _c4dbgpf("rscalar[IMPL]: line is blank and has less indentation: ref={} line={}: '{}'", indentation, peeked_line.first_not_of(' ') == csubstr::npos ? 0 : peeked_line.first_not_of(' '), peeked_line.trimr("\r\n")); - _c4dbgpf("rscalar[IMPL]: ... searching for a line starting at indentation {}", indentation); - csubstr next_peeked = _scan_to_next_nonempty_line(indentation); - if(next_peeked.empty()) - { - _c4dbgp("rscalar[IMPL]: ... finished."); - break; - } - _c4dbgp("rscalar[IMPL]: ... 
continuing."); - peeked_line = next_peeked; - } - - _c4dbgpf("rscalar[IMPL]: line contents: '{}'", peeked_line.right_of(indentation, true).trimr("\r\n")); - size_t token_pos; - if(peeked_line.find(": ") != npos) - { - _line_progressed(peeked_line.find(": ")); - _c4err("': ' is not a valid token in plain flow (unquoted) scalars"); - } - else if(peeked_line.ends_with(':')) - { - _line_progressed(peeked_line.find(':')); - _c4err("lines cannot end with ':' in plain flow (unquoted) scalars"); - } - else if((token_pos = peeked_line.find(" #")) != npos) - { - _line_progressed(token_pos); - break; - //_c4err("' #' is not a valid token in plain flow (unquoted) scalars"); - } - - _c4dbgpf("rscalar[IMPL]: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); - if(!_advance_to_peeked()) - { - _c4dbgp("rscalar[IMPL]: file finishes after the scalar"); - break; - } - peeked_line = m_state->line_contents.rem; - } - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); - substr full(m_buf.str + (currscalar.str - m_buf.str), - currscalar.len + (m_state->pos.offset - offs)); - full = full.trimr("\r\n "); - return full; -} - -substr Parser::_scan_complex_key(csubstr currscalar, csubstr peeked_line) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); - // NOTE. 
there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice - // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar - _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); - size_t offs = static_cast(currscalar.end() - m_buf.begin()); - while(true) - { - _c4dbgp("rcplxkey: continuing..."); - if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) - { - _c4dbgpf("rcplxkey: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); - break; - } - else - { - size_t pos = peeked_line.first_of("?:[]{}"); - if(pos == csubstr::npos) - { - pos = peeked_line.find("- "); - } - if(pos != csubstr::npos) - { - _c4dbgpf("rcplxkey: found special characters at pos={}: '{}'", pos, peeked_line.trimr("\r\n")); - _line_progressed(pos); - break; - } - } - - _c4dbgpf("rcplxkey: no special chars found '{}'", peeked_line.trimr("\r\n")); - csubstr next_peeked = _scan_to_next_nonempty_line(0); - if(next_peeked.empty()) - { - _c4dbgp("rcplxkey: empty ... finished."); - break; - } - _c4dbgp("rcplxkey: ... 
continuing."); - peeked_line = next_peeked; - - _c4dbgpf("rcplxkey: line contents: '{}'", peeked_line.trimr("\r\n")); - size_t colpos; - if((colpos = peeked_line.find(": ")) != npos) - { - _c4dbgp("rcplxkey: found ': ', stopping."); - _line_progressed(colpos); - break; - } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - else if((colpos = peeked_line.ends_with(':'))) - { - _c4dbgp("rcplxkey: ends with ':', stopping."); - _line_progressed(colpos); - break; - } - #endif - _c4dbgpf("rcplxkey: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); - if(!_advance_to_peeked()) - { - _c4dbgp("rcplxkey: file finishes after the scalar"); - break; - } - peeked_line = m_state->line_contents.rem; - } - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); - substr full(m_buf.str + (currscalar.str - m_buf.str), - currscalar.len + (m_state->pos.offset - offs)); - return full; -} - -//! scans to the next non-blank line starting with the given indentation -csubstr Parser::_scan_to_next_nonempty_line(size_t indentation) -{ - csubstr next_peeked; - while(true) - { - _c4dbgpf("rscalar: ... curr offset: {} indentation={}", m_state->pos.offset, indentation); - next_peeked = _peek_next_line(m_state->pos.offset); - csubstr next_peeked_triml = next_peeked.triml(' '); - _c4dbgpf("rscalar: ... next peeked line='{}'", next_peeked.trimr("\r\n")); - if(next_peeked_triml.begins_with('#')) - { - _c4dbgp("rscalar: ... first non-space character is #"); - return {}; - } - else if(next_peeked.begins_with(' ', indentation)) - { - _c4dbgpf("rscalar: ... begins at same indentation {}, assuming continuation", indentation); - _advance_to_peeked(); - return next_peeked; - } - else // check for de-indentation - { - csubstr trimmed = next_peeked_triml.trimr("\t\r\n"); - _c4dbgpf("rscalar: ... deindented! trimmed='{}'", trimmed); - if(!trimmed.empty()) - { - _c4dbgp("rscalar: ... and not empty. 
bailing out."); - return {}; - } - } - if(!_advance_to_peeked()) - { - _c4dbgp("rscalar: file finished"); - return {}; - } - } - return {}; -} - -// returns false when the file finished -bool Parser::_advance_to_peeked() -{ - _line_progressed(m_state->line_contents.rem.len); - _line_ended(); // advances to the peeked-at line, consuming all remaining (probably newline) characters on the current line - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.first_of("\r\n") == csubstr::npos); - _c4dbgpf("advance to peeked: scan more... pos={} len={}", m_state->pos.offset, m_buf.len); - _scan_line(); // puts the peeked-at line in the buffer - if(_finished_file()) - { - _c4dbgp("rscalar: finished file!"); - return false; - } - return true; -} - -//----------------------------------------------------------------------------- - -C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following) -{ - return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n'); -} - -//! look for the next newline chars, and jump to the right of those -csubstr from_next_line(csubstr rem) -{ - size_t nlpos = rem.first_of("\r\n"); - if(nlpos == csubstr::npos) - return {}; - const char nl = rem[nlpos]; - rem = rem.right_of(nlpos); - if(rem.empty()) - return {}; - if(_extend_from_combined_newline(nl, rem.front())) - rem = rem.sub(1); - return rem; -} - -csubstr Parser::_peek_next_line(size_t pos) const -{ - csubstr rem{}; // declare here because of the goto - size_t nlpos{}; // declare here because of the goto - pos = pos == npos ? 
m_state->pos.offset : pos; - if(pos >= m_buf.len) - goto next_is_empty; - - // look for the next newline chars, and jump to the right of those - rem = from_next_line(m_buf.sub(pos)); - if(rem.empty()) - goto next_is_empty; - - // now get everything up to and including the following newline chars - nlpos = rem.first_of("\r\n"); - if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len)) - nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]); - rem = rem.left_of(nlpos, /*include_pos*/true); - - _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n")); - return rem; - -next_is_empty: - _c4dbgpf("peek next line @ {}: (len=0)''", pos); - return {}; -} - - -//----------------------------------------------------------------------------- -void Parser::LineContents::reset_with_next_line(csubstr buf, size_t offset) -{ - RYML_ASSERT(offset <= buf.len); - char const* C4_RESTRICT b = &buf[offset]; - char const* C4_RESTRICT e = b; - // get the current line stripped of newline chars - while(e < buf.end() && (*e != '\n' && *e != '\r')) - ++e; - RYML_ASSERT(e >= b); - const csubstr stripped_ = buf.sub(offset, static_cast(e - b)); - // advance pos to include the first line ending - if(e != buf.end() && *e == '\r') - ++e; - if(e != buf.end() && *e == '\n') - ++e; - RYML_ASSERT(e >= b); - const csubstr full_ = buf.sub(offset, static_cast(e - b)); - reset(full_, stripped_); -} - -void Parser::_scan_line() -{ - if(m_state->pos.offset >= m_buf.len) - { - m_state->line_contents.reset(m_buf.last(0), m_buf.last(0)); - return; - } - m_state->line_contents.reset_with_next_line(m_buf, m_state->pos.offset); -} - - -//----------------------------------------------------------------------------- -void Parser::_line_progressed(size_t ahead) -{ - _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead); 
- m_state->pos.offset += ahead; - m_state->pos.col += ahead; - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col <= m_state->line_contents.stripped.len+1); - m_state->line_contents.rem = m_state->line_contents.rem.sub(ahead); -} - -void Parser::_line_ended() -{ - _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, m_state->pos.offset, m_state->pos.offset+m_state->line_contents.full.len - m_state->line_contents.stripped.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == m_state->line_contents.stripped.len+1); - m_state->pos.offset += m_state->line_contents.full.len - m_state->line_contents.stripped.len; - ++m_state->pos.line; - m_state->pos.col = 1; -} - -void Parser::_line_ended_undo() -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == 1u); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line > 0u); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len); - size_t delta = m_state->line_contents.full.len - m_state->line_contents.stripped.len; - _c4dbgpf("line[{}] undo ended! 
line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - delta); - m_state->pos.offset -= delta; - --m_state->pos.line; - m_state->pos.col = m_state->line_contents.stripped.len + 1u; - // don't forget to undo also the changes to the remainder of the line - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_buf.len || m_buf[m_state->pos.offset] == '\n' || m_buf[m_state->pos.offset] == '\r'); - m_state->line_contents.rem = m_buf.sub(m_state->pos.offset, 0); -} - - -//----------------------------------------------------------------------------- -void Parser::_set_indentation(size_t indentation) -{ - m_state->indref = indentation; - _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); -} - -void Parser::_save_indentation(size_t behind) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begin() >= m_state->line_contents.full.begin()); - m_state->indref = static_cast(m_state->line_contents.rem.begin() - m_state->line_contents.full.begin()); - _RYML_CB_ASSERT(m_stack.m_callbacks, behind <= m_state->indref); - m_state->indref -= behind; - _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); -} - -bool Parser::_maybe_set_indentation_from_anchor_or_tag() -{ - if(m_key_anchor.not_empty()) - { - _c4dbgpf("set indentation from key anchor: {}", m_key_anchor_indentation); - _set_indentation(m_key_anchor_indentation); // this is the column where the anchor starts - return true; - } - else if(m_key_tag.not_empty()) - { - _c4dbgpf("set indentation from key tag: {}", m_key_tag_indentation); - _set_indentation(m_key_tag_indentation); // this is the column where the tag starts - return true; - } - return false; -} - - -//----------------------------------------------------------------------------- -void Parser::_write_key_anchor(size_t node_id) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, 
m_tree->has_key(node_id)); - if( ! m_key_anchor.empty()) - { - _c4dbgpf("node={}: set key anchor to '{}'", node_id, m_key_anchor); - m_tree->set_key_anchor(node_id, m_key_anchor); - m_key_anchor.clear(); - m_key_anchor_was_before = false; - m_key_anchor_indentation = 0; - } - else if( ! m_tree->is_key_quoted(node_id)) - { - csubstr r = m_tree->key(node_id); - if(r.begins_with('*')) - { - _c4dbgpf("node={}: set key reference: '{}'", node_id, r); - m_tree->set_key_ref(node_id, r.sub(1)); - } - else if(r == "<<") - { - m_tree->set_key_ref(node_id, r); - _c4dbgpf("node={}: it's an inheriting reference", node_id); - if(m_tree->is_seq(node_id)) - { - _c4dbgpf("node={}: inheriting from seq of {}", node_id, m_tree->num_children(node_id)); - for(size_t i = m_tree->first_child(node_id); i != NONE; i = m_tree->next_sibling(i)) - { - if( ! (m_tree->val(i).begins_with('*'))) - _c4err("malformed reference: '{}'", m_tree->val(i)); - } - } - else if( ! m_tree->val(node_id).begins_with('*')) - { - _c4err("malformed reference: '{}'", m_tree->val(node_id)); - } - //m_tree->set_key_ref(node_id, r); - } - } -} - -//----------------------------------------------------------------------------- -void Parser::_write_val_anchor(size_t node_id) -{ - if( ! m_val_anchor.empty()) - { - _c4dbgpf("node={}: set val anchor to '{}'", node_id, m_val_anchor); - m_tree->set_val_anchor(node_id, m_val_anchor); - m_val_anchor.clear(); - } - csubstr r = m_tree->has_val(node_id) ? m_tree->val(node_id) : ""; - if(!m_tree->is_val_quoted(node_id) && r.begins_with('*')) - { - _c4dbgpf("node={}: set val reference: '{}'", node_id, r); - RYML_CHECK(!m_tree->has_val_anchor(node_id)); - m_tree->set_val_ref(node_id, r.sub(1)); - } -} - -//----------------------------------------------------------------------------- -void Parser::_push_level(bool explicit_flow_chars) -{ - _c4dbgpf("pushing level! 
currnode={} currlevel={} stacksize={} stackcap={}", m_state->node_id, m_state->level, m_stack.size(), m_stack.capacity()); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top()); - if(node(m_state) == nullptr) - { - _c4dbgp("pushing level! actually no, current node is null"); - //_RYML_CB_ASSERT(m_stack.m_callbacks, ! explicit_flow_chars); - return; - } - flag_t st = RUNK; - if(explicit_flow_chars || has_all(FLOW)) - { - st |= FLOW; - } - m_stack.push_top(); - m_state = &m_stack.top(); - set_flags(st); - m_state->node_id = (size_t)NONE; - m_state->indref = (size_t)NONE; - ++m_state->level; - _c4dbgpf("pushing level: now, currlevel={}", m_state->level); -} - -void Parser::_pop_level() -{ - _c4dbgpf("popping level! currnode={} currlevel={}", m_state->node_id, m_state->level); - if(has_any(RMAP) || m_tree->is_map(m_state->node_id)) - { - _stop_map(); - } - if(has_any(RSEQ) || m_tree->is_seq(m_state->node_id)) - { - _stop_seq(); - } - if(m_tree->is_doc(m_state->node_id)) - { - _stop_doc(); - } - _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1); - _prepare_pop(); - m_stack.pop(); - m_state = &m_stack.top(); - /*if(has_any(RMAP)) - { - _toggle_key_val(); - }*/ - if(m_state->line_contents.indentation == 0) - { - //_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RTOP)); - add_flags(RTOP); - } - _c4dbgpf("popping level: now, currnode={} currlevel={}", m_state->node_id, m_state->level); -} - -//----------------------------------------------------------------------------- -void Parser::_start_unk(bool /*as_child*/) -{ - _c4dbgp("start_unk"); - _push_level(); - _move_scalar_from_top(); -} - -//----------------------------------------------------------------------------- -void Parser::_start_doc(bool as_child) -{ - _c4dbgpf("start_doc (as child={})", as_child); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); - size_t parent_id = m_stack.size() < 2 ? 
m_root_id : m_stack.top(1).node_id; - _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_root(parent_id)); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); - if(as_child) - { - _c4dbgpf("start_doc: parent={}", parent_id); - if( ! m_tree->is_stream(parent_id)) - { - _c4dbgp("start_doc: rearranging with root as STREAM"); - m_tree->set_root_as_stream(); - } - m_state->node_id = m_tree->append_child(parent_id); - m_tree->to_doc(m_state->node_id); - } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(parent_id) || m_tree->empty(parent_id)); - m_state->node_id = parent_id; - if( ! m_tree->is_doc(parent_id)) - { - m_tree->to_doc(parent_id, DOC); - } - } - #endif - _c4dbgpf("start_doc: id={}", m_state->node_id); - add_flags(RUNK|RTOP|NDOC); - _handle_types(); - rem_flags(NDOC); -} - -void Parser::_stop_doc() -{ - size_t doc_node = m_state->node_id; - _c4dbgpf("stop_doc[{}]", doc_node); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_doc(doc_node)); - if(!m_tree->is_seq(doc_node) && !m_tree->is_map(doc_node) && !m_tree->is_val(doc_node)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); - _c4dbgpf("stop_doc[{}]: there was nothing; adding null val", doc_node); - m_tree->to_val(doc_node, {}, DOC); - } -} - -void Parser::_end_stream() -{ - _c4dbgpf("end_stream, level={} node_id={}", m_state->level, m_state->node_id); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
m_stack.empty()); - NodeData *added = nullptr; - if(has_any(SSCL)) - { - if(m_tree->is_seq(m_state->node_id)) - { - _c4dbgp("append val..."); - added = _append_val(_consume_scalar()); - } - else if(m_tree->is_map(m_state->node_id)) - { - _c4dbgp("append null key val..."); - added = _append_key_val_null(m_state->line_contents.rem.str); - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - if(has_any(RSEQIMAP)) - { - _stop_seqimap(); - _pop_level(); - } - #endif - } - else if(m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE) - { - NodeType_e quoted = has_any(QSCL) ? VALQUO : NOTYPE; // do this before consuming the scalar - csubstr scalar = _consume_scalar(); - _c4dbgpf("node[{}]: to docval '{}'{}", m_state->node_id, scalar, quoted == VALQUO ? ", quoted" : ""); - m_tree->to_val(m_state->node_id, scalar, DOC|quoted); - added = m_tree->get(m_state->node_id); - } - else - { - _c4err("internal error"); - } - } - else if(has_all(RSEQ|RVAL) && has_none(FLOW)) - { - _c4dbgp("add last..."); - added = _append_val_null(m_state->line_contents.rem.str); - } - else if(!m_val_tag.empty() && (m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE)) - { - csubstr scalar = m_state->line_contents.rem.first(0); - _c4dbgpf("node[{}]: add null scalar as docval", m_state->node_id); - m_tree->to_val(m_state->node_id, scalar, DOC); - added = m_tree->get(m_state->node_id); - } - - if(added) - { - size_t added_id = m_tree->id(added); - if(m_tree->is_seq(m_state->node_id) || m_tree->is_doc(m_state->node_id)) - { - if(!m_key_anchor.empty()) - { - _c4dbgpf("node[{}]: move key to val anchor: '{}'", added_id, m_key_anchor); - m_val_anchor = m_key_anchor; - m_key_anchor = {}; - } - if(!m_key_tag.empty()) - { - _c4dbgpf("node[{}]: move key to val tag: '{}'", added_id, m_key_tag); - m_val_tag = m_key_tag; - m_key_tag = {}; - } - } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - if(!m_key_anchor.empty()) - { - _c4dbgpf("node[{}]: set key anchor='{}'", added_id, 
m_key_anchor); - m_tree->set_key_anchor(added_id, m_key_anchor); - m_key_anchor = {}; - } - #endif - if(!m_val_anchor.empty()) - { - _c4dbgpf("node[{}]: set val anchor='{}'", added_id, m_val_anchor); - m_tree->set_val_anchor(added_id, m_val_anchor); - m_val_anchor = {}; - } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - if(!m_key_tag.empty()) - { - _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", added_id, m_key_tag, normalize_tag(m_key_tag)); - m_tree->set_key_tag(added_id, normalize_tag(m_key_tag)); - m_key_tag = {}; - } - #endif - if(!m_val_tag.empty()) - { - _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", added_id, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(added_id, normalize_tag(m_val_tag)); - m_val_tag = {}; - } - } - - while(m_stack.size() > 1) - { - _c4dbgpf("popping level: {} (stack sz={})", m_state->level, m_stack.size()); - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL, &m_stack.top())); - if(has_all(RSEQ|FLOW)) - _c4err("closing ] not found"); - _pop_level(); - } - add_flags(NDOC); -} - -void Parser::_start_new_doc(csubstr rem) -{ - _c4dbgp("_start_new_doc"); - _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begins_with("---")); - C4_UNUSED(rem); - - _end_stream(); - - size_t indref = m_state->indref; - _c4dbgpf("start a document, indentation={}", indref); - _line_progressed(3); - _push_level(); - _start_doc(); - _set_indentation(indref); -} - - -//----------------------------------------------------------------------------- -void Parser::_start_map(bool as_child) -{ - _c4dbgpf("start_map (as child={})", as_child); - addrem_flags(RMAP|RVAL, RKEY|RUNK); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); - size_t parent_id = m_stack.size() < 2 ? 
m_root_id : m_stack.top(1).node_id; - _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); - if(as_child) - { - m_state->node_id = m_tree->append_child(parent_id); - if(has_all(SSCL)) - { - type_bits key_quoted = NOTYPE; - if(m_state->flags & QSCL) // before consuming the scalar - key_quoted |= KEYQUO; - csubstr key = _consume_scalar(); - m_tree->to_map(m_state->node_id, key, key_quoted); - _c4dbgpf("start_map: id={} key='{}'", m_state->node_id, m_tree->key(m_state->node_id)); - _write_key_anchor(m_state->node_id); - if( ! m_key_tag.empty()) - { - _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); - m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); - m_key_tag.clear(); - } - } - else - { - m_tree->to_map(m_state->node_id); - _c4dbgpf("start_map: id={}", m_state->node_id); - } - m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; - _write_val_anchor(m_state->node_id); - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); - m_state->node_id = parent_id; - _c4dbgpf("start_map: id={}", m_state->node_id); - type_bits as_doc = 0; - if(m_tree->is_doc(m_state->node_id)) - as_doc |= DOC; - if(!m_tree->is_map(parent_id)) - { - RYML_CHECK(!m_tree->has_children(parent_id)); - m_tree->to_map(parent_id, as_doc); - } - else - { - m_tree->_add_flags(parent_id, as_doc); - } - _move_scalar_from_top(); - if(m_key_anchor.not_empty()) - m_key_anchor_was_before = true; - _write_val_anchor(parent_id); - if(m_stack.size() >= 2) - { - State const& parent_state = m_stack.top(1); - if(parent_state.flags & RSET) - add_flags(RSET); - } - m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; - } - if( ! 
m_val_tag.empty()) - { - _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); - m_val_tag.clear(); - } -} - -void Parser::_start_map_unk(bool as_child) -{ - _c4dbgpf("start_map_unk (as child={})", as_child); - if(!m_key_anchor_was_before) - { - _c4dbgpf("stash key anchor before starting map... '{}'", m_key_anchor); - csubstr ka = m_key_anchor; - m_key_anchor = {}; - _start_map(as_child); - m_key_anchor = ka; - } - else - { - _start_map(as_child); - m_key_anchor_was_before = false; - } - if(m_key_tag2.not_empty()) - { - m_key_tag = m_key_tag2; - m_key_tag_indentation = m_key_tag2_indentation; - m_key_tag2.clear(); - m_key_tag2_indentation = 0; - } -} - -void Parser::_stop_map() -{ - _c4dbgpf("stop_map[{}]", m_state->node_id); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id)); - if(has_all(QMRK|RKEY) && !has_all(SSCL)) - { - _c4dbgpf("stop_map[{}]: RKEY", m_state->node_id); - _store_scalar_null(m_state->line_contents.rem.str); - _append_key_val_null(m_state->line_contents.rem.str); - } -} - - -//----------------------------------------------------------------------------- -void Parser::_start_seq(bool as_child) -{ - _c4dbgpf("start_seq (as child={})", as_child); - if(has_all(RTOP|RUNK)) - { - _c4dbgpf("start_seq: moving key tag to val tag: '{}'", m_key_tag); - m_val_tag = m_key_tag; - m_key_tag.clear(); - } - addrem_flags(RSEQ|RVAL, RUNK); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); - size_t parent_id = m_stack.size() < 2 ? 
m_root_id : m_stack.top(1).node_id; - _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); - if(as_child) - { - m_state->node_id = m_tree->append_child(parent_id); - if(has_all(SSCL)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(parent_id)); - type_bits key_quoted = 0; - if(m_state->flags & QSCL) // before consuming the scalar - key_quoted |= KEYQUO; - csubstr key = _consume_scalar(); - m_tree->to_seq(m_state->node_id, key, key_quoted); - _c4dbgpf("start_seq: id={} name='{}'", m_state->node_id, m_tree->key(m_state->node_id)); - _write_key_anchor(m_state->node_id); - if( ! m_key_tag.empty()) - { - _c4dbgpf("start_seq[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); - m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); - m_key_tag.clear(); - } - } - else - { - type_bits as_doc = 0; - _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_doc(m_state->node_id)); - m_tree->to_seq(m_state->node_id, as_doc); - _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as doc" : ""); - } - _write_val_anchor(m_state->node_id); - m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; - } - else - { - m_state->node_id = parent_id; - type_bits as_doc = 0; - if(m_tree->is_doc(m_state->node_id)) - as_doc |= DOC; - if(!m_tree->is_seq(parent_id)) - { - RYML_CHECK(!m_tree->has_children(parent_id)); - m_tree->to_seq(parent_id, as_doc); - } - else - { - m_tree->_add_flags(parent_id, as_doc); - } - _move_scalar_from_top(); - _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as_doc" : ""); - _write_val_anchor(parent_id); - m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; - } - if( ! 
m_val_tag.empty()) - { - _c4dbgpf("start_seq[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); - m_val_tag.clear(); - } -} - -void Parser::_stop_seq() -{ - _c4dbgp("stop_seq"); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id)); -} - - -//----------------------------------------------------------------------------- -void Parser::_start_seqimap() -{ - _c4dbgpf("start_seqimap at node={}. has_children={}", m_state->node_id, m_tree->has_children(m_state->node_id)); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW)); - // create a map, and turn the last scalar of this sequence - // into the key of the map's first child. This scalar was - // understood to be a value in the sequence, but it is - // actually a key of a map, implicitly opened here. - // Eg [val, key: val] - // - // Yep, YAML is crazy. - if(m_tree->has_children(m_state->node_id) && m_tree->has_val(m_tree->last_child(m_state->node_id))) - { - size_t prev = m_tree->last_child(m_state->node_id); - NodeType ty = m_tree->_p(prev)->m_type; // don't use type() because it masks out the quotes - NodeScalar tmp = m_tree->valsc(prev); - _c4dbgpf("has children and last child={} has val. 
saving the scalars, val='{}' quoted={}", prev, tmp.scalar, ty.is_val_quoted()); - m_tree->remove(prev); - _push_level(); - _start_map(); - _store_scalar(tmp.scalar, ty.is_val_quoted()); - m_key_anchor = tmp.anchor; - m_key_tag = tmp.tag; - } - else - { - _c4dbgpf("node {} has no children yet, using empty key", m_state->node_id); - _push_level(); - _start_map(); - _store_scalar_null(m_state->line_contents.rem.str); - } - add_flags(RSEQIMAP|FLOW); -} - -void Parser::_stop_seqimap() -{ - _c4dbgp("stop_seqimap"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQIMAP)); -} - - -//----------------------------------------------------------------------------- -NodeData* Parser::_append_val(csubstr val, flag_t quoted) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_all(SSCL)); - _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) != nullptr); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id)); - type_bits additional_flags = quoted ? VALQUO : NOTYPE; - _c4dbgpf("append val: '{}' to parent id={} (level={}){}", val, m_state->node_id, m_state->level, quoted ? " VALQUO!" : ""); - size_t nid = m_tree->append_child(m_state->node_id); - m_tree->to_val(nid, val, additional_flags); - _c4dbgpf("append val: id={} val='{}'", nid, m_tree->get(nid)->m_val.scalar); - if( ! 
m_val_tag.empty()) - { - _c4dbgpf("append val[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(nid, normalize_tag(m_val_tag)); - m_val_tag.clear(); - } - _write_val_anchor(nid); - return m_tree->get(nid); -} - -NodeData* Parser::_append_key_val(csubstr val, flag_t val_quoted) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id)); - type_bits additional_flags = 0; - if(m_state->flags & QSCL) - additional_flags |= KEYQUO; - if(val_quoted) - additional_flags |= VALQUO; - csubstr key = _consume_scalar(); - _c4dbgpf("append keyval: '{}' '{}' to parent id={} (level={}){}{}", key, val, m_state->node_id, m_state->level, (additional_flags & KEYQUO) ? " KEYQUO!" : "", (additional_flags & VALQUO) ? " VALQUO!" : ""); - size_t nid = m_tree->append_child(m_state->node_id); - m_tree->to_keyval(nid, key, val, additional_flags); - _c4dbgpf("append keyval: id={} key='{}' val='{}'", nid, m_tree->key(nid), m_tree->val(nid)); - if( ! m_key_tag.empty()) - { - _c4dbgpf("append keyval[{}]: set key tag='{}' -> '{}'", nid, m_key_tag, normalize_tag(m_key_tag)); - m_tree->set_key_tag(nid, normalize_tag(m_key_tag)); - m_key_tag.clear(); - } - if( ! 
m_val_tag.empty()) - { - _c4dbgpf("append keyval[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag)); - m_tree->set_val_tag(nid, normalize_tag(m_val_tag)); - m_val_tag.clear(); - } - _write_key_anchor(nid); - _write_val_anchor(nid); - rem_flags(QMRK); - return m_tree->get(nid); -} - - -//----------------------------------------------------------------------------- -void Parser::_store_scalar(csubstr s, flag_t is_quoted) -{ - _c4dbgpf("state[{}]: storing scalar '{}' (flag: {}) (old scalar='{}')", - m_state-m_stack.begin(), s, m_state->flags & SSCL, m_state->scalar); - RYML_CHECK(has_none(SSCL)); - add_flags(SSCL | (is_quoted * QSCL)); - m_state->scalar = s; -} - -csubstr Parser::_consume_scalar() -{ - _c4dbgpf("state[{}]: consuming scalar '{}' (flag: {}))", m_state-m_stack.begin(), m_state->scalar, m_state->flags & SSCL); - RYML_CHECK(m_state->flags & SSCL); - csubstr s = m_state->scalar; - rem_flags(SSCL | QSCL); - m_state->scalar.clear(); - return s; -} - -void Parser::_move_scalar_from_top() -{ - if(m_stack.size() < 2) return; - State &prev = m_stack.top(1); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top()); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state != &prev); - if(prev.flags & SSCL) - { - _c4dbgpf("moving scalar '{}' from state[{}] to state[{}] (overwriting '{}')", prev.scalar, &prev-m_stack.begin(), m_state-m_stack.begin(), m_state->scalar); - add_flags(prev.flags & (SSCL | QSCL)); - m_state->scalar = prev.scalar; - rem_flags(SSCL | QSCL, &prev); - prev.scalar.clear(); - } -} - -//----------------------------------------------------------------------------- -/** @todo this function is a monster and needs love. Likely, it needs - * to be split like _scan_scalar_*() */ -bool Parser::_handle_indentation() -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); - if( ! 
_at_line_begin()) - return false; - - size_t ind = m_state->line_contents.indentation; - csubstr rem = m_state->line_contents.rem; - /** @todo instead of trimming, we should use the indentation index from above */ - csubstr remt = rem.triml(' '); - - if(remt.empty() || remt.begins_with('#')) // this is a blank or comment line - { - _line_progressed(rem.size()); - return true; - } - - _c4dbgpf("indentation? ind={} indref={}", ind, m_state->indref); - if(ind == m_state->indref) - { - _c4dbgpf("same indentation: {}", ind); - if(!rem.sub(ind).begins_with('-')) - { - _c4dbgp("does not begin with -"); - if(has_any(RMAP)) - { - if(has_all(SSCL|RVAL)) - { - _c4dbgp("add with null val"); - _append_key_val_null(rem.str + ind - 1); - addrem_flags(RKEY, RVAL); - } - } - else if(has_any(RSEQ)) - { - if(m_stack.size() > 2) // do not pop to root level - { - if(has_any(RNXT)) - { - _c4dbgp("end the indentless seq"); - _pop_level(); - return true; - } - else if(has_any(RVAL)) - { - _c4dbgp("add with null val"); - _append_val_null(rem.str); - _c4dbgp("end the indentless seq"); - _pop_level(); - return true; - } - } - } - } - _line_progressed(ind); - return ind > 0; - } - else if(ind < m_state->indref) - { - _c4dbgpf("smaller indentation ({} < {})!!!", ind, m_state->indref); - if(has_all(RVAL)) - { - _c4dbgp("there was an empty val -- appending"); - if(has_all(RMAP)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); - _append_key_val_null(rem.sub(ind).str - 1); - } - else if(has_all(RSEQ)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); - _append_val_null(rem.sub(ind).str - 1); - } - } - // search the stack frame to jump to based on its indentation - State const* popto = nullptr; - _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.is_contiguous()); // this search relies on the stack being contiguous - for(State const* s = m_state-1; s >= m_stack.begin(); --s) - { - _c4dbgpf("searching for state with indentation {}. 
curr={} (level={},node={})", ind, s->indref, s->level, s->node_id); - if(s->indref == ind) - { - _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id); - popto = s; - // while it may be tempting to think we're done at this - // point, we must still determine whether we're jumping to a - // parent with the same indentation. Consider this case with - // an indentless sequence: - // - // product: - // - sku: BL394D - // quantity: 4 - // description: Basketball - // price: 450.00 - // - sku: BL4438H - // quantity: 1 - // description: Super Hoop - // price: 2392.00 # jumping one level here would be wrong. - // tax: 1234.5 # we must jump two levels - if(popto > m_stack.begin()) - { - auto parent = popto - 1; - if(parent->indref == popto->indref) - { - _c4dbgpf("the parent (level={},node={}) has the same indentation ({}). is this in an indentless sequence?", parent->level, parent->node_id, popto->indref); - _c4dbgpf("isseq(popto)={} ismap(parent)={}", m_tree->is_seq(popto->node_id), m_tree->is_map(parent->node_id)); - if(m_tree->is_seq(popto->node_id) && m_tree->is_map(parent->node_id)) - { - if( ! 
remt.begins_with('-')) - { - _c4dbgp("this is an indentless sequence"); - popto = parent; - } - else - { - _c4dbgp("not an indentless sequence"); - } - } - } - } - break; - } - } - if(!popto || popto >= m_state || popto->level >= m_state->level) - { - _c4err("parse error: incorrect indentation?"); - } - _c4dbgpf("popping {} levels: from level {} to level {}", m_state->level-popto->level, m_state->level, popto->level); - while(m_state != popto) - { - _c4dbgpf("popping level {} (indentation={})", m_state->level, m_state->indref); - _pop_level(); - } - _RYML_CB_ASSERT(m_stack.m_callbacks, ind == m_state->indref); - _line_progressed(ind); - return true; - } - else - { - _c4dbgpf("larger indentation ({} > {})!!!", ind, m_state->indref); - _RYML_CB_ASSERT(m_stack.m_callbacks, ind > m_state->indref); - if(has_all(RMAP|RVAL)) - { - if(_is_scalar_next__rmap_val(remt) && (!remt.first_of_any(": ", "? ")) && (!remt.ends_with(":"))) - { - _c4dbgpf("actually it seems a value: '{}'", remt); - } - else - { - addrem_flags(RKEY, RVAL); - _start_unk(); - //_move_scalar_from_top(); - _line_progressed(ind); - _save_indentation(); - return true; - } - } - else if(has_all(RSEQ|RVAL)) - { - // nothing to do here - } - else - { - _c4err("parse error - indentation should not increase at this point"); - } - } - - return false; -} - -//----------------------------------------------------------------------------- -csubstr Parser::_scan_comment() -{ - csubstr s = m_state->line_contents.rem; - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('#')); - _line_progressed(s.len); - // skip the # character - s = s.sub(1); - // skip leading whitespace - s = s.right_of(s.first_not_of(' '), /*include_pos*/true); - _c4dbgpf("comment was '{}'", s); - return s; -} - -//----------------------------------------------------------------------------- -csubstr Parser::_scan_squot_scalar() -{ - // quoted scalars can spread over multiple lines! 
- // nice explanation here: http://yaml-multiline.info/ - - // a span to the end of the file - size_t b = m_state->pos.offset; - substr s = m_buf.sub(b); - if(s.begins_with(' ')) - { - s = s.triml(' '); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s)); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); - _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); - } - b = m_state->pos.offset; // take this into account - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('\'')); - - // skip the opening quote - _line_progressed(1); - s = s.sub(1); - - bool needs_filter = false; - - size_t numlines = 1; // we already have one line - size_t pos = npos; // find the pos of the matching quote - while( ! _finished_file()) - { - const csubstr line = m_state->line_contents.rem; - bool line_is_blank = true; - _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_state->pos.line, line); - for(size_t i = 0; i < line.len; ++i) - { - const char curr = line.str[i]; - if(curr == '\'') // single quotes are escaped with two single quotes - { - const char next = i+1 < line.len ? 
line.str[i+1] : '~'; - if(next != '\'') // so just look for the first quote - { // without another after it - pos = i; - break; - } - else - { - needs_filter = true; // needs filter to remove escaped quotes - ++i; // skip the escaped quote - } - } - else if(curr != ' ') - { - line_is_blank = false; - } - } - - // leading whitespace also needs filtering - needs_filter = needs_filter - || (numlines > 1) - || line_is_blank - || (_at_line_begin() && line.begins_with(' ')); - - if(pos == npos) - { - _line_progressed(line.len); - ++numlines; - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '\''); - _line_progressed(pos + 1); // progress beyond the quote - pos = m_state->pos.offset - b - 1; // but we stop before it - break; - } - - _line_ended(); - _scan_line(); - } - - if(pos == npos) - { - _c4err("reached end of file while looking for closing quote"); - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\''); - s = s.sub(0, pos-1); - } - - if(needs_filter) - { - csubstr ret = _filter_squot_scalar(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty()); - _c4dbgpf("final scalar: \"{}\"", ret); - return ret; - } - - _c4dbgpf("final scalar: \"{}\"", s); - - return s; -} - -//----------------------------------------------------------------------------- -csubstr Parser::_scan_dquot_scalar() -{ - // quoted scalars can spread over multiple lines! 
- // nice explanation here: http://yaml-multiline.info/ - - // a span to the end of the file - size_t b = m_state->pos.offset; - substr s = m_buf.sub(b); - if(s.begins_with(' ')) - { - s = s.triml(' '); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s)); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); - _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); - } - b = m_state->pos.offset; // take this into account - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('"')); - - // skip the opening quote - _line_progressed(1); - s = s.sub(1); - - bool needs_filter = false; - - size_t numlines = 1; // we already have one line - size_t pos = npos; // find the pos of the matching quote - while( ! _finished_file()) - { - const csubstr line = m_state->line_contents.rem; - bool line_is_blank = true; - _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_state->pos.line, line); - for(size_t i = 0; i < line.len; ++i) - { - const char curr = line.str[i]; - if(curr != ' ') - line_is_blank = false; - // every \ is an escape - if(curr == '\\') - { - const char next = i+1 < line.len ? 
line.str[i+1] : '~'; - needs_filter = true; - if(next == '"' || next == '\\') - ++i; - } - else if(curr == '"') - { - pos = i; - break; - } - } - - // leading whitespace also needs filtering - needs_filter = needs_filter - || (numlines > 1) - || line_is_blank - || (_at_line_begin() && line.begins_with(' ')); - - if(pos == npos) - { - _line_progressed(line.len); - ++numlines; - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '"'); - _line_progressed(pos + 1); // progress beyond the quote - pos = m_state->pos.offset - b - 1; // but we stop before it - break; - } - - _line_ended(); - _scan_line(); - } - - if(pos == npos) - { - _c4err("reached end of file looking for closing quote"); - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"'); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); - s = s.sub(0, pos-1); - } - - if(needs_filter) - { - csubstr ret = _filter_dquot_scalar(s); - _c4dbgpf("final scalar: [{}]\"{}\"", ret.len, ret); - _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty()); - return ret; - } - - _c4dbgpf("final scalar: \"{}\"", s); - - return s; -} - -//----------------------------------------------------------------------------- -csubstr Parser::_scan_block() -{ - // nice explanation here: http://yaml-multiline.info/ - csubstr s = m_state->line_contents.rem; - csubstr trimmed = s.triml(' '); - if(trimmed.str > s.str) - { - _c4dbgp("skipping whitespace"); - _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= s.str); - _line_progressed(static_cast(trimmed.str - s.str)); - s = trimmed; - } - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>')); - - _c4dbgpf("scanning block: specs=\"{}\"", s); - - // parse the spec - BlockStyle_e newline = s.begins_with('>') 
? BLOCK_FOLD : BLOCK_LITERAL; - BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used - size_t indentation = npos; // have to find out if no spec is given - csubstr digits; - if(s.len > 1) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with_any("|>")); - csubstr t = s.sub(1); - _c4dbgpf("scanning block: spec is multichar: '{}'", t); - _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); - size_t pos = t.first_of("-+"); - _c4dbgpf("scanning block: spec chomp char at {}", pos); - if(pos != npos) - { - if(t[pos] == '-') - chomp = CHOMP_STRIP; - else if(t[pos] == '+') - chomp = CHOMP_KEEP; - if(pos == 0) - t = t.sub(1); - else - t = t.first(pos); - } - // from here to the end, only digits are considered - digits = t.left_of(t.first_not_of("0123456789")); - if( ! digits.empty()) - { - if( ! c4::atou(digits, &indentation)) - _c4err("parse error: could not read decimal"); - _c4dbgpf("scanning block: indentation specified: {}. add {} from curr state -> {}", indentation, m_state->indref, indentation+m_state->indref); - indentation += m_state->indref; - } - } - - // finish the current line - _line_progressed(s.len); - _line_ended(); - _scan_line(); - - _c4dbgpf("scanning block: style={} chomp={} indentation={}", newline==BLOCK_FOLD ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation); - - // start with a zero-length block, already pointing at the right place - substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0); - _RYML_CB_ASSERT(m_stack.m_callbacks, raw_block.begin() == m_state->line_contents.full.begin()); - - // read every full line into a raw block, - // from which newlines are to be stripped as needed. - // - // If no explicit indentation was given, pick it from the first - // non-empty line. 
See - // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator - size_t num_lines = 0, first = m_state->pos.line, provisional_indentation = npos; - LineContents lc; - while(( ! _finished_file())) - { - // peek next line, but do not advance immediately - lc.reset_with_next_line(m_buf, m_state->pos.offset); - _c4dbgpf("scanning block: peeking at '{}'", lc.stripped); - // evaluate termination conditions - if(indentation != npos) - { - // stop when the line is deindented and not empty - if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty())) - { - if(raw_block.len) - { - _c4dbgpf("scanning block: indentation decreased ref={} thisline={}", indentation, lc.indentation); - } - else - { - _c4err("indentation decreased without any scalar"); - } - break; - } - else if(indentation == 0) - { - if((lc.rem == "..." || lc.rem.begins_with("... ")) - || - (lc.rem == "---" || lc.rem.begins_with("--- "))) - { - _c4dbgp("scanning block: stop. indentation=0 and stream ended"); - break; - } - } - } - else - { - _c4dbgpf("scanning block: indentation ref not set. firstnonws={}", lc.stripped.first_not_of(' ')); - if(lc.stripped.first_not_of(' ') != npos) // non-empty line - { - _c4dbgpf("scanning block: line not empty. indref={} indprov={} indentation={}", m_state->indref, provisional_indentation, lc.indentation); - if(provisional_indentation == npos) - { - if(lc.indentation < m_state->indref) - { - _c4dbgpf("scanning block: block terminated indentation={} < indref={}", lc.indentation, m_state->indref); - if(raw_block.len == 0) - { - _c4dbgp("scanning block: was empty, undo next line"); - _line_ended_undo(); - } - break; - } - else if(lc.indentation == m_state->indref) - { - if(has_any(RSEQ|RMAP)) - { - _c4dbgpf("scanning block: block terminated. 
reading container and indentation={}==indref={}", lc.indentation, m_state->indref); - break; - } - } - _c4dbgpf("scanning block: set indentation ref from this line: ref={}", lc.indentation); - indentation = lc.indentation; - } - else - { - if(lc.indentation >= provisional_indentation) - { - _c4dbgpf("scanning block: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation); - //indentation = provisional_indentation ? provisional_indentation : lc.indentation; - indentation = lc.indentation; - } - else - { - break; - //_c4err("parse error: first non-empty block line should have at least the original indentation"); - } - } - } - else // empty line - { - _c4dbgpf("scanning block: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation); - if(provisional_indentation != npos) - { - if(lc.stripped.len >= provisional_indentation) - { - _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len); - provisional_indentation = lc.stripped.len; - } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - else if(lc.indentation >= provisional_indentation && lc.indentation != npos) - { - _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation); - provisional_indentation = lc.indentation; - } - #endif - } - else - { - provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL); - _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); - if(provisional_indentation == npos) - { - provisional_indentation = lc.stripped.len ? 
lc.stripped.len : has_any(RSEQ|RVAL); - _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); - } - } - } - } - // advance now that we know the folded scalar continues - m_state->line_contents = lc; - _c4dbgpf("scanning block: append '{}'", m_state->line_contents.rem); - raw_block.len += m_state->line_contents.full.len; - _line_progressed(m_state->line_contents.rem.len); - _line_ended(); - ++num_lines; - } - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines) || (raw_block.len == 0)); - C4_UNUSED(num_lines); - C4_UNUSED(first); - - if(indentation == npos) - { - _c4dbgpf("scanning block: set indentation from provisional: {}", provisional_indentation); - indentation = provisional_indentation; - } - - if(num_lines) - _line_ended_undo(); - - _c4dbgpf("scanning block: raw=~~~{}~~~", raw_block); - - // ok! now we strip the newlines and spaces according to the specs - s = _filter_block_scalar(raw_block, newline, chomp, indentation); - - _c4dbgpf("scanning block: final=~~~{}~~~", s); - - return s; -} - - -//----------------------------------------------------------------------------- - -template -bool Parser::_filter_nl(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos, size_t indentation) -{ - // a debugging scaffold: - #if 0 - #define _c4dbgfnl(fmt, ...) _c4dbgpf("filter_nl[{}]: " fmt, *i, __VA_ARGS__) - #else - #define _c4dbgfnl(...) - #endif - - const char curr = r[*i];(void)curr; - bool replaced = false; - - _RYML_CB_ASSERT(m_stack.m_callbacks, indentation != npos); - _RYML_CB_ASSERT(m_stack.m_callbacks, curr == '\n'); - - _c4dbgfnl("found newline. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); - size_t ii = *i; - size_t numnl_following = count_following_newlines(r, &ii, indentation); - if(numnl_following) - { - _c4dbgfnl("{} consecutive (empty) lines {} in the middle. totalws={}", 1+numnl_following, ii < r.len ? 
"in the middle" : "at the end", ii - *i); - for(size_t j = 0; j < numnl_following; ++j) - m_filter_arena.str[(*pos)++] = '\n'; - } - else - { - if(r.first_not_of(" \t", *i+1) != npos) - { - m_filter_arena.str[(*pos)++] = ' '; - _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); - replaced = true; - } - else - { - if C4_IF_CONSTEXPR (keep_trailing_whitespace) - { - m_filter_arena.str[(*pos)++] = ' '; - _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); - replaced = true; - } - else - { - _c4dbgfnl("last newline, everything else is whitespace. ii={}/{}", ii, r.len); - *i = r.len; - } - } - if C4_IF_CONSTEXPR (backslash_is_escape) - { - if(ii < r.len && r.str[ii] == '\\') - { - const char next = ii+1 < r.len ? r.str[ii+1] : '\0'; - if(next == ' ' || next == '\t') - { - _c4dbgfnl("extend skip to backslash{}", ""); - ++ii; - } - } - } - } - *i = ii - 1; // correct for the loop increment - - #undef _c4dbgfnl - - return replaced; -} - - -//----------------------------------------------------------------------------- - -template -void Parser::_filter_ws(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos) -{ - // a debugging scaffold: - #if 0 - #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_nl[{}]: " fmt, *i, __VA_ARGS__) - #else - #define _c4dbgfws(...) - #endif - - const char curr = r[*i]; - _c4dbgfws("found whitespace '{}'", _c4prc(curr)); - _RYML_CB_ASSERT(m_stack.m_callbacks, curr == ' ' || curr == '\t'); - - size_t first = *i > 0 ? r.first_not_of(" \t", *i) : r.first_not_of(' ', *i); - if(first != npos) - { - if(r[first] == '\n' || r[first] == '\r') // skip trailing whitespace - { - _c4dbgfws("whitespace is trailing on line. firstnonws='{}'@{}", _c4prc(r[first]), first); - *i = first - 1; // correct for the loop increment - } - else // a legit whitespace - { - m_filter_arena.str[(*pos)++] = curr; - _c4dbgfws("legit whitespace. 
sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); - } - } - else - { - _c4dbgfws("... everything else is trailing whitespace{}", ""); - if C4_IF_CONSTEXPR (keep_trailing_whitespace) - for(size_t j = *i; j < r.len; ++j) - m_filter_arena.str[(*pos)++] = r[j]; - *i = r.len; - } - - #undef _c4dbgfws -} - - -//----------------------------------------------------------------------------- -csubstr Parser::_filter_plain_scalar(substr s, size_t indentation) -{ - // a debugging scaffold: - #if 0 - #define _c4dbgfps(...) _c4dbgpf("filt_plain_scalar" __VA_ARGS__) - #else - #define _c4dbgfps(...) - #endif - - _c4dbgfps("before=~~~{}~~~", s); - - substr r = s.triml(" \t"); - _grow_filter_arena(r.len); - size_t pos = 0; // the filtered size - bool filtered_chars = false; - for(size_t i = 0; i < r.len; ++i) - { - const char curr = r.str[i]; - _c4dbgfps("[{}]: '{}'", i, _c4prc(curr)); - if(curr == ' ' || curr == '\t') - { - _filter_ws(r, &i, &pos); - } - else if(curr == '\n') - { - filtered_chars = _filter_nl(r, &i, &pos, indentation); - } - else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 - { - ; - } - else - { - m_filter_arena.str[pos++] = r[i]; - } - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - if(pos < r.len || filtered_chars) - { - r = _finish_filter_arena(r, pos); - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); - _c4dbgfps("#filteredchars={} after=~~~{}~~~", s.len - r.len, r); - - #undef _c4dbgfps - return r; -} - - -//----------------------------------------------------------------------------- -csubstr Parser::_filter_squot_scalar(substr s) -{ - // a debugging scaffold: - #if 0 - #define _c4dbgfsq(...) _c4dbgpf("filt_squo_scalar") - #else - #define _c4dbgfsq(...) 
- #endif - - // from the YAML spec for double-quoted scalars: - // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted - - _c4dbgfsq(": before=~~~{}~~~", s); - - _grow_filter_arena(s.len); - substr r = s; - size_t pos = 0; // the filtered size - bool filtered_chars = false; - for(size_t i = 0; i < r.len; ++i) - { - const char curr = r[i]; - _c4dbgfsq("[{}]: '{}'", i, _c4prc(curr)); - if(curr == ' ' || curr == '\t') - { - _filter_ws(r, &i, &pos); - } - else if(curr == '\n') - { - filtered_chars = _filter_nl(r, &i, &pos, /*indentation*/0); - } - else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 - { - ; - } - else if(curr == '\'') - { - char next = i+1 < r.len ? r[i+1] : '\0'; - if(next == '\'') - { - _c4dbgfsq("[{}]: two consecutive quotes", i); - filtered_chars = true; - m_filter_arena.str[pos++] = '\''; - ++i; - } - } - else - { - m_filter_arena.str[pos++] = curr; - } - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - if(pos < r.len || filtered_chars) - { - r = _finish_filter_arena(r, pos); - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); - _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); - - #undef _c4dbgfsq - return r; -} - - -//----------------------------------------------------------------------------- -csubstr Parser::_filter_dquot_scalar(substr s) -{ - // a debugging scaffold: - #if 0 - #define _c4dbgfdq(...) _c4dbgpf("filt_dquo_scalar" __VA_ARGS__) - #else - #define _c4dbgfdq(...) - #endif - - _c4dbgfdq(": before=~~~{}~~~", s); - - // from the YAML spec for double-quoted scalars: - // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted - // - // All leading and trailing white space characters are excluded - // from the content. Each continuation line must therefore contain - // at least one non-space character. Empty lines, if any, are - // consumed as part of the line folding. 
- - _grow_filter_arena(s.len + 2u * s.count('\\')); - substr r = s; - size_t pos = 0; // the filtered size - bool filtered_chars = false; - for(size_t i = 0; i < r.len; ++i) - { - const char curr = r[i]; - _c4dbgfdq("[{}]: '{}'", i, _c4prc(curr)); - if(curr == ' ' || curr == '\t') - { - _filter_ws(r, &i, &pos); - } - else if(curr == '\n') - { - filtered_chars = _filter_nl(r, &i, &pos, /*indentation*/0); - } - else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 - { - ; - } - else if(curr == '\\') - { - char next = i+1 < r.len ? r[i+1] : '\0'; - _c4dbgfdq("[{}]: backslash, next='{}'", i, _c4prc(next)); - filtered_chars = true; - if(next == '\r') - { - if(i+2 < r.len && r[i+2] == '\n') - { - ++i; // newline escaped with \ -- skip both (add only one as i is loop-incremented) - next = '\n'; - _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", i); - } - } - // remember the loop will also increment i - if(next == '\n') - { - size_t ii = i + 2; - for( ; ii < r.len; ++ii) - { - if(r.str[ii] == ' ' || r.str[ii] == '\t') // skip leading whitespace - ; - else - break; - } - i += ii - i - 1; - } - else if(next == '"' || next == '/' || next == ' ' || next == '\t') // escapes for json compatibility - { - m_filter_arena.str[pos++] = next; - ++i; - } - else if(next == '\r') - { - //++i; - } - else if(next == 'n') - { - m_filter_arena.str[pos++] = '\n'; - ++i; - } - else if(next == 'r') - { - m_filter_arena.str[pos++] = '\r'; - ++i; // skip - } - else if(next == 't') - { - m_filter_arena.str[pos++] = '\t'; - ++i; - } - else if(next == '\\') - { - m_filter_arena.str[pos++] = '\\'; - ++i; - } - else if(next == 'x') // UTF8 - { - if(i + 1u + 2u >= r.len) - _c4err("\\x requires 2 hex digits"); - uint8_t byteval = {}; - if(!read_hex(r.sub(i + 2u, 2u), &byteval)) - _c4err("failed to read \\x codepoint"); - m_filter_arena.str[pos++] = *(char*)&byteval; - i += 1u + 2u; - } - else if(next == 'u') // UTF16 - { - if(i + 1u + 4u >= r.len) - _c4err("\\u requires 4 hex 
digits"); - char readbuf[8]; - csubstr codepoint = r.sub(i + 2u, 4u); - uint32_t codepoint_val = {}; - if(!read_hex(codepoint, &codepoint_val)) - _c4err("failed to parse \\u codepoint"); - size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); - C4_ASSERT(numbytes <= 4); - memcpy(m_filter_arena.str + pos, readbuf, numbytes); - pos += numbytes; - i += 1u + 4u; - } - else if(next == 'U') // UTF32 - { - if(i + 1u + 8u >= r.len) - _c4err("\\U requires 8 hex digits"); - char readbuf[8]; - csubstr codepoint = r.sub(i + 2u, 8u); - uint32_t codepoint_val = {}; - if(!read_hex(codepoint, &codepoint_val)) - _c4err("failed to parse \\U codepoint"); - size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); - C4_ASSERT(numbytes <= 4); - memcpy(m_filter_arena.str + pos, readbuf, numbytes); - pos += numbytes; - i += 1u + 8u; - } - // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char - else if(next == '0') - { - m_filter_arena.str[pos++] = '\0'; - ++i; - } - else if(next == 'b') // backspace - { - m_filter_arena.str[pos++] = '\b'; - ++i; - } - else if(next == 'f') // form feed - { - m_filter_arena.str[pos++] = '\f'; - ++i; - } - else if(next == 'a') // bell character - { - m_filter_arena.str[pos++] = '\a'; - ++i; - } - else if(next == 'v') // vertical tab - { - m_filter_arena.str[pos++] = '\v'; - ++i; - } - else if(next == 'e') // escape character - { - m_filter_arena.str[pos++] = '\x1b'; - ++i; - } - else if(next == '_') // unicode non breaking space \u00a0 - { - // https://www.compart.com/en/unicode/U+00a0 - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x60, 0xa0); - ++i; - } - else if(next == 'N') // unicode next line \u0085 - { - // https://www.compart.com/en/unicode/U+0085 - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x7b, 0x85); - ++i; - } - else if(next == 'L') // unicode line separator \u2028 - { - 
// https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x58, 0xa8); - ++i; - } - else if(next == 'P') // unicode paragraph separator \u2029 - { - // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); - m_filter_arena.str[pos++] = _RYML_CHCONST(-0x57, 0xa9); - ++i; - } - _c4dbgfdq("[{}]: backslash...sofar=[{}]~~~{}~~~", i, pos, m_filter_arena.first(pos)); - } - else - { - m_filter_arena.str[pos++] = curr; - } - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - if(pos < r.len || filtered_chars) - { - r = _finish_filter_arena(r, pos); - } - - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); - _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); - - #undef _c4dbgfdq - - return r; -} - - -//----------------------------------------------------------------------------- -bool Parser::_apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp) -{ - substr trimmed = buf.first(*pos).trimr('\n'); - bool added_newline = false; - switch(chomp) - { - case CHOMP_KEEP: - if(trimmed.len == *pos) - { - _c4dbgpf("chomp=KEEP: add missing newline @{}", *pos); - //m_filter_arena.str[(*pos)++] = '\n'; - added_newline = true; - } - break; - case CHOMP_CLIP: - if(trimmed.len == *pos) - { - _c4dbgpf("chomp=CLIP: add missing newline @{}", *pos); - m_filter_arena.str[(*pos)++] = '\n'; - added_newline = true; - } - else - { - _c4dbgpf("chomp=CLIP: include single trailing newline @{}", trimmed.len+1); - *pos = trimmed.len + 1; - } - break; - case CHOMP_STRIP: - _c4dbgpf("chomp=STRIP: strip {}-{}-{} newlines", *pos, trimmed.len, *pos-trimmed.len); - *pos = 
trimmed.len; - break; - default: - _c4err("unknown chomp style"); - } - return added_newline; -} - - -//----------------------------------------------------------------------------- -csubstr Parser::_filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation) -{ - // a debugging scaffold: - #if 0 - #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block" fmt, __VA_ARGS__) - #else - #define _c4dbgfbl(...) - #endif - - _c4dbgfbl(": indentation={} before=[{}]~~~{}~~~", indentation, s.len, s); - - if(chomp != CHOMP_KEEP && s.trim(" \n\r").len == 0u) - { - _c4dbgp("filt_block: empty scalar"); - return s.first(0); - } - - substr r = s; - - switch(style) - { - case BLOCK_LITERAL: - { - _c4dbgp("filt_block: style=literal"); - // trim leading whitespace up to indentation - { - size_t numws = r.first_not_of(' '); - if(numws != npos) - { - if(numws > indentation) - r = r.sub(indentation); - else - r = r.sub(numws); - _c4dbgfbl(": after triml=[{}]~~~{}~~~", r.len, r); - } - else - { - if(chomp != CHOMP_KEEP || r.len == 0) - { - _c4dbgfbl(": all spaces {}, return empty", r.len); - return r.first(0); - } - else - { - r[0] = '\n'; - return r.first(1); - } - } - } - _grow_filter_arena(s.len + 2u); // use s.len! 
because we may need to add a newline at the end, so the leading indentation will allow space for that newline - size_t pos = 0; // the filtered size - for(size_t i = 0; i < r.len; ++i) - { - const char curr = r.str[i]; - _c4dbgfbl("[{}]='{}' pos={}", i, _c4prc(curr), pos); - if(curr == '\r') - continue; - m_filter_arena.str[pos++] = curr; - if(curr == '\n') - { - _c4dbgfbl("[{}]: found newline", i); - // skip indentation on the next line - csubstr rem = r.sub(i+1); - size_t first = rem.first_not_of(' '); - if(first != npos) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); - _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, rem.str[first]); - if(first < indentation) - { - _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); - i += first; - } - else - { - _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); - i += indentation; - } - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); - first = rem.len; - _c4dbgfbl("[{}]: {} spaces to the end", i, first); - if(first) - { - if(first < indentation) - { - _c4dbgfbl("[{}]: skip everything", i); - --pos; - break; - } - else - { - _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); - i += indentation; - } - } - else if(i+1 == r.len) - { - if(chomp == CHOMP_STRIP) - --pos; - break; - } - } - } - } - _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= pos); - _c4dbgfbl(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); - bool changed = _apply_chomp(m_filter_arena, &pos, chomp); - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= s.len); - if(pos < r.len || changed) - { - r = _finish_filter_arena(s, pos); // write into s - } - break; - } - case BLOCK_FOLD: - { - _c4dbgp("filt_block: style=fold"); - _grow_filter_arena(r.len + 2); - size_t pos = 0; // the filtered size - bool 
filtered_chars = false; - bool started = false; - bool is_indented = false; - size_t i = r.first_not_of(' '); - _c4dbgfbl(": first non space at {}", i); - if(i > indentation) - { - is_indented = true; - i = indentation; - } - _c4dbgfbl(": start folding at {}, is_indented={}", i, (int)is_indented); - auto on_change_indentation = [&](size_t numnl_following, size_t last_newl, size_t first_non_whitespace){ - _c4dbgfbl("[{}]: add 1+{} newlines", i, numnl_following); - for(size_t j = 0; j < 1 + numnl_following; ++j) - m_filter_arena.str[pos++] = '\n'; - for(i = last_newl + 1 + indentation; i < first_non_whitespace; ++i) - { - if(r.str[i] == '\r') - continue; - _c4dbgfbl("[{}]: add '{}'", i, _c4prc(r.str[i])); - m_filter_arena.str[pos++] = r.str[i]; - } - --i; - }; - for( ; i < r.len; ++i) - { - const char curr = r.str[i]; - _c4dbgfbl("[{}]='{}'", i, _c4prc(curr)); - if(curr == '\n') - { - filtered_chars = true; - // skip indentation on the next line, and advance over the next non-indented blank lines as well - size_t first_non_whitespace; - size_t numnl_following = (size_t)-1; - while(r[i] == '\n') - { - ++numnl_following; - csubstr rem = r.sub(i+1); - size_t first = rem.first_not_of(' '); - _c4dbgfbl("[{}]: found newline. 
first={} rem.len={}", i, first, rem.len); - if(first != npos) - { - first_non_whitespace = first + i+1; - while(first_non_whitespace < r.len && r[first_non_whitespace] == '\r') - ++first_non_whitespace; - _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); - _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, _c4prc(rem.str[first])); - if(first < indentation) - { - _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); - i += first; - } - else - { - _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); - i += indentation; - if(first > indentation) - { - _c4dbgfbl("[{}]: {} further indented than {}, stop newlining", i, first, indentation); - goto finished_counting_newlines; - } - } - // prepare the next while loop iteration - // by setting i at the next newline after - // an empty line - if(r[first_non_whitespace] == '\n') - i = first_non_whitespace; - else - goto finished_counting_newlines; - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); - first = rem.len; - first_non_whitespace = first + i+1; - if(first) - { - _c4dbgfbl("[{}]: {} spaces to the end", i, first); - if(first < indentation) - { - _c4dbgfbl("[{}]: skip everything", i); - i += first; - } - else - { - _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); - i += indentation; - if(first > indentation) - { - _c4dbgfbl("[{}]: {} spaces missing. not done yet", i, indentation - first); - goto finished_counting_newlines; - } - } - } - else // if(i+1 == r.len) - { - _c4dbgfbl("[{}]: it's the final newline", i); - _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 == r.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len == 0); - } - goto end_of_scalar; - } - } - end_of_scalar: - // Write all the trailing newlines. Since we're - // at the end no folding is needed, so write every - // newline (add 1). 
- _c4dbgfbl("[{}]: add {} trailing newlines", i, 1+numnl_following); - for(size_t j = 0; j < 1 + numnl_following; ++j) - m_filter_arena.str[pos++] = '\n'; - break; - finished_counting_newlines: - _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); - while(first_non_whitespace < r.len && r[first_non_whitespace] == '\t') - ++first_non_whitespace; - _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); - _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace <= r.len); - size_t last_newl = r.last_of('\n', first_non_whitespace); - size_t this_indentation = first_non_whitespace - last_newl - 1; - _c4dbgfbl("[{}]: #newlines={} firstnonws={} lastnewl={} this_indentation={} vs indentation={}", i, numnl_following, first_non_whitespace, last_newl, this_indentation, indentation); - _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace >= last_newl + 1); - _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation >= indentation); - if(!started) - { - _c4dbgfbl("[{}]: #newlines={}. 
write all leading newlines", i, numnl_following); - for(size_t j = 0; j < 1 + numnl_following; ++j) - m_filter_arena.str[pos++] = '\n'; - if(this_indentation > indentation) - { - is_indented = true; - _c4dbgfbl("[{}]: advance ->{}", i, last_newl + indentation); - i = last_newl + indentation; - } - else - { - i = first_non_whitespace - 1; - _c4dbgfbl("[{}]: advance ->{}", i, first_non_whitespace); - } - } - else if(this_indentation == indentation) - { - _c4dbgfbl("[{}]: same indentation", i); - if(!is_indented) - { - if(numnl_following == 0) - { - _c4dbgfbl("[{}]: fold!", i); - m_filter_arena.str[pos++] = ' '; - } - else - { - _c4dbgfbl("[{}]: add {} newlines", i, 1 + numnl_following); - for(size_t j = 0; j < numnl_following; ++j) - m_filter_arena.str[pos++] = '\n'; - } - i = first_non_whitespace - 1; - _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); - } - else - { - _c4dbgfbl("[{}]: back to ref indentation", i); - is_indented = false; - on_change_indentation(numnl_following, last_newl, first_non_whitespace); - _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); - } - } - else - { - _c4dbgfbl("[{}]: increased indentation.", i); - is_indented = true; - _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation > indentation); - on_change_indentation(numnl_following, last_newl, first_non_whitespace); - _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); - } - } - else if(curr != '\r') - { - if(curr != '\t') - started = true; - m_filter_arena.str[pos++] = curr; - } - } - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - _c4dbgfbl(": #filteredchars={} after=[{}]~~~{}~~~", (int)s.len - (int)pos, pos, m_filter_arena.first(pos)); - bool changed = _apply_chomp(m_filter_arena, &pos, chomp); - if(pos < r.len || filtered_chars || changed) - { - r = _finish_filter_arena(s, pos); // write into s - } - } - break; - default: - _c4err("unknown block style"); - } - - _c4dbgfbl(": final=[{}]~~~{}~~~", r.len, r); - - #undef _c4dbgfbl - 
- return r; -} - -//----------------------------------------------------------------------------- -size_t Parser::_count_nlines(csubstr src) -{ - return 1 + src.count('\n'); -} - -//----------------------------------------------------------------------------- -void Parser::_handle_directive(csubstr directive_) -{ - csubstr directive = directive_; - if(directive.begins_with("%TAG")) - { - TagDirective td; - _c4dbgpf("%TAG directive: {}", directive_); - directive = directive.sub(4); - if(!directive.begins_with(' ')) - _c4err("malformed tag directive: {}", directive_); - directive = directive.triml(' '); - size_t pos = directive.find(' '); - if(pos == npos) - _c4err("malformed tag directive: {}", directive_); - td.handle = directive.first(pos); - directive = directive.sub(td.handle.len).triml(' '); - pos = directive.find(' '); - if(pos != npos) - directive = directive.first(pos); - td.prefix = directive; - td.next_node_id = m_tree->size(); - if(m_tree->size() > 0) - { - size_t prev = m_tree->size() - 1; - if(m_tree->is_root(prev) && m_tree->type(prev) != NOTYPE && !m_tree->is_stream(prev)) - ++td.next_node_id; - } - _c4dbgpf("%TAG: handle={} prefix={} next_node={}", td.handle, td.prefix, td.next_node_id); - m_tree->add_tag_directive(td); - } - else if(directive.begins_with("%YAML")) - { - _c4dbgpf("%YAML directive! 
ignoring...: {}", directive); - } -} - -//----------------------------------------------------------------------------- -void Parser::set_flags(flag_t f, State * s) -{ -#ifdef RYML_DBG - char buf1_[64], buf2_[64]; - csubstr buf1 = _prfl(buf1_, f); - csubstr buf2 = _prfl(buf2_, s->flags); - _c4dbgpf("state[{}]: setting flags to {}: before={}", s-m_stack.begin(), buf1, buf2); +#ifndef _C4_YML_NODE_HPP_ +#include "c4/yml/node.hpp" #endif - s->flags = f; -} - -void Parser::add_flags(flag_t on, State * s) -{ -#ifdef RYML_DBG - char buf1_[64], buf2_[64], buf3_[64]; - csubstr buf1 = _prfl(buf1_, on); - csubstr buf2 = _prfl(buf2_, s->flags); - csubstr buf3 = _prfl(buf3_, s->flags|on); - _c4dbgpf("state[{}]: adding flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); +#ifndef _C4_YML_PARSE_ENGINE_HPP_ +#include "c4/yml/parse_engine.hpp" #endif - s->flags |= on; -} - -void Parser::addrem_flags(flag_t on, flag_t off, State * s) -{ -#ifdef RYML_DBG - char buf1_[64], buf2_[64], buf3_[64], buf4_[64]; - csubstr buf1 = _prfl(buf1_, on); - csubstr buf2 = _prfl(buf2_, off); - csubstr buf3 = _prfl(buf3_, s->flags); - csubstr buf4 = _prfl(buf4_, ((s->flags|on)&(~off))); - _c4dbgpf("state[{}]: adding flags {} / removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3, buf4); +#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_ +#include "c4/yml/parse_engine.def.hpp" #endif - s->flags |= on; - s->flags &= ~off; -} - -void Parser::rem_flags(flag_t off, State * s) -{ -#ifdef RYML_DBG - char buf1_[64], buf2_[64], buf3_[64]; - csubstr buf1 = _prfl(buf1_, off); - csubstr buf2 = _prfl(buf2_, s->flags); - csubstr buf3 = _prfl(buf3_, s->flags&(~off)); - _c4dbgpf("state[{}]: removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); +#ifndef _C4_YML_EVENT_HANDLER_TREE_HPP_ +#include "c4/yml/event_handler_tree.hpp" #endif - s->flags &= ~off; -} - -//----------------------------------------------------------------------------- - -csubstr Parser::_prfl(substr 
buf, flag_t flags) -{ - size_t pos = 0; - bool gotone = false; - - #define _prflag(fl) \ - if((flags & fl) == (fl)) \ - { \ - if(gotone) \ - { \ - if(pos + 1 < buf.len) \ - buf[pos] = '|'; \ - ++pos; \ - } \ - csubstr fltxt = #fl; \ - if(pos + fltxt.len <= buf.len) \ - memcpy(buf.str + pos, fltxt.str, fltxt.len); \ - pos += fltxt.len; \ - gotone = true; \ - } - - _prflag(RTOP); - _prflag(RUNK); - _prflag(RMAP); - _prflag(RSEQ); - _prflag(FLOW); - _prflag(QMRK); - _prflag(RKEY); - _prflag(RVAL); - _prflag(RNXT); - _prflag(SSCL); - _prflag(QSCL); - _prflag(RSET); - _prflag(NDOC); - _prflag(RSEQIMAP); - - #undef _prflag - - RYML_ASSERT(pos <= buf.len); - - return buf.first(pos); -} -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -void Parser::_grow_filter_arena(size_t num_characters_needed) -{ - _c4dbgpf("grow: arena={} numchars={}", m_filter_arena.len, num_characters_needed); - if(num_characters_needed <= m_filter_arena.len) - return; - size_t sz = m_filter_arena.len << 1; - _c4dbgpf("grow: sz={}", sz); - sz = num_characters_needed > sz ? num_characters_needed : sz; - _c4dbgpf("grow: sz={}", sz); - sz = sz < 128u ? 
128u : sz; - _c4dbgpf("grow: sz={}", sz); - _RYML_CB_ASSERT(m_stack.m_callbacks, sz >= num_characters_needed); - _resize_filter_arena(sz); -} - -void Parser::_resize_filter_arena(size_t num_characters) -{ - if(num_characters > m_filter_arena.len) - { - _c4dbgpf("resize: sz={}", num_characters); - char *prev = m_filter_arena.str; - if(m_filter_arena.str) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_filter_arena.len > 0); - _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); - } - m_filter_arena.str = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, char, num_characters, prev); - m_filter_arena.len = num_characters; - } -} +namespace c4 { +namespace yml { -substr Parser::_finish_filter_arena(substr dst, size_t pos) -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= dst.len); - memcpy(dst.str, m_filter_arena.str, pos); - return dst.first(pos); -} +// instantiate the parser class +template class ParseEngine; +namespace { +inline void _reset_tree_handler(Parser *parser, Tree *t, id_type node_id) +{ + RYML_ASSERT(parser); + RYML_ASSERT(t); + if(!parser->m_evt_handler) + _RYML_CB_ERR(t->m_callbacks, "event handler is not set"); + parser->m_evt_handler->reset(t, node_id); + RYML_ASSERT(parser->m_evt_handler->m_tree == t); +} +} // namespace + +void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id) +{ + _reset_tree_handler(parser, t, node_id); + parser->parse_in_place_ev(filename, yaml); +} + +void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t, id_type node_id) +{ + _reset_tree_handler(parser, t, node_id); + parser->parse_json_in_place_ev(filename, json); +} + + +// this is vertically aligned to highlight the parameter differences. 
+void parse_in_place(Parser *parser, substr yaml, Tree *t, id_type node_id) { parse_in_place(parser, {}, yaml, t, node_id); } +void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t ) { RYML_CHECK(t); parse_in_place(parser, filename, yaml, t, t->root_id()); } +void parse_in_place(Parser *parser, substr yaml, Tree *t ) { RYML_CHECK(t); parse_in_place(parser, {} , yaml, t, t->root_id()); } +void parse_in_place(Parser *parser, csubstr filename, substr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); parse_in_place(parser, filename, yaml, node.tree(), node.id()); } +void parse_in_place(Parser *parser, substr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); parse_in_place(parser, {} , yaml, node.tree(), node.id()); } +Tree parse_in_place(Parser *parser, csubstr filename, substr yaml ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_in_place(parser, filename, yaml, &tree, tree.root_id()); return tree; } +Tree parse_in_place(Parser *parser, substr yaml ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_in_place(parser, {} , yaml, &tree, tree.root_id()); return tree; } -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- +// this is vertically aligned to highlight the parameter differences. 
+void parse_in_place(csubstr filename, substr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, filename, yaml, t, node_id); } +void parse_in_place( substr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, {} , yaml, t, node_id); } +void parse_in_place(csubstr filename, substr yaml, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, filename, yaml, t, t->root_id()); } +void parse_in_place( substr yaml, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, {} , yaml, t, t->root_id()); } +void parse_in_place(csubstr filename, substr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, filename, yaml, node.tree(), node.id()); } +void parse_in_place( substr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, {} , yaml, node.tree(), node.id()); } +Tree parse_in_place(csubstr filename, substr yaml ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_in_place(&parser, filename, yaml, &tree, tree.root_id()); return tree; } +Tree parse_in_place( substr yaml ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_in_place(&parser, {} , yaml, &tree, tree.root_id()); return tree; } -csubstr Parser::location_contents(Location const& loc) const -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, loc.offset < m_buf.len); - return m_buf.sub(loc.offset); -} -Location 
Parser::location(ConstNodeRef node) const -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, node.readable()); - return location(*node.tree(), node.id()); -} +// this is vertically aligned to highlight the parameter differences. +void parse_json_in_place(Parser *parser, substr json, Tree *t, id_type node_id) { parse_json_in_place(parser, {}, json, t, node_id); } +void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t ) { RYML_CHECK(t); parse_json_in_place(parser, filename, json, t, t->root_id()); } +void parse_json_in_place(Parser *parser, substr json, Tree *t ) { RYML_CHECK(t); parse_json_in_place(parser, {} , json, t, t->root_id()); } +void parse_json_in_place(Parser *parser, csubstr filename, substr json, NodeRef node ) { RYML_CHECK(!node.invalid()); parse_json_in_place(parser, filename, json, node.tree(), node.id()); } +void parse_json_in_place(Parser *parser, substr json, NodeRef node ) { RYML_CHECK(!node.invalid()); parse_json_in_place(parser, {} , json, node.tree(), node.id()); } +Tree parse_json_in_place(Parser *parser, csubstr filename, substr json ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_json_in_place(parser, filename, json, &tree, tree.root_id()); return tree; } +Tree parse_json_in_place(Parser *parser, substr json ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_json_in_place(parser, {} , json, &tree, tree.root_id()); return tree; } -Location Parser::location(Tree const& tree, size_t node) const -{ - // try hard to avoid getting the location from a null string. - Location loc; - if(_location_from_node(tree, node, &loc, 0)) - return loc; - return val_location(m_buf.str); -} +// this is vertically aligned to highlight the parameter differences. 
+void parse_json_in_place(csubstr filename, substr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, filename, json, t, node_id); } +void parse_json_in_place( substr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, {} , json, t, node_id); } +void parse_json_in_place(csubstr filename, substr json, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, filename, json, t, t->root_id()); } +void parse_json_in_place( substr json, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, {} , json, t, t->root_id()); } +void parse_json_in_place(csubstr filename, substr json, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, filename, json, node.tree(), node.id()); } +void parse_json_in_place( substr json, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, {} , json, node.tree(), node.id()); } +Tree parse_json_in_place(csubstr filename, substr json ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_json_in_place(&parser, filename, json, &tree, tree.root_id()); return tree; } +Tree parse_json_in_place( substr json ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_json_in_place(&parser, {} , json, &tree, tree.root_id()); return tree; } -bool Parser::_location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const -{ - 
if(tree.has_key(node)) - { - csubstr k = tree.key(node); - if(C4_LIKELY(k.str != nullptr)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, k.is_sub(m_buf)); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(k)); - *loc = val_location(k.str); - return true; - } - } - if(tree.has_val(node)) - { - csubstr v = tree.val(node); - if(C4_LIKELY(v.str != nullptr)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, v.is_sub(m_buf)); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(v)); - *loc = val_location(v.str); - return true; - } - } +// this is vertically aligned to highlight the parameter differences. +void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, filename, src, t, node_id); } +void parse_in_arena(Parser *parser, csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, {} , src, t, node_id); } +void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t ) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, filename, src, t, t->root_id()); } +void parse_in_arena(Parser *parser, csubstr yaml, Tree *t ) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, {} , src, t, t->root_id()); } +void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(parser, filename, src, node.tree(), node.id()); } +void parse_in_arena(Parser *parser, csubstr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(parser, {} , src, node.tree(), node.id()); } +Tree parse_in_arena(Parser *parser, csubstr filename, csubstr yaml ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(parser, 
filename, src, &tree, tree.root_id()); return tree; } +Tree parse_in_arena(Parser *parser, csubstr yaml ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(parser, {} , src, &tree, tree.root_id()); return tree; } - if(tree.is_container(node)) - { - if(_location_from_cont(tree, node, loc)) - return true; - } +// this is vertically aligned to highlight the parameter differences. +void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, filename, src, t, node_id); } +void parse_in_arena( csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, {} , src, t, node_id); } +void parse_in_arena(csubstr filename, csubstr yaml, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, filename, src, t, t->root_id()); } +void parse_in_arena( csubstr yaml, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, {} , src, t, t->root_id()); } +void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(&parser, filename, src, node.tree(), node.id()); } +void parse_in_arena( csubstr yaml, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = 
node.tree()->copy_to_arena(yaml); parse_in_place(&parser, {} , src, node.tree(), node.id()); } +Tree parse_in_arena(csubstr filename, csubstr yaml ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(&parser, filename, src, &tree, tree.root_id()); return tree; } +Tree parse_in_arena( csubstr yaml ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(&parser, {} , src, &tree, tree.root_id()); return tree; } - if(tree.type(node) != NOTYPE && level == 0) - { - // try the prev sibling - { - const size_t prev = tree.prev_sibling(node); - if(prev != NONE) - { - if(_location_from_node(tree, prev, loc, level+1)) - return true; - } - } - // try the next sibling - { - const size_t next = tree.next_sibling(node); - if(next != NONE) - { - if(_location_from_node(tree, next, loc, level+1)) - return true; - } - } - // try the parent - { - const size_t parent = tree.parent(node); - if(parent != NONE) - { - if(_location_from_node(tree, parent, loc, level+1)) - return true; - } - } - } - return false; -} +// this is vertically aligned to highlight the parameter differences. 
+void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, filename, src, t, node_id); } +void parse_json_in_arena(Parser *parser, csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, {} , src, t, node_id); } +void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t ) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, filename, src, t, t->root_id()); } +void parse_json_in_arena(Parser *parser, csubstr json, Tree *t ) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, {} , src, t, t->root_id()); } +void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, NodeRef node ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(parser, filename, src, node.tree(), node.id()); } +void parse_json_in_arena(Parser *parser, csubstr json, NodeRef node ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(parser, {} , src, node.tree(), node.id()); } +Tree parse_json_in_arena(Parser *parser, csubstr filename, csubstr json ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(parser, filename, src, &tree, tree.root_id()); return tree; } +Tree parse_json_in_arena(Parser *parser, csubstr json ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(parser, {} , src, &tree, tree.root_id()); return tree; } -bool Parser::_location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const -{ - _RYML_CB_ASSERT(m_stack.m_callbacks, tree.is_container(node)); - if(!tree.is_stream(node)) - { - const char 
*node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container - if(tree.has_children(node)) - { - size_t child = tree.first_child(node); - if(tree.has_key(child)) - { - // when a map starts, the container was set after the key - csubstr k = tree.key(child); - if(k.str && node_start > k.str) - node_start = k.str; - } - } - *loc = val_location(node_start); - return true; - } - else // it's a stream - { - *loc = val_location(m_buf.str); // just return the front of the buffer - } - return true; -} +// this is vertically aligned to highlight the parameter differences. +void parse_json_in_arena(csubstr filename, csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, filename, src, t, node_id); } +void parse_json_in_arena( csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, {} , src, t, node_id); } +void parse_json_in_arena(csubstr filename, csubstr json, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, filename, src, t, t->root_id()); } +void parse_json_in_arena( csubstr json, Tree *t ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, {} , src, t, t->root_id()); } +void parse_json_in_arena(csubstr filename, csubstr json, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(&parser, filename, src, node.tree(), node.id()); } +void parse_json_in_arena( csubstr 
json, NodeRef node ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(&parser, {} , src, node.tree(), node.id()); } +Tree parse_json_in_arena(csubstr filename, csubstr json ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(&parser, filename, src, &tree, tree.root_id()); return tree; } +Tree parse_json_in_arena( csubstr json ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(&parser, {} , src, &tree, tree.root_id()); return tree; } -Location Parser::val_location(const char *val) const +RYML_EXPORT C4_NO_INLINE size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept { - if(C4_UNLIKELY(val == nullptr)) - return {m_file, 0, 0, 0}; - - _RYML_CB_CHECK(m_stack.m_callbacks, m_options.locations()); - // NOTE: if any of these checks fails, the parser needs to be - // instantiated with locations enabled. - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_options.locations()); - _RYML_CB_ASSERT(m_stack.m_callbacks, !_locations_dirty()); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets != nullptr); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size > 0); - // NOTE: the pointer needs to belong to the buffer that was used to parse. - csubstr src = m_buf; - _RYML_CB_CHECK(m_stack.m_callbacks, val != nullptr || src.str == nullptr); - _RYML_CB_CHECK(m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr)); - // ok. 
search the first stored newline after the given ptr - using lineptr_type = size_t const* C4_RESTRICT; - lineptr_type lineptr = nullptr; - size_t offset = (size_t)(val - src.begin()); - if(m_newline_offsets_size < 30) // TODO magic number + if(indentation + 1 > s.len) + return npos; + for(size_t i = s.len-indentation-1; i != size_t(-1); --i) { - // just do a linear search if the size is small. - for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr) + if(s.str[i] == '\n') { - if(*curr > offset) - { - lineptr = curr; - break; - } + csubstr rem = s.sub(i + 1); + size_t first = rem.first_not_of(' '); + first = (first != npos) ? first : rem.len; + if(first > indentation) + return i; } } - else - { - // do a bisection search if the size is not small. - // - // We could use std::lower_bound but this is simple enough and - // spares the include of . - size_t count = m_newline_offsets_size; - size_t step; - lineptr_type it; - lineptr = m_newline_offsets; - while(count) - { - step = count >> 1; - it = lineptr + step; - if(*it < offset) - { - lineptr = ++it; - count -= step + 1; - } - else - { - count = step; - } - } - } - _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr >= m_newline_offsets); - _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size); - _RYML_CB_ASSERT(m_stack.m_callbacks, *lineptr > offset); - Location loc; - loc.name = m_file; - loc.offset = offset; - loc.line = (size_t)(lineptr - m_newline_offsets); - if(lineptr > m_newline_offsets) - loc.col = (offset - *(lineptr-1) - 1u); - else - loc.col = offset; - return loc; + return npos; } - -void Parser::_prepare_locations() -{ - m_newline_offsets_buf = m_buf; - size_t numnewlines = 1u + m_buf.count('\n'); - _resize_locations(numnewlines); - m_newline_offsets_size = 0; - for(size_t i = 0; i < m_buf.len; i++) - if(m_buf[i] == '\n') - m_newline_offsets[m_newline_offsets_size++] = i; - 
m_newline_offsets[m_newline_offsets_size++] = m_buf.len; - _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == numnewlines); -} - -void Parser::_resize_locations(size_t numnewlines) -{ - if(numnewlines > m_newline_offsets_capacity) + +//----------------------------------------------------------------------------- + +RYML_EXPORT id_type estimate_tree_capacity(csubstr src) +{ + id_type num_nodes = 1; // root + for(size_t i = 0; i < src.len; ++i) { - if(m_newline_offsets) - _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); - m_newline_offsets = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets); - m_newline_offsets_capacity = numnewlines; - } -} - -bool Parser::_locations_dirty() const -{ - return !m_newline_offsets_size; + const char c = src.str[i]; + num_nodes += (c == '\n') || (c == ',') || (c == '[') || (c == '{'); + } + return num_nodes; } } // namespace yml } // namespace c4 - - -#if defined(_MSC_VER) -# pragma warning(pop) -#elif defined(__clang__) -# pragma clang diagnostic pop -#elif defined(__GNUC__) -# pragma GCC diagnostic pop -#endif diff --git a/3rdparty/rapidyaml/src/c4/yml/tree.cpp b/3rdparty/rapidyaml/src/c4/yml/tree.cpp index 43bb12c45e9b9f..e20940151d4c65 100644 --- a/3rdparty/rapidyaml/src/c4/yml/tree.cpp +++ b/3rdparty/rapidyaml/src/c4/yml/tree.cpp @@ -1,238 +1,19 @@ #include "c4/yml/tree.hpp" #include "c4/yml/detail/parser_dbg.hpp" #include "c4/yml/node.hpp" -#include "c4/yml/detail/stack.hpp" +#include "c4/yml/reference_resolver.hpp" C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4296/*expression is always 'boolean_value'*/) +C4_SUPPRESS_WARNING_MSVC(4702/*unreachable code*/) C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast") C4_SUPPRESS_WARNING_GCC("-Wtype-limits") +C4_SUPPRESS_WARNING_GCC("-Wuseless-cast") namespace c4 { namespace yml { -csubstr normalize_tag(csubstr tag) -{ - YamlTag_e t = to_tag(tag); - if(t != TAG_NONE) - return from_tag(t); - 
if(tag.begins_with("!<")) - tag = tag.sub(1); - if(tag.begins_with(""}; - case TAG_OMAP: - return {""}; - case TAG_PAIRS: - return {""}; - case TAG_SET: - return {""}; - case TAG_SEQ: - return {""}; - case TAG_BINARY: - return {""}; - case TAG_BOOL: - return {""}; - case TAG_FLOAT: - return {""}; - case TAG_INT: - return {""}; - case TAG_MERGE: - return {""}; - case TAG_NULL: - return {""}; - case TAG_STR: - return {""}; - case TAG_TIMESTAMP: - return {""}; - case TAG_VALUE: - return {""}; - case TAG_YAML: - return {""}; - case TAG_NONE: - return {""}; - } - return {""}; -} - -csubstr from_tag(YamlTag_e tag) -{ - switch(tag) - { - case TAG_MAP: - return {"!!map"}; - case TAG_OMAP: - return {"!!omap"}; - case TAG_PAIRS: - return {"!!pairs"}; - case TAG_SET: - return {"!!set"}; - case TAG_SEQ: - return {"!!seq"}; - case TAG_BINARY: - return {"!!binary"}; - case TAG_BOOL: - return {"!!bool"}; - case TAG_FLOAT: - return {"!!float"}; - case TAG_INT: - return {"!!int"}; - case TAG_MERGE: - return {"!!merge"}; - case TAG_NULL: - return {"!!null"}; - case TAG_STR: - return {"!!str"}; - case TAG_TIMESTAMP: - return {"!!timestamp"}; - case TAG_VALUE: - return {"!!value"}; - case TAG_YAML: - return {"!!yaml"}; - case TAG_NONE: - return {""}; - } - return {""}; -} - - -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- -//----------------------------------------------------------------------------- - -const char* NodeType::type_str(NodeType_e ty) -{ - switch(ty & _TYMASK) - { - case KEYVAL: - return "KEYVAL"; - case KEY: - return "KEY"; - case VAL: - return "VAL"; - case MAP: - return "MAP"; - case SEQ: - return "SEQ"; - case KEYMAP: - return "KEYMAP"; - case KEYSEQ: - return "KEYSEQ"; - case DOCSEQ: - return "DOCSEQ"; - case DOCMAP: - return "DOCMAP"; - case DOCVAL: - return "DOCVAL"; - case DOC: - return "DOC"; - case STREAM: - return "STREAM"; - case NOTYPE: - 
return "NOTYPE"; - default: - if((ty & KEYVAL) == KEYVAL) - return "KEYVAL***"; - if((ty & KEYMAP) == KEYMAP) - return "KEYMAP***"; - if((ty & KEYSEQ) == KEYSEQ) - return "KEYSEQ***"; - if((ty & DOCSEQ) == DOCSEQ) - return "DOCSEQ***"; - if((ty & DOCMAP) == DOCMAP) - return "DOCMAP***"; - if((ty & DOCVAL) == DOCVAL) - return "DOCVAL***"; - if(ty & KEY) - return "KEY***"; - if(ty & VAL) - return "VAL***"; - if(ty & MAP) - return "MAP***"; - if(ty & SEQ) - return "SEQ***"; - if(ty & DOC) - return "DOC***"; - return "(unk)"; - } -} - - //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -251,17 +32,17 @@ ConstNodeRef Tree::crootref() const return ConstNodeRef(this, root_id()); } -NodeRef Tree::ref(size_t id) +NodeRef Tree::ref(id_type id) { _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_cap); return NodeRef(this, id); } -ConstNodeRef Tree::ref(size_t id) const +ConstNodeRef Tree::ref(id_type id) const { _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_cap); return ConstNodeRef(this, id); } -ConstNodeRef Tree::cref(size_t id) const +ConstNodeRef Tree::cref(id_type id) const { _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_cap); return ConstNodeRef(this, id); @@ -276,20 +57,24 @@ ConstNodeRef Tree::operator[] (csubstr key) const return rootref()[key]; } -NodeRef Tree::operator[] (size_t i) +NodeRef Tree::operator[] (id_type i) { return rootref()[i]; } -ConstNodeRef Tree::operator[] (size_t i) const +ConstNodeRef Tree::operator[] (id_type i) const { return rootref()[i]; } -NodeRef Tree::docref(size_t i) +NodeRef Tree::docref(id_type i) { return ref(doc(i)); } -ConstNodeRef Tree::docref(size_t i) const +ConstNodeRef Tree::docref(id_type i) const +{ + return cref(doc(i)); +} +ConstNodeRef Tree::cdocref(id_type i) const { return cref(doc(i)); } @@ 
-305,10 +90,11 @@ Tree::Tree(Callbacks const& cb) , m_arena() , m_arena_pos(0) , m_callbacks(cb) + , m_tag_directives() { } -Tree::Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb) +Tree::Tree(id_type node_capacity, size_t arena_capacity, Callbacks const& cb) : Tree(cb) { reserve(node_capacity); @@ -334,12 +120,12 @@ Tree& Tree::operator= (Tree const& that) return *this; } -Tree::Tree(Tree && that) : Tree(that.m_callbacks) +Tree::Tree(Tree && that) noexcept : Tree(that.m_callbacks) { _move(that); } -Tree& Tree::operator= (Tree && that) +Tree& Tree::operator= (Tree && that) RYML_NOEXCEPT { _free(); m_callbacks = that.m_callbacks; @@ -352,7 +138,7 @@ void Tree::_free() if(m_buf) { _RYML_CB_ASSERT(m_callbacks, m_cap > 0); - _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, (size_t)m_cap); } if(m_arena.str) { @@ -377,7 +163,7 @@ void Tree::_clear() m_free_tail = 0; m_arena = {}; m_arena_pos = 0; - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) m_tag_directives[i] = {}; } @@ -386,8 +172,11 @@ void Tree::_copy(Tree const& that) _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); - m_buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, that.m_cap, that.m_buf); - memcpy(m_buf, that.m_buf, that.m_cap * sizeof(NodeData)); + if(that.m_cap) + { + m_buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, (size_t)that.m_cap, that.m_buf); + memcpy(m_buf, that.m_buf, (size_t)that.m_cap * sizeof(NodeData)); + } m_cap = that.m_cap; m_size = that.m_size; m_free_head = that.m_free_head; @@ -403,11 +192,11 @@ void Tree::_copy(Tree const& that) _relocate(arena); // does a memcpy of the arena and updates nodes using the old arena m_arena = arena; } - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) m_tag_directives[i] 
= that.m_tag_directives[i]; } -void Tree::_move(Tree & that) +void Tree::_move(Tree & that) noexcept { _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); @@ -419,7 +208,7 @@ void Tree::_move(Tree & that) m_free_tail = that.m_free_tail; m_arena = that.m_arena; m_arena_pos = that.m_arena_pos; - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) m_tag_directives[i] = that.m_tag_directives[i]; that._clear(); } @@ -428,7 +217,8 @@ void Tree::_relocate(substr next_arena) { _RYML_CB_ASSERT(m_callbacks, next_arena.not_empty()); _RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len); - memcpy(next_arena.str, m_arena.str, m_arena_pos); + if(m_arena_pos) + memcpy(next_arena.str, m_arena.str, m_arena_pos); for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n) { if(in_arena(n->m_key.scalar)) @@ -455,17 +245,17 @@ void Tree::_relocate(substr next_arena) //----------------------------------------------------------------------------- -void Tree::reserve(size_t cap) +void Tree::reserve(id_type cap) { if(cap > m_cap) { - NodeData *buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, cap, m_buf); + NodeData *buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, (size_t)cap, m_buf); if(m_buf) { - memcpy(buf, m_buf, m_cap * sizeof(NodeData)); - _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); + memcpy(buf, m_buf, (size_t)m_cap * sizeof(NodeData)); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, (size_t)m_cap); } - size_t first = m_cap, del = cap - m_cap; + id_type first = m_cap, del = cap - m_cap; m_cap = cap; m_buf = buf; _clear_range(first, del); @@ -509,26 +299,26 @@ void Tree::clear() m_free_head = NONE; m_free_tail = NONE; } - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) m_tag_directives[i] = {}; } void Tree::_claim_root() { - size_t r = _claim(); + id_type r = _claim(); _RYML_CB_ASSERT(m_callbacks, r 
== 0); _set_hierarchy(r, NONE, NONE); } //----------------------------------------------------------------------------- -void Tree::_clear_range(size_t first, size_t num) +void Tree::_clear_range(id_type first, id_type num) { if(num == 0) return; // prevent overflow when subtracting _RYML_CB_ASSERT(m_callbacks, first >= 0 && first + num <= m_cap); - memset(m_buf + first, 0, num * sizeof(NodeData)); // TODO we should not need this - for(size_t i = first, e = first + num; i < e; ++i) + memset(m_buf + first, 0, (size_t)num * sizeof(NodeData)); // TODO we should not need this + for(id_type i = first, e = first + num; i < e; ++i) { _clear(i); NodeData *n = m_buf + i; @@ -542,7 +332,7 @@ C4_SUPPRESS_WARNING_GCC_POP //----------------------------------------------------------------------------- -void Tree::_release(size_t i) +void Tree::_release(id_type i) { _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); @@ -555,7 +345,7 @@ void Tree::_release(size_t i) //----------------------------------------------------------------------------- // add to the front of the free list -void Tree::_free_list_add(size_t i) +void Tree::_free_list_add(id_type i) { _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); NodeData &C4_RESTRICT w = m_buf[i]; @@ -570,7 +360,7 @@ void Tree::_free_list_add(size_t i) m_free_tail = m_free_head; } -void Tree::_free_list_rem(size_t i) +void Tree::_free_list_rem(id_type i) { if(m_free_head == i) m_free_head = _p(i)->m_next_sibling; @@ -578,11 +368,11 @@ void Tree::_free_list_rem(size_t i) } //----------------------------------------------------------------------------- -size_t Tree::_claim() +id_type Tree::_claim() { if(m_free_head == NONE || m_buf == nullptr) { - size_t sz = 2 * m_cap; + id_type sz = 2 * m_cap; sz = sz ? 
sz : 16; reserve(sz); _RYML_CB_ASSERT(m_callbacks, m_free_head != NONE); @@ -591,7 +381,7 @@ size_t Tree::_claim() _RYML_CB_ASSERT(m_callbacks, m_size < m_cap); _RYML_CB_ASSERT(m_callbacks, m_free_head >= 0 && m_free_head < m_cap); - size_t ichild = m_free_head; + id_type ichild = m_free_head; NodeData *child = m_buf + ichild; ++m_size; @@ -612,16 +402,22 @@ size_t Tree::_claim() C4_SUPPRESS_WARNING_GCC_PUSH C4_SUPPRESS_WARNING_CLANG_PUSH C4_SUPPRESS_WARNING_CLANG("-Wnull-dereference") -#if defined(__GNUC__) && (__GNUC__ >= 6) +#if defined(__GNUC__) +#if (__GNUC__ >= 6) C4_SUPPRESS_WARNING_GCC("-Wnull-dereference") #endif +#if (__GNUC__ > 9) +C4_SUPPRESS_WARNING_GCC("-Wanalyzer-fd-leak") +#endif +#endif -void Tree::_set_hierarchy(size_t ichild, size_t iparent, size_t iprev_sibling) +void Tree::_set_hierarchy(id_type ichild, id_type iparent, id_type iprev_sibling) { + _RYML_CB_ASSERT(m_callbacks, ichild >= 0 && ichild < m_cap); _RYML_CB_ASSERT(m_callbacks, iparent == NONE || (iparent >= 0 && iparent < m_cap)); _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE || (iprev_sibling >= 0 && iprev_sibling < m_cap)); - NodeData *C4_RESTRICT child = get(ichild); + NodeData *C4_RESTRICT child = _p(ichild); child->m_parent = iparent; child->m_prev_sibling = NONE; @@ -636,7 +432,7 @@ void Tree::_set_hierarchy(size_t ichild, size_t iparent, size_t iprev_sibling) if(iparent == NONE) return; - size_t inext_sibling = iprev_sibling != NONE ? next_sibling(iprev_sibling) : first_child(iparent); + id_type inext_sibling = iprev_sibling != NONE ? 
next_sibling(iprev_sibling) : first_child(iparent); NodeData *C4_RESTRICT parent = get(iparent); NodeData *C4_RESTRICT psib = get(iprev_sibling); NodeData *C4_RESTRICT nsib = get(inext_sibling); @@ -678,7 +474,7 @@ C4_SUPPRESS_WARNING_CLANG_POP //----------------------------------------------------------------------------- -void Tree::_rem_hierarchy(size_t i) +void Tree::_rem_hierarchy(id_type i) { _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); @@ -712,14 +508,7 @@ void Tree::_rem_hierarchy(size_t i) } //----------------------------------------------------------------------------- -void Tree::reorder() -{ - size_t r = root_id(); - _do_reorder(&r, 0); -} - -//----------------------------------------------------------------------------- -size_t Tree::_do_reorder(size_t *node, size_t count) +id_type Tree::_do_reorder(id_type *node, id_type count) { // swap this node if it's not in place if(*node != count) @@ -730,7 +519,7 @@ size_t Tree::_do_reorder(size_t *node, size_t count) ++count; // bump the count from this node // now descend in the hierarchy - for(size_t i = first_child(*node); i != NONE; i = next_sibling(i)) + for(id_type i = first_child(*node); i != NONE; i = next_sibling(i)) { // this child may have been relocated to a different index, // so get an updated version @@ -739,8 +528,15 @@ size_t Tree::_do_reorder(size_t *node, size_t count) return count; } +void Tree::reorder() +{ + id_type r = root_id(); + _do_reorder(&r, 0); +} + + //----------------------------------------------------------------------------- -void Tree::_swap(size_t n_, size_t m_) +void Tree::_swap(id_type n_, id_type m_) { _RYML_CB_ASSERT(m_callbacks, (parent(n_) != NONE) || type(n_) == NOTYPE); _RYML_CB_ASSERT(m_callbacks, (parent(m_) != NONE) || type(m_) == NOTYPE); @@ -774,18 +570,18 @@ void Tree::_swap(size_t n_, size_t m_) } //----------------------------------------------------------------------------- -void Tree::_swap_hierarchy(size_t ia, size_t ib) +void 
Tree::_swap_hierarchy(id_type ia, id_type ib) { if(ia == ib) return; - for(size_t i = first_child(ia); i != NONE; i = next_sibling(i)) + for(id_type i = first_child(ia); i != NONE; i = next_sibling(i)) { if(i == ib || i == ia) continue; _p(i)->m_parent = ib; } - for(size_t i = first_child(ib); i != NONE; i = next_sibling(i)) + for(id_type i = first_child(ib); i != NONE; i = next_sibling(i)) { if(i == ib || i == ia) continue; @@ -871,7 +667,7 @@ void Tree::_swap_hierarchy(size_t ia, size_t ib) _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ia); _p(b.m_next_sibling)->m_prev_sibling = ia; } - size_t ns = b.m_next_sibling; + id_type ns = b.m_next_sibling; b.m_prev_sibling = a.m_prev_sibling; b.m_next_sibling = ia; a.m_prev_sibling = ib; @@ -890,7 +686,7 @@ void Tree::_swap_hierarchy(size_t ia, size_t ib) _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ib); _p(a.m_next_sibling)->m_prev_sibling = ib; } - size_t ns = b.m_prev_sibling; + id_type ns = b.m_prev_sibling; a.m_prev_sibling = b.m_prev_sibling; a.m_next_sibling = ib; b.m_prev_sibling = ia; @@ -930,12 +726,12 @@ void Tree::_swap_hierarchy(size_t ia, size_t ib) } //----------------------------------------------------------------------------- -void Tree::_copy_hierarchy(size_t dst_, size_t src_) +void Tree::_copy_hierarchy(id_type dst_, id_type src_) { auto const& C4_RESTRICT src = *_p(src_); auto & C4_RESTRICT dst = *_p(dst_); auto & C4_RESTRICT prt = *_p(src.m_parent); - for(size_t i = src.m_first_child; i != NONE; i = next_sibling(i)) + for(id_type i = src.m_first_child; i != NONE; i = next_sibling(i)) { _p(i)->m_parent = dst_; } @@ -963,7 +759,7 @@ void Tree::_copy_hierarchy(size_t dst_, size_t src_) } //----------------------------------------------------------------------------- -void Tree::_swap_props(size_t n_, size_t m_) +void Tree::_swap_props(id_type n_, id_type m_) { NodeData &C4_RESTRICT n = *_p(n_); NodeData &C4_RESTRICT m = *_p(m_); @@ -973,7 +769,7 @@ void Tree::_swap_props(size_t n_, size_t m_) } 
//----------------------------------------------------------------------------- -void Tree::move(size_t node, size_t after) +void Tree::move(id_type node, id_type after) { _RYML_CB_ASSERT(m_callbacks, node != NONE); _RYML_CB_ASSERT(m_callbacks, node != after); @@ -986,7 +782,7 @@ void Tree::move(size_t node, size_t after) //----------------------------------------------------------------------------- -void Tree::move(size_t node, size_t new_parent, size_t after) +void Tree::move(id_type node, id_type new_parent, id_type after) { _RYML_CB_ASSERT(m_callbacks, node != NONE); _RYML_CB_ASSERT(m_callbacks, node != after); @@ -999,21 +795,21 @@ void Tree::move(size_t node, size_t new_parent, size_t after) _set_hierarchy(node, new_parent, after); } -size_t Tree::move(Tree *src, size_t node, size_t new_parent, size_t after) +id_type Tree::move(Tree *src, id_type node, id_type new_parent, id_type after) { _RYML_CB_ASSERT(m_callbacks, src != nullptr); _RYML_CB_ASSERT(m_callbacks, node != NONE); _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); _RYML_CB_ASSERT(m_callbacks, new_parent != after); - size_t dup = duplicate(src, node, new_parent, after); + id_type dup = duplicate(src, node, new_parent, after); src->remove(node); return dup; } void Tree::set_root_as_stream() { - size_t root = root_id(); + id_type root = root_id(); if(is_stream(root)) return; // don't use _add_flags() because it's checked and will fail @@ -1022,7 +818,7 @@ void Tree::set_root_as_stream() if(is_val(root)) { _p(root)->m_type.add(SEQ); - size_t next_doc = append_child(root); + id_type next_doc = append_child(root); _copy_props_wo_key(next_doc, root); _p(next_doc)->m_type.add(DOC); _p(next_doc)->m_type.rem(SEQ); @@ -1031,10 +827,10 @@ void Tree::set_root_as_stream() return; } _RYML_CB_ASSERT(m_callbacks, !has_key(root)); - size_t next_doc = append_child(root); + id_type next_doc = append_child(root); _copy_props_wo_key(next_doc, root); _add_flags(next_doc, DOC); - for(size_t prev = NONE, ch = 
first_child(root), next = next_sibling(ch); ch != NONE; ) + for(id_type prev = NONE, ch = first_child(root), next = next_sibling(ch); ch != NONE; ) { if(ch == next_doc) break; @@ -1048,15 +844,15 @@ void Tree::set_root_as_stream() //----------------------------------------------------------------------------- -void Tree::remove_children(size_t node) +void Tree::remove_children(id_type node) { _RYML_CB_ASSERT(m_callbacks, get(node) != nullptr); - size_t ich = get(node)->m_first_child; + id_type ich = get(node)->m_first_child; while(ich != NONE) { remove_children(ich); _RYML_CB_ASSERT(m_callbacks, get(ich) != nullptr); - size_t next = get(ich)->m_next_sibling; + id_type next = get(ich)->m_next_sibling; _release(ich); if(ich == get(node)->m_last_child) break; @@ -1064,7 +860,7 @@ void Tree::remove_children(size_t node) } } -bool Tree::change_type(size_t node, NodeType type) +bool Tree::change_type(id_type node, NodeType type) { _RYML_CB_ASSERT(m_callbacks, type.is_val() || type.is_map() || type.is_seq()); _RYML_CB_ASSERT(m_callbacks, type.is_val() + type.is_map() + type.is_seq() == 1); @@ -1083,19 +879,19 @@ bool Tree::change_type(size_t node, NodeType type) //----------------------------------------------------------------------------- -size_t Tree::duplicate(size_t node, size_t parent, size_t after) +id_type Tree::duplicate(id_type node, id_type parent, id_type after) { return duplicate(this, node, parent, after); } -size_t Tree::duplicate(Tree const* src, size_t node, size_t parent, size_t after) +id_type Tree::duplicate(Tree const* src, id_type node, id_type parent, id_type after) { _RYML_CB_ASSERT(m_callbacks, src != nullptr); _RYML_CB_ASSERT(m_callbacks, node != NONE); _RYML_CB_ASSERT(m_callbacks, parent != NONE); _RYML_CB_ASSERT(m_callbacks, ! 
src->is_root(node)); - size_t copy = _claim(); + id_type copy = _claim(); _copy_props(copy, src, node); _set_hierarchy(copy, parent, after); @@ -1105,20 +901,20 @@ size_t Tree::duplicate(Tree const* src, size_t node, size_t parent, size_t after } //----------------------------------------------------------------------------- -size_t Tree::duplicate_children(size_t node, size_t parent, size_t after) +id_type Tree::duplicate_children(id_type node, id_type parent, id_type after) { return duplicate_children(this, node, parent, after); } -size_t Tree::duplicate_children(Tree const* src, size_t node, size_t parent, size_t after) +id_type Tree::duplicate_children(Tree const* src, id_type node, id_type parent, id_type after) { _RYML_CB_ASSERT(m_callbacks, src != nullptr); _RYML_CB_ASSERT(m_callbacks, node != NONE); _RYML_CB_ASSERT(m_callbacks, parent != NONE); _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); - size_t prev = after; - for(size_t i = src->first_child(node); i != NONE; i = src->next_sibling(i)) + id_type prev = after; + for(id_type i = src->first_child(node); i != NONE; i = src->next_sibling(i)) { prev = duplicate(src, i, parent, prev); } @@ -1127,12 +923,12 @@ size_t Tree::duplicate_children(Tree const* src, size_t node, size_t parent, siz } //----------------------------------------------------------------------------- -void Tree::duplicate_contents(size_t node, size_t where) +void Tree::duplicate_contents(id_type node, id_type where) { duplicate_contents(this, node, where); } -void Tree::duplicate_contents(Tree const *src, size_t node, size_t where) +void Tree::duplicate_contents(Tree const *src, id_type node, id_type where) { _RYML_CB_ASSERT(m_callbacks, src != nullptr); _RYML_CB_ASSERT(m_callbacks, node != NONE); @@ -1142,12 +938,12 @@ void Tree::duplicate_contents(Tree const *src, size_t node, size_t where) } //----------------------------------------------------------------------------- -size_t 
Tree::duplicate_children_no_rep(size_t node, size_t parent, size_t after) +id_type Tree::duplicate_children_no_rep(id_type node, id_type parent, id_type after) { return duplicate_children_no_rep(this, node, parent, after); } -size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t parent, size_t after) +id_type Tree::duplicate_children_no_rep(Tree const *src, id_type node, id_type parent, id_type after) { _RYML_CB_ASSERT(m_callbacks, node != NONE); _RYML_CB_ASSERT(m_callbacks, parent != NONE); @@ -1156,10 +952,10 @@ size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t pare // don't loop using pointers as there may be a relocation // find the position where "after" is - size_t after_pos = NONE; + id_type after_pos = NONE; if(after != NONE) { - for(size_t i = first_child(parent), icount = 0; i != NONE; ++icount, i = next_sibling(i)) + for(id_type i = first_child(parent), icount = 0; i != NONE; ++icount, i = next_sibling(i)) { if(i == after) { @@ -1171,8 +967,8 @@ size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t pare } // for each child to be duplicated... - size_t prev = after; - for(size_t i = src->first_child(node); i != NONE; i = src->next_sibling(i)) + id_type prev = after; + for(id_type i = src->first_child(node); i != NONE; i = src->next_sibling(i)) { if(is_seq(parent)) { @@ -1182,8 +978,8 @@ size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t pare { _RYML_CB_ASSERT(m_callbacks, is_map(parent)); // does the parent already have a node with key equal to that of the current duplicate? 
- size_t rep = NONE, rep_pos = NONE; - for(size_t j = first_child(parent), jcount = 0; j != NONE; ++jcount, j = next_sibling(j)) + id_type rep = NONE, rep_pos = NONE; + for(id_type j = first_child(parent), jcount = 0; j != NONE; ++jcount, j = next_sibling(j)) { if(key(j) == key(i)) { @@ -1227,7 +1023,7 @@ size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t pare //----------------------------------------------------------------------------- -void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node) +void Tree::merge_with(Tree const *src, id_type src_node, id_type dst_node) { _RYML_CB_ASSERT(m_callbacks, src != nullptr); if(src_node == NONE) @@ -1235,20 +1031,24 @@ void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node) if(dst_node == NONE) dst_node = root_id(); _RYML_CB_ASSERT(m_callbacks, src->has_val(src_node) || src->is_seq(src_node) || src->is_map(src_node)); - if(src->has_val(src_node)) { + type_bits mask_src = ~STYLE; // keep the existing style if it is already a val if( ! 
has_val(dst_node)) { if(has_children(dst_node)) remove_children(dst_node); + mask_src |= VAL_STYLE; // copy the src style } if(src->is_keyval(src_node)) - _copy_props(dst_node, src, src_node); - else if(src->is_val(src_node)) - _copy_props_wo_key(dst_node, src, src_node); + { + _copy_props(dst_node, src, src_node, mask_src); + } else - C4_NEVER_REACH(); + { + _RYML_CB_ASSERT(m_callbacks, src->is_val(src_node)); + _copy_props_wo_key(dst_node, src, src_node, mask_src); + } } else if(src->is_seq(src_node)) { @@ -1261,16 +1061,18 @@ void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node) to_seq(dst_node, src->key(src_node)); else to_seq(dst_node); + _p(dst_node)->m_type = src->_p(src_node)->m_type; } - for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + for(id_type sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) { - size_t dch = append_child(dst_node); + id_type dch = append_child(dst_node); _copy_props_wo_key(dch, src, sch); merge_with(src, sch, dch); } } - else if(src->is_map(src_node)) + else { + _RYML_CB_ASSERT(m_callbacks, src->is_map(src_node)); if( ! 
is_map(dst_node)) { if(has_children(dst_node)) @@ -1280,10 +1082,11 @@ void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node) to_map(dst_node, src->key(src_node)); else to_map(dst_node); + _p(dst_node)->m_type = src->_p(src_node)->m_type; } - for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + for(id_type sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) { - size_t dch = find_child(dst_node, src->key(sch)); + id_type dch = find_child(dst_node, src->key(sch)); if(dch == NONE) { dch = append_child(dst_node); @@ -1292,279 +1095,42 @@ void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node) merge_with(src, sch, dch); } } - else - { - C4_NEVER_REACH(); - } } //----------------------------------------------------------------------------- -namespace detail { -/** @todo make this part of the public API, refactoring as appropriate - * to be able to use the same resolver to handle multiple trees (one - * at a time) */ -struct ReferenceResolver -{ - struct refdata - { - NodeType type; - size_t node; - size_t prev_anchor; - size_t target; - size_t parent_ref; - size_t parent_ref_sibling; - }; - - Tree *t; - /** from the specs: "an alias node refers to the most recent - * node in the serialization having the specified anchor". So - * we need to start looking upward from ref nodes. 
- * - * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ - stack refs; - - ReferenceResolver(Tree *t_) : t(t_), refs(t_->callbacks()) - { - resolve(); - } - - void store_anchors_and_refs() - { - // minimize (re-)allocations by counting first - size_t num_anchors_and_refs = count_anchors_and_refs(t->root_id()); - if(!num_anchors_and_refs) - return; - refs.reserve(num_anchors_and_refs); - - // now descend through the hierarchy - _store_anchors_and_refs(t->root_id()); - - // finally connect the reference list - size_t prev_anchor = npos; - size_t count = 0; - for(auto &rd : refs) - { - rd.prev_anchor = prev_anchor; - if(rd.type.is_anchor()) - prev_anchor = count; - ++count; - } - } - - size_t count_anchors_and_refs(size_t n) - { - size_t c = 0; - c += t->has_key_anchor(n); - c += t->has_val_anchor(n); - c += t->is_key_ref(n); - c += t->is_val_ref(n); - for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) - c += count_anchors_and_refs(ch); - return c; - } - - void _store_anchors_and_refs(size_t n) - { - if(t->is_key_ref(n) || t->is_val_ref(n) || (t->has_key(n) && t->key(n) == "<<")) - { - if(t->is_seq(n)) - { - // for merging multiple inheritance targets - // <<: [ *CENTER, *BIG ] - for(size_t ich = t->first_child(n); ich != NONE; ich = t->next_sibling(ich)) - { - RYML_ASSERT(t->num_children(ich) == 0); - refs.push({VALREF, ich, npos, npos, n, t->next_sibling(n)}); - } - return; - } - if(t->is_key_ref(n) && t->key(n) != "<<") // insert key refs BEFORE inserting val refs - { - RYML_CHECK((!t->has_key(n)) || t->key(n).ends_with(t->key_ref(n))); - refs.push({KEYREF, n, npos, npos, NONE, NONE}); - } - if(t->is_val_ref(n)) - { - RYML_CHECK((!t->has_val(n)) || t->val(n).ends_with(t->val_ref(n))); - refs.push({VALREF, n, npos, npos, NONE, NONE}); - } - } - if(t->has_key_anchor(n)) - { - RYML_CHECK(t->has_key(n)); - refs.push({KEYANCH, n, npos, npos, NONE, NONE}); - } - if(t->has_val_anchor(n)) - { - RYML_CHECK(t->has_val(n) || t->is_container(n)); - 
refs.push({VALANCH, n, npos, npos, NONE, NONE}); - } - for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) - { - _store_anchors_and_refs(ch); - } - } - - size_t lookup_(refdata *C4_RESTRICT ra) - { - RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref()); - RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref()); - csubstr refname; - if(ra->type.is_val_ref()) - { - refname = t->val_ref(ra->node); - } - else - { - RYML_ASSERT(ra->type.is_key_ref()); - refname = t->key_ref(ra->node); - } - while(ra->prev_anchor != npos) - { - ra = &refs[ra->prev_anchor]; - if(t->has_anchor(ra->node, refname)) - return ra->node; - } - - #ifndef RYML_ERRMSG_SIZE - #define RYML_ERRMSG_SIZE 1024 - #endif - - char errmsg[RYML_ERRMSG_SIZE]; - snprintf(errmsg, RYML_ERRMSG_SIZE, "anchor does not exist: '%.*s'", - static_cast(refname.size()), refname.data()); - c4::yml::error(errmsg); - C4_UNREACHABLE_AFTER_ERR(); - } - - void resolve() - { - store_anchors_and_refs(); - if(refs.empty()) - return; - - /* from the specs: "an alias node refers to the most recent - * node in the serialization having the specified anchor". So - * we need to start looking upward from ref nodes. - * - * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ - for(size_t i = 0, e = refs.size(); i < e; ++i) - { - auto &C4_RESTRICT rd = refs.top(i); - if( ! rd.type.is_ref()) - continue; - rd.target = lookup_(&rd); - } - } - -}; // ReferenceResolver -} // namespace detail - void Tree::resolve() { if(m_size == 0) return; + ReferenceResolver rr; + resolve(&rr); +} - detail::ReferenceResolver rr(this); - - // insert the resolved references - size_t prev_parent_ref = NONE; - size_t prev_parent_ref_after = NONE; - for(auto const& C4_RESTRICT rd : rr.refs) - { - if( ! 
rd.type.is_ref()) - continue; - if(rd.parent_ref != NONE) - { - _RYML_CB_ASSERT(m_callbacks, is_seq(rd.parent_ref)); - size_t after, p = parent(rd.parent_ref); - if(prev_parent_ref != rd.parent_ref) - { - after = rd.parent_ref;//prev_sibling(rd.parent_ref_sibling); - prev_parent_ref_after = after; - } - else - { - after = prev_parent_ref_after; - } - prev_parent_ref = rd.parent_ref; - prev_parent_ref_after = duplicate_children_no_rep(rd.target, p, after); - remove(rd.node); - } - else - { - if(has_key(rd.node) && is_key_ref(rd.node) && key(rd.node) == "<<") - { - _RYML_CB_ASSERT(m_callbacks, is_keyval(rd.node)); - size_t p = parent(rd.node); - size_t after = prev_sibling(rd.node); - duplicate_children_no_rep(rd.target, p, after); - remove(rd.node); - } - else if(rd.type.is_key_ref()) - { - _RYML_CB_ASSERT(m_callbacks, is_key_ref(rd.node)); - _RYML_CB_ASSERT(m_callbacks, has_key_anchor(rd.target) || has_val_anchor(rd.target)); - if(has_val_anchor(rd.target) && val_anchor(rd.target) == key_ref(rd.node)) - { - _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); - _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); - _p(rd.node)->m_key.scalar = val(rd.target); - _add_flags(rd.node, KEY); - } - else - { - _RYML_CB_CHECK(m_callbacks, key_anchor(rd.target) == key_ref(rd.node)); - _p(rd.node)->m_key.scalar = key(rd.target); - _add_flags(rd.node, VAL); - } - } - else - { - _RYML_CB_ASSERT(m_callbacks, rd.type.is_val_ref()); - if(has_key_anchor(rd.target) && key_anchor(rd.target) == val_ref(rd.node)) - { - _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); - _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); - _p(rd.node)->m_val.scalar = key(rd.target); - _add_flags(rd.node, VAL); - } - else - { - duplicate_contents(rd.target, rd.node); - } - } - } - } - - // clear anchors and refs - for(auto const& C4_RESTRICT ar : rr.refs) - { - rem_anchor_ref(ar.node); - if(ar.parent_ref != NONE) - if(type(ar.parent_ref) != NOTYPE) - remove(ar.parent_ref); - } - +void 
Tree::resolve(ReferenceResolver *C4_RESTRICT rr) +{ + if(m_size == 0) + return; + rr->resolve(this); } + //----------------------------------------------------------------------------- -size_t Tree::num_children(size_t node) const +id_type Tree::num_children(id_type node) const { - size_t count = 0; - for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + id_type count = 0; + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) ++count; return count; } -size_t Tree::child(size_t node, size_t pos) const +id_type Tree::child(id_type node, id_type pos) const { _RYML_CB_ASSERT(m_callbacks, node != NONE); - size_t count = 0; - for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + id_type count = 0; + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) { if(count++ == pos) return i; @@ -1572,17 +1138,17 @@ size_t Tree::child(size_t node, size_t pos) const return NONE; } -size_t Tree::child_pos(size_t node, size_t ch) const +id_type Tree::child_pos(id_type node, id_type ch) const { _RYML_CB_ASSERT(m_callbacks, node != NONE); - size_t count = 0; - for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + id_type count = 0; + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) { if(i == ch) return count; ++count; } - return npos; + return NONE; } #if defined(__clang__) @@ -1593,9 +1159,12 @@ size_t Tree::child_pos(size_t node, size_t ch) const # if __GNUC__ >= 6 # pragma GCC diagnostic ignored "-Wnull-dereference" # endif +# if __GNUC__ > 9 +# pragma GCC diagnostic ignored "-Wanalyzer-null-dereference" +# endif #endif -size_t Tree::find_child(size_t node, csubstr const& name) const +id_type Tree::find_child(id_type node, csubstr const& name) const { _RYML_CB_ASSERT(m_callbacks, node != NONE); _RYML_CB_ASSERT(m_callbacks, is_map(node)); @@ -1608,7 +1177,7 @@ size_t Tree::find_child(size_t node, csubstr const& name) const { _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child != NONE); } - 
for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + for(id_type i = first_child(node); i != NONE; i = next_sibling(i)) { if(_p(i)->m_key.scalar == name) { @@ -1624,10 +1193,41 @@ size_t Tree::find_child(size_t node, csubstr const& name) const # pragma GCC diagnostic pop #endif +namespace { +id_type depth_desc_(Tree const& C4_RESTRICT t, id_type id, id_type currdepth=0, id_type maxdepth=0) +{ + maxdepth = currdepth > maxdepth ? currdepth : maxdepth; + for(id_type child = t.first_child(id); child != NONE; child = t.next_sibling(child)) + { + const id_type d = depth_desc_(t, child, currdepth+1, maxdepth); + maxdepth = d > maxdepth ? d : maxdepth; + } + return maxdepth; +} +} + +id_type Tree::depth_desc(id_type node) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + return depth_desc_(*this, node); +} + +id_type Tree::depth_asc(id_type node) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + id_type depth = 0; + while(!is_root(node)) + { + ++depth; + node = parent(node); + } + return depth; +} + //----------------------------------------------------------------------------- -void Tree::to_val(size_t node, csubstr val, type_bits more_flags) +void Tree::to_val(id_type node, csubstr val, type_bits more_flags) { _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); @@ -1636,7 +1236,7 @@ void Tree::to_val(size_t node, csubstr val, type_bits more_flags) _p(node)->m_val = val; } -void Tree::to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags) +void Tree::to_keyval(id_type node, csubstr key, csubstr val, type_bits more_flags) { _RYML_CB_ASSERT(m_callbacks, ! 
has_children(node)); _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); @@ -1645,7 +1245,7 @@ void Tree::to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags _p(node)->m_val = val; } -void Tree::to_map(size_t node, type_bits more_flags) +void Tree::to_map(id_type node, type_bits more_flags) { _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); // parent must not have children with keys @@ -1654,7 +1254,7 @@ void Tree::to_map(size_t node, type_bits more_flags) _p(node)->m_val.clear(); } -void Tree::to_map(size_t node, csubstr key, type_bits more_flags) +void Tree::to_map(id_type node, csubstr key, type_bits more_flags) { _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); @@ -1663,7 +1263,7 @@ void Tree::to_map(size_t node, csubstr key, type_bits more_flags) _p(node)->m_val.clear(); } -void Tree::to_seq(size_t node, type_bits more_flags) +void Tree::to_seq(id_type node, type_bits more_flags) { _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_seq(node)); @@ -1672,7 +1272,7 @@ void Tree::to_seq(size_t node, type_bits more_flags) _p(node)->m_val.clear(); } -void Tree::to_seq(size_t node, csubstr key, type_bits more_flags) +void Tree::to_seq(id_type node, csubstr key, type_bits more_flags) { _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); @@ -1681,7 +1281,7 @@ void Tree::to_seq(size_t node, csubstr key, type_bits more_flags) _p(node)->m_val.clear(); } -void Tree::to_doc(size_t node, type_bits more_flags) +void Tree::to_doc(id_type node, type_bits more_flags) { _RYML_CB_ASSERT(m_callbacks, ! 
has_children(node)); _set_flags(node, DOC|more_flags); @@ -1689,7 +1289,7 @@ void Tree::to_doc(size_t node, type_bits more_flags) _p(node)->m_val.clear(); } -void Tree::to_stream(size_t node, type_bits more_flags) +void Tree::to_stream(id_type node, type_bits more_flags) { _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); _set_flags(node, STREAM|more_flags); @@ -1699,10 +1299,10 @@ void Tree::to_stream(size_t node, type_bits more_flags) //----------------------------------------------------------------------------- -size_t Tree::num_tag_directives() const +id_type Tree::num_tag_directives() const { // this assumes we have a very small number of tag directives - for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) if(m_tag_directives[i].handle.empty()) return i; return RYML_MAX_TAG_DIRECTIVES; @@ -1714,149 +1314,142 @@ void Tree::clear_tag_directives() td = {}; } -size_t Tree::add_tag_directive(TagDirective const& td) +id_type Tree::add_tag_directive(TagDirective const& td) { _RYML_CB_CHECK(m_callbacks, !td.handle.empty()); _RYML_CB_CHECK(m_callbacks, !td.prefix.empty()); - _RYML_CB_ASSERT(m_callbacks, td.handle.begins_with('!')); - _RYML_CB_ASSERT(m_callbacks, td.handle.ends_with('!')); + _RYML_CB_CHECK(m_callbacks, td.handle.begins_with('!')); + _RYML_CB_CHECK(m_callbacks, td.handle.ends_with('!')); // https://yaml.org/spec/1.2.2/#rule-ns-word-char - _RYML_CB_ASSERT(m_callbacks, td.handle == '!' || td.handle == "!!" || td.handle.trim('!').first_not_of("01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") == npos); - size_t pos = num_tag_directives(); + _RYML_CB_CHECK(m_callbacks, td.handle == '!' || td.handle == "!!" 
|| td.handle.trim('!').first_not_of("01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") == npos); + id_type pos = num_tag_directives(); _RYML_CB_CHECK(m_callbacks, pos < RYML_MAX_TAG_DIRECTIVES); m_tag_directives[pos] = td; return pos; } -size_t Tree::resolve_tag(substr output, csubstr tag, size_t node_id) const +bool Tree::add_tag_directive(csubstr directive_) +{ + TagDirective td; + if(td.create_from_str(directive_, this)) + { + add_tag_directive(td); + return true; + } + return false; +} + +size_t Tree::resolve_tag(substr output, csubstr tag, id_type node_id) const { // lookup from the end. We want to find the first directive that // matches the tag and has a target node id leq than the given // node_id. - for(size_t i = RYML_MAX_TAG_DIRECTIVES-1; i != (size_t)-1; --i) + for(id_type i = RYML_MAX_TAG_DIRECTIVES-1; i != (id_type)-1; --i) { auto const& td = m_tag_directives[i]; if(td.handle.empty()) continue; if(tag.begins_with(td.handle) && td.next_node_id <= node_id) + return td.transform(tag, output, m_callbacks); + } + if(tag.begins_with('!')) + { + if(is_custom_tag(tag)) { - _RYML_CB_ASSERT(m_callbacks, tag.len >= td.handle.len); - csubstr rest = tag.sub(td.handle.len); - size_t len = 1u + td.prefix.len + rest.len + 1u; - size_t numpc = rest.count('%'); - if(numpc == 0) - { - if(len <= output.len) - { - output.str[0] = '<'; - memcpy(1u + output.str, td.prefix.str, td.prefix.len); - memcpy(1u + output.str + td.prefix.len, rest.str, rest.len); - output.str[1u + td.prefix.len + rest.len] = '>'; - } - } - else - { - // need to decode URI % sequences - size_t pos = rest.find('%'); - _RYML_CB_ASSERT(m_callbacks, pos != npos); - do { - size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); - if(next == npos) - next = rest.len; - _RYML_CB_CHECK(m_callbacks, pos+1 < next); - _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); - size_t delta = next - (pos+1); - len -= delta; - pos = rest.find('%', pos+1); - } while(pos != npos); - if(len <= 
output.len) - { - size_t prev = 0, wpos = 0; - auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; }; - auto appendchar = [&](char c) { output.str[wpos++] = c; }; - appendchar('<'); - appendstr(td.prefix); - pos = rest.find('%'); - _RYML_CB_ASSERT(m_callbacks, pos != npos); - do { - size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); - if(next == npos) - next = rest.len; - _RYML_CB_CHECK(m_callbacks, pos+1 < next); - _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); - uint8_t val; - if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127)) - _RYML_CB_ERR(m_callbacks, "invalid URI character"); - appendstr(rest.range(prev, pos)); - appendchar((char)val); - prev = next; - pos = rest.find('%', pos+1); - } while(pos != npos); - _RYML_CB_ASSERT(m_callbacks, pos == npos); - _RYML_CB_ASSERT(m_callbacks, prev > 0); - _RYML_CB_ASSERT(m_callbacks, rest.len >= prev); - appendstr(rest.sub(prev)); - appendchar('>'); - _RYML_CB_ASSERT(m_callbacks, wpos == len); - } - } - return len; + _RYML_CB_ERR(m_callbacks, "tag directive not found"); } } return 0; // return 0 to signal that the tag is local and cannot be resolved } namespace { -csubstr _transform_tag(Tree *t, csubstr tag, size_t node) +csubstr _transform_tag(Tree *t, csubstr tag, id_type node) { + _c4dbgpf("[{}] resolving tag ~~~{}~~~", node, tag); size_t required_size = t->resolve_tag(substr{}, tag, node); if(!required_size) + { + if(tag.begins_with("!<")) + tag = tag.sub(1); + _c4dbgpf("[{}] resolved tag: ~~~{}~~~", node, tag); return tag; - const char *prev_arena = t->arena().str; (void)prev_arena; + } + const char *prev_arena = t->arena().str;(void)prev_arena; substr buf = t->alloc_arena(required_size); _RYML_CB_ASSERT(t->m_callbacks, t->arena().str == prev_arena); size_t actual_size = t->resolve_tag(buf, tag, node); _RYML_CB_ASSERT(t->m_callbacks, actual_size <= required_size); + _c4dbgpf("[{}] resolved tag: ~~~{}~~~", node, buf.first(actual_size)); return 
buf.first(actual_size); } -void _resolve_tags(Tree *t, size_t node) +void _resolve_tags(Tree *t, id_type node) { - for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) - { - if(t->has_key(child) && t->has_key_tag(child)) - t->set_key_tag(child, _transform_tag(t, t->key_tag(child), child)); - if(t->has_val(child) && t->has_val_tag(child)) - t->set_val_tag(child, _transform_tag(t, t->val_tag(child), child)); + NodeData *C4_RESTRICT d = t->_p(node); + if(d->m_type & KEYTAG) + d->m_key.tag = _transform_tag(t, d->m_key.tag, node); + if(d->m_type & VALTAG) + d->m_val.tag = _transform_tag(t, d->m_val.tag, node); + for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child)) _resolve_tags(t, child); - } } -size_t _count_resolved_tags_size(Tree const* t, size_t node) +size_t _count_resolved_tags_size(Tree const* t, id_type node) { size_t sz = 0; - for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) - { - if(t->has_key(child) && t->has_key_tag(child)) - sz += t->resolve_tag(substr{}, t->key_tag(child), child); - if(t->has_val(child) && t->has_val_tag(child)) - sz += t->resolve_tag(substr{}, t->val_tag(child), child); + NodeData const* C4_RESTRICT d = t->_p(node); + if(d->m_type & KEYTAG) + sz += t->resolve_tag(substr{}, d->m_key.tag, node); + if(d->m_type & VALTAG) + sz += t->resolve_tag(substr{}, d->m_val.tag, node); + for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child)) sz += _count_resolved_tags_size(t, child); - } return sz; } +void _normalize_tags(Tree *t, id_type node) +{ + NodeData *C4_RESTRICT d = t->_p(node); + if(d->m_type & KEYTAG) + d->m_key.tag = normalize_tag(d->m_key.tag); + if(d->m_type & VALTAG) + d->m_val.tag = normalize_tag(d->m_val.tag); + for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + _normalize_tags(t, child); +} +void _normalize_tags_long(Tree *t, id_type node) +{ + NodeData 
*C4_RESTRICT d = t->_p(node); + if(d->m_type & KEYTAG) + d->m_key.tag = normalize_tag_long(d->m_key.tag); + if(d->m_type & VALTAG) + d->m_val.tag = normalize_tag_long(d->m_val.tag); + for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + _normalize_tags_long(t, child); +} } // namespace void Tree::resolve_tags() { if(empty()) return; - if(num_tag_directives() == 0) - return; size_t needed_size = _count_resolved_tags_size(this, root_id()); if(needed_size) reserve_arena(arena_size() + needed_size); _resolve_tags(this, root_id()); } +void Tree::normalize_tags() +{ + if(empty()) + return; + _normalize_tags(this, root_id()); +} + +void Tree::normalize_tags_long() +{ + if(empty()) + return; + _normalize_tags_long(this, root_id()); +} + //----------------------------------------------------------------------------- @@ -1880,7 +1473,7 @@ void Tree::_advance(lookup_result *r, size_t more) const ++r->path_pos; } -Tree::lookup_result Tree::lookup_path(csubstr path, size_t start) const +Tree::lookup_result Tree::lookup_path(csubstr path, id_type start) const { if(start == NONE) start = root_id(); @@ -1893,9 +1486,9 @@ Tree::lookup_result Tree::lookup_path(csubstr path, size_t start) const return r; } -size_t Tree::lookup_path_or_modify(csubstr default_value, csubstr path, size_t start) +id_type Tree::lookup_path_or_modify(csubstr default_value, csubstr path, id_type start) { - size_t target = _lookup_path_or_create(path, start); + id_type target = _lookup_path_or_create(path, start); if(parent_is_map(target)) to_keyval(target, key(target), default_value); else @@ -1903,14 +1496,14 @@ size_t Tree::lookup_path_or_modify(csubstr default_value, csubstr path, size_t s return target; } -size_t Tree::lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start) +id_type Tree::lookup_path_or_modify(Tree const *src, id_type src_node, csubstr path, id_type start) { - size_t target = _lookup_path_or_create(path, start); + id_type 
target = _lookup_path_or_create(path, start); merge_with(src, src_node, target); return target; } -size_t Tree::_lookup_path_or_create(csubstr path, size_t start) +id_type Tree::_lookup_path_or_create(csubstr path, id_type start) { if(start == NONE) start = root_id(); @@ -1929,7 +1522,7 @@ void Tree::_lookup_path(lookup_result *r) const { C4_ASSERT( ! r->unresolved().empty()); _lookup_path_token parent{"", type(r->closest)}; - size_t node; + id_type node; do { node = _next_node(r, &parent); @@ -1947,7 +1540,7 @@ void Tree::_lookup_path_modify(lookup_result *r) { C4_ASSERT( ! r->unresolved().empty()); _lookup_path_token parent{"", type(r->closest)}; - size_t node; + id_type node; do { node = _next_node_modify(r, &parent); @@ -1961,13 +1554,13 @@ void Tree::_lookup_path_modify(lookup_result *r) } while(node != NONE); } -size_t Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const +id_type Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const { _lookup_path_token token = _next_token(r, *parent); if( ! token) return NONE; - size_t node = NONE; + id_type node = NONE; csubstr prev = token.value; if(token.type == MAP || token.type == SEQ) { @@ -1986,7 +1579,7 @@ size_t Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const { _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); token.value = token.value.offs(1, 1).trim(' '); - size_t idx = 0; + id_type idx = 0; _RYML_CB_CHECK(m_callbacks, from_chars(token.value, &idx)); node = child(r->closest, idx); } @@ -2010,13 +1603,13 @@ size_t Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const return node; } -size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) +id_type Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) { _lookup_path_token token = _next_token(r, *parent); if( ! 
token) return NONE; - size_t node = NONE; + id_type node = NONE; if(token.type == MAP || token.type == SEQ) { _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); @@ -2034,7 +1627,7 @@ size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) node = find_child(r->closest, token.value); else { - size_t pos = NONE; + id_type pos = NONE; _RYML_CB_CHECK(m_callbacks, c4::atox(token.value, &pos)); _RYML_CB_ASSERT(m_callbacks, pos != NONE); node = child(r->closest, pos); @@ -2073,7 +1666,7 @@ size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) { _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); token.value = token.value.offs(1, 1).trim(' '); - size_t idx; + id_type idx; if( ! from_chars(token.value, &idx)) return NONE; if( ! is_container(r->closest)) @@ -2095,7 +1688,7 @@ size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) if(node == NONE) { _RYML_CB_ASSERT(m_callbacks, num_children(r->closest) <= idx); - for(size_t i = num_children(r->closest); i <= idx; ++i) + for(id_type i = num_children(r->closest); i <= idx; ++i) { node = append_child(r->closest); if(i < idx) @@ -2118,7 +1711,7 @@ size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) return node; } -/** types of tokens: +/* types of tokens: * - seeing "map." ---> "map"/MAP * - finishing "scalar" ---> "scalar"/KEYVAL * - seeing "seq[n]" ---> "seq"/SEQ (--> "[n]"/KEY)