diff --git a/ext/json/ext/generator/depend b/ext/json/ext/generator/depend index 21bdc420..14241d7f 100644 --- a/ext/json/ext/generator/depend +++ b/ext/json/ext/generator/depend @@ -1,4 +1,4 @@ generator.o: generator.c $(srcdir)/../fbuffer/fbuffer.h generator.o: generator.c $(srcdir)/../vendor/fpconv.c generator.o: generator.c $(srcdir)/../vendor/jeaiii-ltoa.h -generator.o: generator.c $(srcdir)/simd.h +generator.o: generator.c $(srcdir)/../simd/simd.h diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb index f58574a6..aaf02c77 100644 --- a/ext/json/ext/generator/extconf.rb +++ b/ext/json/ext/generator/extconf.rb @@ -9,31 +9,7 @@ $defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"] if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) - if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/ - # Try to compile a small program using NEON instructions - if have_header('arm_neon.h') - have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC') - #include - int main() { - uint8x16_t test = vdupq_n_u8(32); - return 0; - } - SRC - $defs.push("-DJSON_ENABLE_SIMD") - end - end - - if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC') - #include - int main() { - __m128i test = _mm_set1_epi8(32); - return 0; - } - SRC - $defs.push("-DJSON_ENABLE_SIMD") - end - - have_header('cpuid.h') + require_relative "../simd/conf.rb" end create_makefile 'json/ext/generator' diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 43a7f5f6..01e8badc 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -5,7 +5,7 @@ #include #include -#include "simd.h" +#include "../simd/simd.h" /* ruby api and some helpers */ @@ -304,28 +304,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search) return 1; } -// See: 
https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon -static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches) -{ - const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4); - const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0); - return mask & 0x8888888888888888ull; -} - -static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr) -{ - uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr); - - // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 - // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ - const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33)); - - uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\')); - uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash); - - return neon_match_mask(needs_escape); -} - static inline unsigned char search_escape_basic_neon(search_state *search) { if (RB_UNLIKELY(search->has_matches)) { @@ -380,14 +358,8 @@ static inline unsigned char search_escape_basic_neon(search_state *search) * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we * have at least one byte that needs to be escaped. 
*/ - while (search->ptr + sizeof(uint8x16_t) <= search->end) { - uint64_t mask = neon_rules_update(search->ptr); - if (!mask) { - search->ptr += sizeof(uint8x16_t); - continue; - } - search->matches_mask = mask; + if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) { search->has_matches = true; search->chunk_base = search->ptr; search->chunk_end = search->ptr + sizeof(uint8x16_t); @@ -399,7 +371,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search) if (remaining >= SIMD_MINIMUM_THRESHOLD) { char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining); - uint64_t mask = neon_rules_update(s); + uint64_t mask = compute_chunk_mask_neon(s); if (!mask) { // Nothing to escape, ensure search_flush doesn't do anything by setting @@ -428,11 +400,6 @@ static inline unsigned char search_escape_basic_neon(search_state *search) #ifdef HAVE_SIMD_SSE2 -#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a) -#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a) -#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1)) -#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a) - static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search) { int mask = search->matches_mask; @@ -457,18 +424,6 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search) #define TARGET_SSE2 #endif -static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr) -{ - __m128i chunk = _mm_loadu_si128((__m128i const*)ptr); - - // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 - // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ - __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33)); - __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\')); - __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash); - return 
_mm_movemask_epi8(needs_escape); -} - static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search) { if (RB_UNLIKELY(search->has_matches)) { @@ -487,17 +442,10 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se } } - while (search->ptr + sizeof(__m128i) <= search->end) { - int needs_escape_mask = sse2_update(search->ptr); - - if (needs_escape_mask == 0) { - search->ptr += sizeof(__m128i); - continue; - } - + if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) { search->has_matches = true; - search->matches_mask = needs_escape_mask; search->chunk_base = search->ptr; + search->chunk_end = search->ptr + sizeof(__m128i); return sse2_next_match(search); } @@ -506,7 +454,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se if (remaining >= SIMD_MINIMUM_THRESHOLD) { char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining); - int needs_escape_mask = sse2_update(s); + int needs_escape_mask = compute_chunk_mask_sse2(s); if (needs_escape_mask == 0) { // Nothing to escape, ensure search_flush doesn't do anything by setting diff --git a/ext/json/ext/generator/simd.h b/ext/json/ext/generator/simd.h deleted file mode 100644 index 329c0387..00000000 --- a/ext/json/ext/generator/simd.h +++ /dev/null @@ -1,112 +0,0 @@ -typedef enum { - SIMD_NONE, - SIMD_NEON, - SIMD_SSE2 -} SIMD_Implementation; - -#ifdef JSON_ENABLE_SIMD - -#ifdef __clang__ - #if __has_builtin(__builtin_ctzll) - #define HAVE_BUILTIN_CTZLL 1 - #else - #define HAVE_BUILTIN_CTZLL 0 - #endif -#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) - #define HAVE_BUILTIN_CTZLL 1 -#else - #define HAVE_BUILTIN_CTZLL 0 -#endif - -static inline uint32_t trailing_zeros64(uint64_t input) { -#if HAVE_BUILTIN_CTZLL - return __builtin_ctzll(input); -#else - uint32_t trailing_zeros = 0; - uint64_t temp = input; - while ((temp & 1) == 0 && temp > 0) { - 
trailing_zeros++; - temp >>= 1; - } - return trailing_zeros; -#endif -} - -static inline int trailing_zeros(int input) { - #if HAVE_BUILTIN_CTZLL - return __builtin_ctz(input); - #else - int trailing_zeros = 0; - int temp = input; - while ((temp & 1) == 0 && temp > 0) { - trailing_zeros++; - temp >>= 1; - } - return trailing_zeros; - #endif -} - -#define SIMD_MINIMUM_THRESHOLD 6 - -#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64) -#include - -#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 -static SIMD_Implementation find_simd_implementation(void) { - return SIMD_NEON; -} - -#define HAVE_SIMD 1 -#define HAVE_SIMD_NEON 1 - -uint8x16x4_t load_uint8x16_4(const unsigned char *table) { - uint8x16x4_t tab; - tab.val[0] = vld1q_u8(table); - tab.val[1] = vld1q_u8(table+16); - tab.val[2] = vld1q_u8(table+32); - tab.val[3] = vld1q_u8(table+48); - return tab; -} - -#endif /* ARM Neon Support.*/ - -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) - -#ifdef HAVE_X86INTRIN_H -#include - -#define HAVE_SIMD 1 -#define HAVE_SIMD_SSE2 1 - -#ifdef HAVE_CPUID_H -#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 - -#include -#endif /* HAVE_CPUID_H */ - -static SIMD_Implementation find_simd_implementation(void) { - -#if defined(__GNUC__ ) || defined(__clang__) -#ifdef __GNUC__ - __builtin_cpu_init(); -#endif /* __GNUC__ */ - - // TODO Revisit. I think the SSE version now only uses SSE2 instructions. 
- if (__builtin_cpu_supports("sse2")) { - return SIMD_SSE2; - } -#endif /* __GNUC__ || __clang__*/ - - return SIMD_NONE; -} - -#endif /* HAVE_X86INTRIN_H */ -#endif /* X86_64 Support */ - -#endif /* JSON_ENABLE_SIMD */ - -#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED -static SIMD_Implementation find_simd_implementation(void) { - return SIMD_NONE; -} -#endif diff --git a/ext/json/ext/parser/depend b/ext/json/ext/parser/depend index c051a244..a1926b7d 100644 --- a/ext/json/ext/parser/depend +++ b/ext/json/ext/parser/depend @@ -1 +1,2 @@ parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h +parser.o: parser.c $(srcdir)/../simd/simd.h diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb index 09c96377..0b62fd61 100644 --- a/ext/json/ext/parser/extconf.rb +++ b/ext/json/ext/parser/extconf.rb @@ -8,4 +8,8 @@ append_cflags("-std=c99") +if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"]) + require_relative "../simd/conf.rb" +end + create_makefile 'json/ext/parser' diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 627971eb..d7796948 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -20,6 +20,8 @@ typedef unsigned char _Bool; #endif #endif +#include "../simd/simd.h" + #ifndef RB_UNLIKELY #define RB_UNLIKELY(expr) expr #endif @@ -879,7 +881,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig * return value; } -static const bool string_scan[256] = { +static const bool string_scan_table[256] = { // ASCII Control Characters 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -892,32 +894,71 @@ static const bool string_scan[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +#if (defined(__GNUC__ ) || defined(__clang__)) +#define FORCE_INLINE __attribute__((always_inline)) +#else +#define FORCE_INLINE +#endif + +#ifdef HAVE_SIMD +static SIMD_Implementation simd_impl = SIMD_NONE; +#endif /* HAVE_SIMD 
*/ + +static inline bool FORCE_INLINE string_scan(JSON_ParserState *state) +{ +#ifdef HAVE_SIMD +#if defined(HAVE_SIMD_NEON) + + uint64_t mask = 0; + if (string_scan_simd_neon(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros64(mask) >> 2; + return 1; + } + +#elif defined(HAVE_SIMD_SSE2) + if (simd_impl == SIMD_SSE2) { + int mask = 0; + if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) { + state->cursor += trailing_zeros(mask); + return 1; + } + } +#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */ +#endif /* HAVE_SIMD */ + + while (state->cursor < state->end) { + if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) { + return 1; + } + state->cursor++; + } + return 0; +} + static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name) { state->cursor++; const char *start = state->cursor; bool escaped = false; - while (state->cursor < state->end) { - if (RB_UNLIKELY(string_scan[(unsigned char)*state->cursor])) { - switch (*state->cursor) { - case '"': { - VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name); - state->cursor++; - return json_push_value(state, config, string); - } - case '\\': { - state->cursor++; - escaped = true; - if ((unsigned char)*state->cursor < 0x20) { - raise_parse_error("invalid ASCII control character in string: %s", state); - } - break; - } - default: + while (RB_UNLIKELY(string_scan(state))) { + switch (*state->cursor) { + case '"': { + VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name); + state->cursor++; + return json_push_value(state, config, string); + } + case '\\': { + state->cursor++; + escaped = true; + if ((unsigned char)*state->cursor < 0x20) { raise_parse_error("invalid ASCII control character in string: %s", state); - break; + } + break; } + default: + raise_parse_error("invalid ASCII control character in string: %s", state); + break; } state->cursor++; @@ -1459,4
+1500,8 @@ void Init_parser(void) binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); enc_utf8 = rb_utf8_encoding(); + +#ifdef HAVE_SIMD + simd_impl = find_simd_implementation(); +#endif } diff --git a/ext/json/ext/simd/conf.rb b/ext/json/ext/simd/conf.rb new file mode 100644 index 00000000..6393cf78 --- /dev/null +++ b/ext/json/ext/simd/conf.rb @@ -0,0 +1,25 @@ +if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/ + # Try to compile a small program using NEON instructions + if have_header('arm_neon.h') && + have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC') + #include <arm_neon.h> + int main() { + uint8x16_t test = vdupq_n_u8(32); + return 0; + } + SRC + $defs.push("-DJSON_ENABLE_SIMD") + end +end + +if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC') + #include <x86intrin.h> + int main() { + __m128i test = _mm_set1_epi8(32); + return 0; + } + SRC + $defs.push("-DJSON_ENABLE_SIMD") +end + +have_header('cpuid.h') diff --git a/ext/json/ext/simd/simd.h b/ext/json/ext/simd/simd.h new file mode 100644 index 00000000..ed2a6d46 --- /dev/null +++ b/ext/json/ext/simd/simd.h @@ -0,0 +1,189 @@ +typedef enum { + SIMD_NONE, + SIMD_NEON, + SIMD_SSE2 +} SIMD_Implementation; + +#ifdef JSON_ENABLE_SIMD + +#ifdef __clang__ + #if __has_builtin(__builtin_ctzll) + #define HAVE_BUILTIN_CTZLL 1 + #else + #define HAVE_BUILTIN_CTZLL 0 + #endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define HAVE_BUILTIN_CTZLL 1 +#else + #define HAVE_BUILTIN_CTZLL 0 +#endif + +static inline uint32_t trailing_zeros64(uint64_t input) { +#if HAVE_BUILTIN_CTZLL + return __builtin_ctzll(input); +#else + uint32_t trailing_zeros = 0; + uint64_t temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; +#endif +} + +static inline int trailing_zeros(int input) { + #if HAVE_BUILTIN_CTZLL + return __builtin_ctz(input); + #else + int
trailing_zeros = 0; + int temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; + #endif +} + +#if (defined(__GNUC__ ) || defined(__clang__)) +#define FORCE_INLINE __attribute__((always_inline)) +#else +#define FORCE_INLINE +#endif + + +#define SIMD_MINIMUM_THRESHOLD 6 + +#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64) +#include <arm_neon.h> + +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 +static SIMD_Implementation find_simd_implementation(void) { + return SIMD_NEON; +} + +#define HAVE_SIMD 1 +#define HAVE_SIMD_NEON 1 + +// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon +static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches) +{ + const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4); + const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0); + return mask & 0x8888888888888888ull; +} + +static inline FORCE_INLINE uint64_t compute_chunk_mask_neon(const char *ptr) +{ + uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr); + + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33)); + + uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\')); + uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash); + return neon_match_mask(needs_escape); +} + +static inline FORCE_INLINE int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask) +{ + while(*ptr + sizeof(uint8x16_t) <= end) { + uint64_t chunk_mask = compute_chunk_mask_neon(*ptr); + if (chunk_mask) { + *mask = chunk_mask; + return 1; + } + *ptr += sizeof(uint8x16_t); + } + return 0; +} + +static inline uint8x16x4_t load_uint8x16_4(const unsigned char
*table) { + uint8x16x4_t tab; + tab.val[0] = vld1q_u8(table); + tab.val[1] = vld1q_u8(table+16); + tab.val[2] = vld1q_u8(table+32); + tab.val[3] = vld1q_u8(table+48); + return tab; +} + +#endif /* ARM Neon Support.*/ + +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) + +#ifdef HAVE_X86INTRIN_H +#include <x86intrin.h> + +#define HAVE_SIMD 1 +#define HAVE_SIMD_SSE2 1 + +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 + +#ifdef HAVE_CPUID_H +#include <cpuid.h> +#endif /* HAVE_CPUID_H */ + +#if defined(__clang__) || defined(__GNUC__) +#define TARGET_SSE2 __attribute__((target("sse2"))) +#else +#define TARGET_SSE2 +#endif + +#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a) +#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a) +#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1)) +#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a) + +static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *ptr) +{ + __m128i chunk = _mm_loadu_si128((__m128i const*)ptr); + // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33 + // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/ + __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33)); + __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\')); + __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash); + return _mm_movemask_epi8(needs_escape); +} + +static inline TARGET_SSE2 FORCE_INLINE int string_scan_simd_sse2(const char **ptr, const char *end, int *mask) +{ + while (*ptr + sizeof(__m128i) <= end) { + int chunk_mask = compute_chunk_mask_sse2(*ptr); + if (chunk_mask) { + *mask = chunk_mask; + return 1; + } + *ptr += sizeof(__m128i); + } + + return 0; +} + +static SIMD_Implementation find_simd_implementation(void) { + +#if defined(__GNUC__ ) || defined(__clang__) +#ifdef __GNUC__ +
__builtin_cpu_init(); +#endif /* __GNUC__ */ + + // TODO Revisit. I think the SSE version now only uses SSE2 instructions. + if (__builtin_cpu_supports("sse2")) { + return SIMD_SSE2; + } +#endif /* __GNUC__ || __clang__*/ + + return SIMD_NONE; +} + +#endif /* HAVE_X86INTRIN_H */ +#endif /* X86_64 Support */ + +#endif /* JSON_ENABLE_SIMD */ + +#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED +static SIMD_Implementation find_simd_implementation(void) { + return SIMD_NONE; +} +#endif diff --git a/json.gemspec b/json.gemspec index 943c78aa..07426363 100644 --- a/json.gemspec +++ b/json.gemspec @@ -52,7 +52,7 @@ spec = Gem::Specification.new do |s| s.files += Dir["lib/json/ext/**/*.jar"] else s.extensions = Dir["ext/json/**/extconf.rb"] - s.files += Dir["ext/json/**/*.{c,h}"] + s.files += Dir["ext/json/**/*.{c,h,rb}"] end end diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 739a4cf6..106492e1 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -469,6 +469,90 @@ def test_backslash json = '["\/"]' data = [ '/' ] assert_equal data, parse(json) + + data = ['"""""""""""""""""""""""""'] + json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]' + assert_equal data, parse(json) + + data = '["This is a "test" of the emergency broadcast system."]' + json = "\"[\\\"This is a \\\"test\\\" of the emergency broadcast system.\\\"]\"" + assert_equal data, parse(json) + + data = '\tThis is a test of the emergency broadcast system.' + json = "\"\\\\tThis is a test of the emergency broadcast system.\"" + assert_equal data, parse(json) + + data = 'This\tis a test of the emergency broadcast system.' + json = "\"This\\\\tis a test of the emergency broadcast system.\"" + assert_equal data, parse(json) + + data = 'This is\ta test of the emergency broadcast system.' + json = "\"This is\\\\ta test of the emergency broadcast system.\"" + assert_equal data, parse(json) + + data = 'This is a test of the emergency broadcast\tsystem.' 
+ json = "\"This is a test of the emergency broadcast\\\\tsystem.\"" + assert_equal data, parse(json) + + data = 'This is a test of the emergency broadcast\tsystem.\n' + json = "\"This is a test of the emergency broadcast\\\\tsystem.\\\\n\"" + assert_equal data, parse(json) + + data = '"' * 15 + json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\"" + assert_equal data, parse(json) + + data = "\"\"\"\"\"\"\"\"\"\"\"\"\"\"a" + json = "\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"\\\"a\"" + assert_equal data, parse(json) + + data = "\u0001\u0001\u0001\u0001" + json = "\"\\u0001\\u0001\\u0001\\u0001\"" + assert_equal data, parse(json) + + data = "\u0001a\u0001a\u0001a\u0001a" + json = "\"\\u0001a\\u0001a\\u0001a\\u0001a\"" + assert_equal data, parse(json) + + data = "\u0001aa\u0001aa" + json = "\"\\u0001aa\\u0001aa\"" + assert_equal data, parse(json) + + data = "\u0001aa\u0001aa\u0001aa" + json = "\"\\u0001aa\\u0001aa\\u0001aa\"" + assert_equal data, parse(json) + + data = "\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa\u0001aa" + json = "\"\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\\u0001aa\"" + assert_equal data, parse(json) + + data = "\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002\u0001a\u0002" + json = "\"\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\\u0001a\\u0002\"" + assert_equal data, parse(json) + + data = "ab\u0002c" + json = "\"ab\\u0002c\"" + assert_equal data, parse(json) + + data = "ab\u0002cab\u0002cab\u0002cab\u0002c" + json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002c\"" + assert_equal data, parse(json) + + data = "ab\u0002cab\u0002cab\u0002cab\u0002cab\u0002cab\u0002c" + json = "\"ab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002cab\\u0002c\"" + assert_equal data, parse(json) + + data = "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f" + json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\"" + assert_equal data, parse(json) + + data 
= "\n\t\f\b\n\t\f\b\n\t\f\b\n\t\f\b" + json = "\"\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\"" + assert_equal data, parse(json) + + data = "a\n\t\f\b\n\t\f\b\n\t\f\b\n\t" + json = "\"a\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\\f\\b\\n\\t\"" + assert_equal data, parse(json) end class SubArray < Array