Skip to content

Optimize 'json_parse_string' using ARM Neon. #816

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ext/json/ext/generator/depend
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
generator.o: generator.c $(srcdir)/../fbuffer/fbuffer.h
generator.o: generator.c $(srcdir)/../vendor/fpconv.c
generator.o: generator.c $(srcdir)/../vendor/jeaiii-ltoa.h
generator.o: generator.c $(srcdir)/simd.h
generator.o: generator.c $(srcdir)/../simd/simd.h
26 changes: 1 addition & 25 deletions ext/json/ext/generator/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,7 @@
$defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"]

if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
# Try to compile a small program using NEON instructions
if have_header('arm_neon.h')
have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
#include <arm_neon.h>
int main() {
uint8x16_t test = vdupq_n_u8(32);
return 0;
}
SRC
$defs.push("-DJSON_ENABLE_SIMD")
end
end

if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC')
#include <x86intrin.h>
int main() {
__m128i test = _mm_set1_epi8(32);
return 0;
}
SRC
$defs.push("-DJSON_ENABLE_SIMD")
end

have_header('cpuid.h')
require_relative "../simd/conf.rb"
end

create_makefile 'json/ext/generator'
Expand Down
64 changes: 6 additions & 58 deletions ext/json/ext/generator/generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <math.h>
#include <ctype.h>

#include "simd.h"
#include "../simd/simd.h"

/* ruby api and some helpers */

Expand Down Expand Up @@ -304,28 +304,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
return 1;
}

/*
 * Collapse a 16-byte NEON vector into a 64-bit scalar mask, 4 bits per input byte.
 *
 * The callers in this file pass the result of byte-wise comparisons, i.e. every
 * byte is expected to be either 0x00 or 0xFF. After masking, bit 3 of each nibble
 * is set when the corresponding input byte was a match, so a zero return value
 * means "no byte in this chunk matched".
 *
 * See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
 */
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
{
    // View the 16 bytes as 8 x 16-bit lanes, then shift each lane right by 4 and
    // narrow it to 8 bits: every input byte ends up contributing one nibble.
    const uint16x8_t wide_lanes = vreinterpretq_u16_u8(matches);
    const uint8x8_t nibbles = vshrn_n_u16(wide_lanes, 4);

    // Pull the 8 narrowed bytes out as a single 64-bit scalar.
    const uint64x1_t packed = vreinterpret_u64_u8(nibbles);

    // Keep a single bit per nibble.
    return vget_lane_u64(packed, 0) & 0x8888888888888888ull;
}

/*
 * Load 16 bytes from ptr (vld1q_u8 reads a full 16-byte chunk) and return the
 * neon_match_mask() of the bytes that need escaping: c < 32, c == '"' (34) or
 * c == '\\'. A return value of 0 means none of the 16 bytes needs escaping.
 */
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
{
    const uint8x16_t bytes = vld1q_u8((const unsigned char *)ptr);

    // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
    // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
    const uint8x16_t flipped = veorq_u8(bytes, vdupq_n_u8(2));
    const uint8x16_t ctrl_or_dbl_quote = vcltq_u8(flipped, vdupq_n_u8(33));

    // Backslash is handled by a plain equality test.
    const uint8x16_t backslash = vceqq_u8(bytes, vdupq_n_u8('\\'));

    return neon_match_mask(vorrq_u8(ctrl_or_dbl_quote, backslash));
}

static inline unsigned char search_escape_basic_neon(search_state *search)
{
if (RB_UNLIKELY(search->has_matches)) {
Expand Down Expand Up @@ -380,14 +358,8 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
* have at least one byte that needs to be escaped.
*/
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
uint64_t mask = neon_rules_update(search->ptr);

if (!mask) {
search->ptr += sizeof(uint8x16_t);
continue;
}
search->matches_mask = mask;
if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
search->has_matches = true;
search->chunk_base = search->ptr;
search->chunk_end = search->ptr + sizeof(uint8x16_t);
Expand All @@ -399,7 +371,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);

uint64_t mask = neon_rules_update(s);
uint64_t mask = compute_chunk_mask_neon(s);

if (!mask) {
// Nothing to escape, ensure search_flush doesn't do anything by setting
Expand Down Expand Up @@ -428,11 +400,6 @@ static inline unsigned char search_escape_basic_neon(search_state *search)

#ifdef HAVE_SIMD_SSE2

/*
 * Unsigned 8-bit ordering comparisons for __m128i, built from _mm_max_epu8 and
 * _mm_cmpeq_epi8. Each macro yields a vector whose bytes are 0xFF where the
 * relation holds and 0x00 where it does not.
 *
 *   a >= b  <=>  max(a, b) == a
 *   a <= b  <=>  b >= a
 *   a >  b  <=>  !(a <= b)   (bitwise NOT via XOR with all-ones)
 *   a <  b  <=>  b > a
 *
 * NOTE: these are macros, so arguments are expanded (and may be evaluated) more
 * than once; pass side-effect-free expressions only.
 */
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)

static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
{
int mask = search->matches_mask;
Expand All @@ -457,18 +424,6 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
#define TARGET_SSE2
#endif

static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
{
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);

// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
return _mm_movemask_epi8(needs_escape);
}

static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
{
if (RB_UNLIKELY(search->has_matches)) {
Expand All @@ -487,17 +442,10 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
}
}

while (search->ptr + sizeof(__m128i) <= search->end) {
int needs_escape_mask = sse2_update(search->ptr);

if (needs_escape_mask == 0) {
search->ptr += sizeof(__m128i);
continue;
}

if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
search->has_matches = true;
search->matches_mask = needs_escape_mask;
search->chunk_base = search->ptr;
search->chunk_end = search->ptr + sizeof(__m128i);
return sse2_next_match(search);
}

Expand All @@ -506,7 +454,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);

int needs_escape_mask = sse2_update(s);
int needs_escape_mask = compute_chunk_mask_sse2(s);

if (needs_escape_mask == 0) {
// Nothing to escape, ensure search_flush doesn't do anything by setting
Expand Down
112 changes: 0 additions & 112 deletions ext/json/ext/generator/simd.h

This file was deleted.

1 change: 1 addition & 0 deletions ext/json/ext/parser/depend
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h
parser.o: parser.c $(srcdir)/../simd/simd.h
4 changes: 4 additions & 0 deletions ext/json/ext/parser/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,8 @@

append_cflags("-std=c99")

if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
require_relative "../simd/conf.rb"
end

create_makefile 'json/ext/parser'
Loading