From 352169cc95a184f1bde02f93ebe9ce47c60e223c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Mon, 31 Oct 2022 14:26:43 +0000 Subject: [PATCH 01/10] Do not hard-code GCC as compiler --- Release/makefile | 7 ++++--- Release/src/subdir.mk | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Release/makefile b/Release/makefile index 9b2dbc1..c5a9237 100644 --- a/Release/makefile +++ b/Release/makefile @@ -1,6 +1,7 @@ -include ../makefile.init RM := rm -rf +CC ?= gcc -include sources.mk -include src/subdir.mk @@ -32,8 +33,8 @@ all: libcleri # Tool invocations libcleri: $(OBJS) $(USER_OBJS) @echo 'Building target: $@' - @echo 'Invoking: Cross GCC Linker' - gcc -shared -Wl,-$(SO_NAME),$(FN).$(MAJOR) -o $(FN) $(OBJS) $(USER_OBJS) $(LIBS) $(LDFLAGS) + @echo "Invoking: Cross $(CC) Linker" + $(CC) -shared -Wl,-$(SO_NAME),$(FN).$(MAJOR) -o $(FN) $(OBJS) $(USER_OBJS) $(LIBS) $(LDFLAGS) @chmod -x $(FN) @echo 'Finished building target: $@' @echo ' ' @@ -49,4 +50,4 @@ clean: -include ../makefile.targets test: - @cd ../test && ./test.sh \ No newline at end of file + @cd ../test && ./test.sh diff --git a/Release/src/subdir.mk b/Release/src/subdir.mk index fe38605..0b88c34 100644 --- a/Release/src/subdir.mk +++ b/Release/src/subdir.mk @@ -73,7 +73,7 @@ C_DEPS += \ src/%.o: ../src/%.c @echo 'Building file: $<' - @echo 'Invoking: Cross GCC Compiler' - gcc -DNDEBUG -I../inc -O3 -Winline -Wall $(CPPFLAGS) $(CFLAGS) -c -fmessage-length=0 -fPIC -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" -o "$@" "$<" + @echo "Invoking: Cross $(CC) Compiler" + $(CC) -DNDEBUG -I../inc -O3 -Winline -Wall $(CPPFLAGS) $(CFLAGS) -c -fmessage-length=0 -fPIC -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" -o "$@" "$<" @echo 'Finished building: $<' @echo ' ' From 0d492754f291c9fb99d868bec56f41551f54e1e8 Mon Sep 17 00:00:00 2001 From: Jeroen van der Heijden Date: Thu, 26 Oct 2023 11:31:57 +0200 Subject: [PATCH 02/10] version in makefile according lib version --- makefile.init | 8 ++++---- 1 
file changed, 4 insertions(+), 4 deletions(-) diff --git a/makefile.init b/makefile.init index bf9d122..0c2b8c8 100644 --- a/makefile.init +++ b/makefile.init @@ -1,4 +1,4 @@ -MAJOR := 0 -MINOR := 12 -PATCH := 2 -VERSION := $(MAJOR).$(MINOR).$(PATCH) \ No newline at end of file +MAJOR := 1 +MINOR := 0 +PATCH := 1 +VERSION := $(MAJOR).$(MINOR).$(PATCH) From 8f6cbbccdced7a81c6bdd2cc051499d0e0d4c2f3 Mon Sep 17 00:00:00 2001 From: Jeroen van der Heijden Date: Thu, 26 Oct 2023 15:09:32 +0200 Subject: [PATCH 03/10] Upd ver and changelog; applied patch by Helmut Grohne --- debian/changelog | 15 +++++++++++++++ debian/rules | 2 +- inc/cleri/version.h | 2 +- makefile.init | 2 +- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/debian/changelog b/debian/changelog index affef18..c07c839 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,18 @@ +libcleri (1.0.2-0~tt1) unstable; urgency=medium + + * Fixed makefile to correct version. + + -- Jeroen van der Heijden Thu, 26 Oct 2023 15:03:14 +0200 + +libcleri (1.0.1-0~tt1.1) UNRELEASED; urgency=medium + + * Non-maintainer upload. + * Fix FTCBFS: (Closes: #-1) + + Let dh_auto_build pass cross tools to make. + + cross.patch: Make gcc substitutable. + + -- Helmut Grohne Sun, 17 May 2020 12:14:38 +0200 + libcleri (1.0.1-0~tt1) unstable; urgency=medium * Changed company name to Cesbit. 
diff --git a/debian/rules b/debian/rules index 1ca834a..422408d 100755 --- a/debian/rules +++ b/debian/rules @@ -12,7 +12,7 @@ export SONAME=0 dh $@ override_dh_auto_build-arch: - $(MAKE) --directory=Release all + dh_auto_build --sourcedirectory=Release -- all override_dh_auto_clean: $(MAKE) --directory=Release clean diff --git a/inc/cleri/version.h b/inc/cleri/version.h index bcb7a32..a7eea62 100644 --- a/inc/cleri/version.h +++ b/inc/cleri/version.h @@ -6,7 +6,7 @@ #define CLERI_VERSION_MAJOR 1 #define CLERI_VERSION_MINOR 0 -#define CLERI_VERSION_PATCH 1 +#define CLERI_VERSION_PATCH 2 #define VERSION__STRINGIFY(num) #num #define VERSION___STR(major,minor,patch) \ diff --git a/makefile.init b/makefile.init index 0c2b8c8..fed5bb2 100644 --- a/makefile.init +++ b/makefile.init @@ -1,4 +1,4 @@ MAJOR := 1 MINOR := 0 -PATCH := 1 +PATCH := 2 VERSION := $(MAJOR).$(MINOR).$(PATCH) From 2964b6ce6ee57dac5d6dfd13f5a3a351c568fb2f Mon Sep 17 00:00:00 2001 From: Jeroen van der Heijden Date: Thu, 26 Oct 2023 15:37:12 +0200 Subject: [PATCH 04/10] upd changelog and version check --- debian/changelog | 1 + test/test_version/test_version.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index c07c839..7d554a1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,7 @@ libcleri (1.0.2-0~tt1) unstable; urgency=medium * Fixed makefile to correct version. 
+ * Do not hard-code GCC as compiler #24, @giordano -- Jeroen van der Heijden Thu, 26 Oct 2023 15:03:14 +0200 diff --git a/test/test_version/test_version.c b/test/test_version/test_version.c index 3ad2622..7f0be5a 100644 --- a/test/test_version/test_version.c +++ b/test/test_version/test_version.c @@ -5,7 +5,7 @@ static int test_version(void) { test_start("version"); - _assert ( strcmp(cleri_version(), "1.0.1") == 0 ); + _assert ( strcmp(cleri_version(), "1.0.2") == 0 ); return test_end(); } From afa3f7b309b6693b97554754fb5bb7c8c6c71f10 Mon Sep 17 00:00:00 2001 From: James Kermode Date: Wed, 27 Sep 2023 17:29:19 +0100 Subject: [PATCH 05/10] add meson build script --- meson.build | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 meson.build diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..5c2e59f --- /dev/null +++ b/meson.build @@ -0,0 +1,32 @@ +inc_dir = include_directories('inc') + +libcleri = static_library('cleri', + 'src/children.c', + 'src/choice.c', + 'src/dup.c', + 'src/expecting.c', + 'src/grammar.c', + 'src/keyword.c', + 'src/kwcache.c', + 'src/list.c', + 'src/node.c', + 'src/cleri.c', + 'src/olist.c', + 'src/optional.c', + 'src/parse.c', + 'src/prio.c', + 'src/ref.c', + 'src/regex.c', + 'src/repeat.c', + 'src/rule.c', + 'src/sequence.c', + 'src/this.c', + 'src/token.c', + 'src/tokens.c', + 'src/version.c', + dependencies: pcre2, + include_directories: inc_dir) + +cleri = declare_dependency( + link_with: libcleri, + include_directories: inc_dir) \ No newline at end of file From bb1a7c348f4f5bd125113c283e6ab31f1cbe3dfd Mon Sep 17 00:00:00 2001 From: James Kermode Date: Fri, 31 Oct 2025 21:21:44 +0000 Subject: [PATCH 06/10] Add minimal Windows/MSVC compatibility for __attribute__ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a minimal compatibility header wincompat.h that makes __attribute__ a no-op on MSVC compilers, which don't support 
it. This is the only compatibility fix needed on top of upstream/master, as the ssize_t issue was already resolved upstream by changing return types to uint8_t in kwcache.h. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- inc/cleri/cleri.h | 2 ++ inc/cleri/wincompat.h | 12 ++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 inc/cleri/wincompat.h diff --git a/inc/cleri/cleri.h b/inc/cleri/cleri.h index eabe606..c7d70ac 100644 --- a/inc/cleri/cleri.h +++ b/inc/cleri/cleri.h @@ -4,6 +4,8 @@ #ifndef CLERI_OBJECT_H_ #define CLERI_OBJECT_H_ +#include <cleri/wincompat.h> + #ifdef __cplusplus #define cleri__malloc(__t) ((__t*)malloc(sizeof(__t))) #else diff --git a/inc/cleri/wincompat.h b/inc/cleri/wincompat.h new file mode 100644 index 0000000..ba801b1 --- /dev/null +++ b/inc/cleri/wincompat.h @@ -0,0 +1,12 @@ +/* + * wincompat.h - Windows/MSVC compatibility definitions + */ +#ifndef CLERI_WINCOMPAT_H_ +#define CLERI_WINCOMPAT_H_ + +/* MSVC doesn't support __attribute__, make it a no-op */ +#ifdef _MSC_VER +#define __attribute__(x) +#endif + +#endif /* CLERI_WINCOMPAT_H_ */ From 5cbb6b94700c914a9527f4c655dc09a3ccf1b252 Mon Sep 17 00:00:00 2001 From: James Kermode Date: Fri, 31 Oct 2025 21:29:40 +0000 Subject: [PATCH 07/10] Fix meson.build: remove deleted children.c source file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The children.c file was removed in commit 3894be2 ("no more children type") but the meson.build file was not updated. This commit removes the reference to the deleted source file from the static library sources list. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- meson.build | 1 - 1 file changed, 1 deletion(-) diff --git a/meson.build b/meson.build index 5c2e59f..f83d8d4 100644 --- a/meson.build +++ b/meson.build @@ -1,7 +1,6 @@ inc_dir = include_directories('inc') libcleri = static_library('cleri', - 'src/children.c', 'src/choice.c', 'src/dup.c', 'src/expecting.c', From e36632af172b8458e256222d9b06b9ac6ef637d3 Mon Sep 17 00:00:00 2001 From: James Kermode Date: Fri, 31 Oct 2025 22:34:32 +0000 Subject: [PATCH 08/10] Add strncasecmp compatibility for MSVC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MSVC doesn't have strncasecmp, it uses _strnicmp instead. This adds a define to map strncasecmp to _strnicmp on MSVC. Fixes Windows linking error: unresolved external symbol strncasecmp 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- inc/cleri/wincompat.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inc/cleri/wincompat.h b/inc/cleri/wincompat.h index ba801b1..f8dab5d 100644 --- a/inc/cleri/wincompat.h +++ b/inc/cleri/wincompat.h @@ -4,9 +4,10 @@ #ifndef CLERI_WINCOMPAT_H_ #define CLERI_WINCOMPAT_H_ -/* MSVC doesn't support __attribute__, make it a no-op */ +/* MSVC compatibility definitions */ #ifdef _MSC_VER #define __attribute__(x) +#define strncasecmp _strnicmp #endif #endif /* CLERI_WINCOMPAT_H_ */ From afc3f0140b79db519fb5ce7661e6bba7a921a73c Mon Sep 17 00:00:00 2001 From: James Kermode Date: Fri, 31 Oct 2025 22:44:47 +0000 Subject: [PATCH 09/10] Add libcleri.a static library target to makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The makefile only had targets for the shared library, but the Python package workflow needs to build the static library. This adds an ar target to create libcleri.a from the object files. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Release/makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Release/makefile b/Release/makefile index c5a9237..3dd7bc0 100644 --- a/Release/makefile +++ b/Release/makefile @@ -39,6 +39,13 @@ libcleri: $(OBJS) $(USER_OBJS) @echo 'Finished building target: $@' @echo ' ' +# Static library target +libcleri.a: $(OBJS) $(USER_OBJS) + @echo 'Building static library: $@' + ar rcs $@ $(OBJS) $(USER_OBJS) + @echo 'Finished building static library: $@' + @echo ' ' + # Other Targets clean: -$(RM) $(LIBRARIES)$(OBJS)$(C_DEPS) $(FN) From 31a507b3c508fefdb5679bac508146b156c49134 Mon Sep 17 00:00:00 2001 From: James Kermode Date: Thu, 30 Apr 2026 09:19:15 +0100 Subject: [PATCH 10/10] perf: JIT-compile PCRE2 patterns when supported Add pcre2_jit_compile(re, PCRE2_JIT_COMPLETE) immediately after each pcre2_compile() call: - src/regex.c: per-rule regex objects (cleri_regex) - src/grammar.c: the global keyword-matching pattern pcre2_match() automatically uses the JIT'd code paths when present and silently falls back to the interpreter otherwise, so this is a strict perf win where supported and a no-op everywhere else (e.g. PCRE2 builds without --enable-jit, in which case pcre2_jit_compile returns PCRE2_ERROR_JIT_BADOPTION which we ignore). Measured against the libAtoms/extxyz benchmark (ase.io.read on a trajectory of N atoms, comparing extxyz's cextxyz path to ASE's built-in regex extxyz reader), the per-atom JIT in extxyz.c gave ~1.85x; this libcleri-side JIT contributes additional speedup on files with rich comment-line info dicts where cleri grammar walking + regex matching dominates. 
--- src/grammar.c | 17 +++++++++++++++++ src/regex.c | 25 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/src/grammar.c b/src/grammar.c index 57ca171..a272bd8 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -6,9 +6,18 @@ #include #include #include +#include #include #include +/* See comment in src/regex.c: skip PCRE2 JIT under valgrind to avoid + * its known JIT-related false-positive uninitialised-value warnings. */ +static int cleri__grammar_under_valgrind(void) +{ + const char *p = getenv("LD_PRELOAD"); + return p != NULL && strstr(p, "valgrind") != NULL; +} + /* * Returns a grammar object or NULL in case of an error. * @@ -58,6 +67,14 @@ cleri_grammar_t * cleri_grammar(cleri_t * start, const char * re_keywords) return NULL; } + /* JIT-compile the keyword-matching pattern; significant speedup on + * long inputs. Falls through silently when JIT is unavailable. + * Skipped under valgrind (see regex.c). */ + if (!cleri__grammar_under_valgrind()) + { + (void) pcre2_jit_compile(grammar->re_keywords, PCRE2_JIT_COMPLETE); + } + grammar->match_data = \ pcre2_match_data_create_from_pattern(grammar->re_keywords, NULL); diff --git a/src/regex.c b/src/regex.c index 929b39e..cde35ca 100644 --- a/src/regex.c +++ b/src/regex.c @@ -4,9 +4,21 @@ #include #include #include +#include #include #include +/* PCRE2 JIT'd code intentionally reads bytes past the input end as a + * speed trick, which valgrind reports as "uninitialised value" reads + * (the PCRE2 docs warn about this). Detect valgrind via the LD_PRELOAD + * libs it injects on Linux and skip JIT compilation in that case. + * Outside of valgrind this is a no-op. 
*/ +static int cleri__under_valgrind(void) +{ + const char *p = getenv("LD_PRELOAD"); + return p != NULL && strstr(p, "valgrind") != NULL; +} + static void regex__free(cleri_t * cl_object); static cleri_node_t * regex__parse( @@ -76,6 +88,19 @@ cleri_t * cleri_regex(uint32_t gid, const char * pattern) return NULL; } + /* JIT-compile the pattern when supported. The hot regex__parse loop + * runs pcre2_match() many times per parse — with JIT enabled it is + * typically 5-30x faster. Falls through silently when JIT is not + * available in the linked PCRE2 build (returns + * PCRE2_ERROR_JIT_BADOPTION); pcre2_match auto-detects whether JIT + * compiled code is present. Skipped under valgrind to avoid known + * JIT-vs-valgrind false-positive reports. + */ + if (!cleri__under_valgrind()) + { + (void) pcre2_jit_compile(cl_object->via.regex->regex, PCRE2_JIT_COMPLETE); + } + cl_object->via.regex->match_data = pcre2_match_data_create_from_pattern( cl_object->via.regex->regex, NULL);