From c07e52d10d215b54704ac4be7243214ef24d39f6 Mon Sep 17 00:00:00 2001 From: Ivan Chikish Date: Tue, 17 Dec 2024 16:17:59 +0300 Subject: [PATCH] [C++] Optimize unicode.cpp with binary search --- Makefile | 2 +- scripts/generate_xcompose | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 03f5e2a..ebe7493 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ endif all: compose man mkdir -p bin cp scripts/keyd-application-mapper bin/ - $(CXX) $(CXXFLAGS) -O3 $(COMPAT_FILES) src/*.c src/*.cpp src/vkbd/$(VKBD).cpp -lpthread -o bin/keyd $(LDFLAGS) + $(CXX) $(CXXFLAGS) -O3 $(COMPAT_FILES) src/*.cpp src/vkbd/$(VKBD).cpp -lpthread -o bin/keyd $(LDFLAGS) debug: CFLAGS="-g -fsanitize=address -Wunused" $(MAKE) compose: diff --git a/scripts/generate_xcompose b/scripts/generate_xcompose index d1edb6f..6084b3b 100755 --- a/scripts/generate_xcompose +++ b/scripts/generate_xcompose @@ -44,23 +44,37 @@ open('data/keyd.compose', 'w').write(data) # table to capitalize on codepoint contiguity, but 35k is small enough to # warrant keeping the entire thing in memory. -open('src/unicode.c', 'w').write(f''' +open('src/unicode.cpp', 'w').write(f''' /* GENERATED BY {sys.argv[0]}, DO NOT MODIFY BY HAND. 
*/
 	#include <stdint.h>
 	#include <stdlib.h>
 	#include "keys.h"
+	#include <algorithm>
+	#include <iterator>
 
-	uint32_t unicode_table[] = {{ {','.join(map(str, codes))} }};
+	constexpr uint32_t unicode_table[] = {{ {','.join(map(str, codes))} }};
 
 	int unicode_lookup_index(uint32_t codepoint)
 	{{
-		size_t i = 0;
-
-		for(i = 0; i < sizeof(unicode_table)/sizeof(unicode_table[0]); i++) {{
-			if (unicode_table[i] == codepoint)
-				return i;
-		}}
+		// Bisect only the slice of the table that can hold codepoint. Assumes unicode_table is sorted ascending (TODO confirm in generator).
+		constexpr auto cb = std::cbegin(unicode_table);
+		constexpr auto ce = std::cend(unicode_table);
+		constexpr auto ce2 = std::lower_bound(cb, ce, 0x1100); // first entry >= 0x1100 (before hangul)
+		constexpr auto ce3 = std::lower_bound(cb, ce2, 0x531); // first entry >= 0x531 (after cyrillic)
+		constexpr auto ce4 = std::lower_bound(cb, ce3, 0x300); // first entry >= 0x300 (after latin, modifiers)
+
+		auto beg = ce2; // default slice: entries >= 0x1100
+		auto end = ce;
+		if (codepoint < 0x300) // compare with the boundary constants; ce4/ce3/ce2 may equal ce and must not be dereferenced
+			beg = cb, end = ce4;
+		else if (codepoint < 0x531)
+			beg = ce4, end = ce3;
+		else if (codepoint < 0x1100)
+			beg = ce3, end = ce2; // upper bound is ce2; ce4 would lie before beg and make the range invalid
+		auto res = std::lower_bound(beg, end, codepoint);
+		if (res != end && *res == codepoint)
+			return static_cast<int>(res - cb); // index relative to the whole table, not the slice
 
 		return -1;
 	}}