Skip to content

Commit

Permalink
[C++] Optimize unicode.cpp with binary search
Browse files Browse the repository at this point in the history
  • Loading branch information
Nekotekina committed Dec 18, 2024
1 parent dba48c1 commit c07e52d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ endif
all: compose man
mkdir -p bin
cp scripts/keyd-application-mapper bin/
$(CXX) $(CXXFLAGS) -O3 $(COMPAT_FILES) src/*.c src/*.cpp src/vkbd/$(VKBD).cpp -lpthread -o bin/keyd $(LDFLAGS)
$(CXX) $(CXXFLAGS) -O3 $(COMPAT_FILES) src/*.cpp src/vkbd/$(VKBD).cpp -lpthread -o bin/keyd $(LDFLAGS)
debug:
CFLAGS="-g -fsanitize=address -Wunused" $(MAKE)
compose:
Expand Down
30 changes: 22 additions & 8 deletions scripts/generate_xcompose
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,37 @@ open('data/keyd.compose', 'w').write(data)
# table to capitalize on codepoint contiguity, but 35k is small enough to
# warrant keeping the entire thing in memory.

open('src/unicode.c', 'w').write(f'''
open('src/unicode.cpp', 'w').write(f'''
/* GENERATED BY {sys.argv[0]}, DO NOT MODIFY BY HAND. */
#include <stdint.h>
#include <stdlib.h>
#include "keys.h"
#include <algorithm>
#include <vector>
uint32_t unicode_table[] = {{ {','.join(map(str, codes))} }};
constexpr uint32_t unicode_table[] = {{ {','.join(map(str, codes))} }};
/*
 * Look up `codepoint` in unicode_table and return its index, or -1 when
 * the codepoint is not present.
 *
 * The table is binary searched, so it must be sorted in ascending order
 * (NOTE(review): relies on the generator emitting sorted codes - confirm).
 * To keep the search inside a small window for common input, the table is
 * split at compile time at three fixed codepoints and only the partition
 * that can contain `codepoint` is searched.
 */
int unicode_lookup_index(uint32_t codepoint)
{{
    // Partition limits: every entry before a split iterator is smaller
    // than the matching limit, every entry from it onwards is not.
    constexpr uint32_t latin_limit = 0x300;    // after latin, modifiers
    constexpr uint32_t cyrillic_limit = 0x531; // after cyrillic
    constexpr uint32_t hangul_start = 0x1100;  // before hangul

    constexpr auto cb = std::cbegin(unicode_table);
    constexpr auto ce = std::cend(unicode_table);
    constexpr auto ce2 = std::lower_bound(cb, ce, hangul_start);
    constexpr auto ce3 = std::lower_bound(cb, ce2, cyrillic_limit);
    constexpr auto ce4 = std::lower_bound(cb, ce3, latin_limit);

    // Compare against the limits themselves rather than dereferencing the
    // split iterators: a split may equal ce, which must not be read.
    auto beg = cb;
    auto end = ce;
    if (codepoint < latin_limit)
        end = ce4;
    else if (codepoint < cyrillic_limit)
        beg = ce4, end = ce3;
    else if (codepoint < hangul_start)
        beg = ce3, end = ce2; // upper bound is ce2; ce4 precedes ce3 and would form an invalid range
    else
        beg = ce2;

    const auto res = std::lower_bound(beg, end, codepoint);
    if (res != end && *res == codepoint)
        return static_cast<int>(res - cb);

    return -1;
}}
Expand Down

0 comments on commit c07e52d

Please sign in to comment.