Skip to content

Commit d979a1c

Browse files
committed
Add BNF syntax highlighting
1 parent 4a8311a commit d979a1c

File tree

6 files changed

+230
-3
lines changed

6 files changed

+230
-3
lines changed

llamafile/highlight.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,5 +225,10 @@ Highlight *Highlight::create(const std::string_view &lang) {
225225
if (lang == "cmake")
226226
return new HighlightCmake;
227227

228+
if (lang == "bnf" || //
229+
lang == "abnf" || //
230+
lang == "grammar")
231+
return new HighlightBnf;
232+
228233
return nullptr;
229234
}

llamafile/highlight.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -766,3 +766,15 @@ class HighlightCmake : public Highlight {
766766
int spaces_ = 0;
767767
std::string word_;
768768
};
769+
770+
// Streaming syntax highlighter for BNF-style grammar text.
//
// Highlight::create() returns an instance of this class for the language
// names "bnf", "abnf" and "grammar". Text is pushed through feed() in
// arbitrary chunks; flush() finishes the stream and returns the object to
// its initial state.
class HighlightBnf : public Highlight {
771+
public:
772+
HighlightBnf();
773+
~HighlightBnf() override;
774+
// Appends `input` to `*result`, interleaved with highlighting markers.
void feed(std::string *result, std::string_view input) override;
775+
// Emits anything still buffered and closes any highlight left open.
void flush(std::string *result) override;
776+
777+
private:
778+
// Current scanner state; feed() switches on it. Starts at 0.
int t_ = 0;
779+
// Operator characters collected so far but not yet written to the output.
std::string operator_;
780+
};

llamafile/highlight_bnf.cpp

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
2+
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
3+
//
4+
// Copyright 2024 Mozilla Foundation
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
18+
#include "highlight.h"
19+
20+
// Scanner states stored in HighlightBnf::t_.
enum {
21+
NORMAL, // plain text; no highlight is open
22+
COMMENT, // after '#' or ';', until the next newline
23+
DQUOTE, // inside a double-quoted string
24+
DQUOTE_BACKSLASH, // inside a string, right after a backslash
25+
ESCAPE, // outside a string, right after a backslash
26+
ESCAPE_HEX, // after "\x", looking for the first hex digit
27+
ESCAPE_HEX1, // after "\x" and one hex digit, looking for a second
28+
OPERATOR, // collecting a run of operator characters in operator_
29+
};
30+
31+
// Returns true when `op` is exactly one of the operator spellings that get
// highlighted: "::=", "|", "?", "*" or "+". Any other run of operator
// characters (for example "=" or "::") is rejected.
static bool is_operator(const std::string &op) {
    static const char *const kOperators[] = {"::=", "|", "?", "*", "+"};
    for (const char *candidate : kOperators) {
        if (op == candidate)
            return true;
    }
    return false;
}
38+
39+
// Returns true when `c` is one of the punctuation characters that may form
// part of an operator run: ! $ % & ' * + , - . / : = ? @ ^ _ ` | ~
// The comparison is done on the int value, so only those exact character
// codes match; NUL and values outside that set return false.
static bool is_operator_char(int c) {
    static const char kOperatorChars[] = "!$%&'*+,-./:=?@^_`|~";
    for (const char *p = kOperatorChars; *p != '\0'; ++p) {
        if (c == *p)
            return true;
    }
    return false;
}
66+
67+
// Nothing to set up here: the members are initialized by their in-class
// initializers and default constructors.
HighlightBnf::HighlightBnf() = default;
69+
70+
// No explicit cleanup is performed; members are destroyed as usual.
HighlightBnf::~HighlightBnf() = default;
72+
73+
// Highlights one chunk of grammar text.
//
// Walks `input` one byte at a time and appends each byte to `*r`, adding
// HI_* markers around comments, double-quoted strings, backslash escapes and
// recognized operators. The scanner state is kept in t_ and operator_, so a
// token may be split across several feed() calls. Operator characters are
// held back in operator_ until the run ends; flush() emits whatever is still
// pending at end of input.
void HighlightBnf::feed(std::string *r, std::string_view input) {
74+
int c;
75+
for (size_t i = 0; i < input.size(); ++i) {
76+
// Mask to 0..255 so the byte is never negative when char is signed.
c = input[i] & 255;
77+
78+
switch (t_) {
79+
80+
// Jump target for states that end on a byte they do not consume: the
// state is reset to NORMAL and the same byte is handled again here.
Normal:
81+
case NORMAL:
82+
if (c == '#' || c == ';') {
83+
t_ = COMMENT;
84+
*r += HI_COMMENT;
85+
*r += c;
86+
} else if (c == '"') {
87+
t_ = DQUOTE;
88+
*r += HI_STRING;
89+
*r += '"';
90+
} else if (c == '\\') {
91+
t_ = ESCAPE;
92+
*r += HI_ESCAPE;
93+
*r += '\\';
94+
} else if (is_operator_char(c)) {
95+
// Not written to *r yet: whether the run is highlighted depends
// on the complete run, which is only known once it ends.
operator_ += c;
96+
t_ = OPERATOR;
97+
} else {
98+
*r += c;
99+
}
100+
break;
101+
102+
case OPERATOR:
103+
if (is_operator_char(c)) {
104+
operator_ += c;
105+
} else {
106+
// The run is over. Highlight it only if the whole run is a known
// operator; otherwise pass it through unchanged.
if (is_operator(operator_)) {
107+
*r += HI_OPERATOR;
108+
*r += operator_;
109+
*r += HI_RESET;
110+
} else {
111+
*r += operator_;
112+
}
113+
operator_.clear();
114+
t_ = NORMAL;
115+
// The current byte was not part of the run; process it as normal text.
goto Normal;
116+
}
117+
break;
118+
119+
case ESCAPE:
120+
// The byte after the backslash always belongs to the escape. Only 'x'
// keeps the highlight open, to take in following hex digits.
*r += c;
121+
if (c == 'x') {
122+
t_ = ESCAPE_HEX;
123+
} else {
124+
*r += HI_RESET;
125+
t_ = NORMAL;
126+
}
127+
break;
128+
129+
case ESCAPE_HEX:
130+
if (isxdigit(c)) {
131+
*r += c;
132+
t_ = ESCAPE_HEX1;
133+
} else {
134+
// "\x" with no hex digit after it: end the escape and reprocess
// this byte as normal text.
*r += HI_RESET;
135+
t_ = NORMAL;
136+
goto Normal;
137+
}
138+
break;
139+
140+
case ESCAPE_HEX1:
141+
// At most two hex digits are taken into the escape.
if (isxdigit(c)) {
142+
*r += c;
143+
*r += HI_RESET;
144+
t_ = NORMAL;
145+
} else {
146+
*r += HI_RESET;
147+
t_ = NORMAL;
148+
goto Normal;
149+
}
150+
break;
151+
152+
case COMMENT:
153+
// The newline is written before HI_RESET, so it is inside the
// highlighted span.
*r += c;
154+
if (c == '\n') {
155+
*r += HI_RESET;
156+
t_ = NORMAL;
157+
}
158+
break;
159+
160+
case DQUOTE:
161+
*r += c;
162+
if (c == '"') {
163+
*r += HI_RESET;
164+
t_ = NORMAL;
165+
} else if (c == '\\') {
166+
t_ = DQUOTE_BACKSLASH;
167+
}
168+
break;
169+
170+
case DQUOTE_BACKSLASH:
171+
// The byte after a backslash is copied without being inspected, so an
// escaped double quote does not end the string.
*r += c;
172+
t_ = DQUOTE;
173+
break;
174+
175+
default:
176+
// In the code shown here t_ is only ever assigned the enumerators
// handled above.
__builtin_unreachable();
177+
}
178+
}
179+
}
180+
181+
void HighlightBnf::flush(std::string *r) {
182+
switch (t_) {
183+
case OPERATOR:
184+
if (is_operator(operator_)) {
185+
*r += HI_OPERATOR;
186+
*r += operator_;
187+
*r += HI_RESET;
188+
} else {
189+
*r += operator_;
190+
}
191+
operator_.clear();
192+
break;
193+
case DQUOTE:
194+
case COMMENT:
195+
case DQUOTE_BACKSLASH:
196+
case ESCAPE:
197+
case ESCAPE_HEX:
198+
case ESCAPE_HEX1:
199+
*r += HI_RESET;
200+
break;
201+
default:
202+
break;
203+
}
204+
t_ = NORMAL;
205+
}

llamafile/highlight_perl.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,9 @@ void HighlightPerl::feed(std::string *r, std::string_view input) {
453453
t_ = LT_LT_NAME;
454454
lf::append_wchar(&heredoc_, c);
455455
lf::append_wchar(r, c);
456-
} else if (!isblank(c)) {
456+
} else if (isascii(c) && isblank(c)) {
457+
*r += c;
458+
} else {
457459
t_ = NORMAL;
458460
goto Normal;
459461
}

llamafile/highlight_shell.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,9 @@ void HighlightShell::feed(std::string *r, std::string_view input) {
368368
t_ = LT_LT_NAME;
369369
lf::append_wchar(&heredoc_, c);
370370
lf::append_wchar(r, c);
371-
} else if (!isblank(c)) {
371+
} else if (isascii(c) && isblank(c)) {
372+
*r += c;
373+
} else {
372374
t_ = NORMAL;
373375
goto Normal;
374376
}

llamafile/highlight_test.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,13 @@
2525

2626
#define LENGTH 10
2727
#define ITERATIONS 200000
28-
#define CHARSET "aQq123{}[]!@#$%^*().\"'`\\/\n-_=&;:<>,"
28+
#define CHARSET " aQq123{}[]!@#$%^*().\"'`\\/\n-_=&;:<>,"
2929

3030
const char *const kLanguages[] = {
3131
"ada", //
3232
"asm", //
3333
"basic", //
34+
"bnf", //
3435
"c", //
3536
"c#", //
3637
"c++", //

0 commit comments

Comments
 (0)