25
25
26
26
#define MAXUTF 0x7FFFFFFFu  /* 2^31 - 1, the largest value that fits in 31 bits; presumably the upper limit for decoded values (its use is outside this excerpt) */
27
27
28
+
29
+ #define MSGInvalid "invalid UTF-8 code"  /* one shared error text for the luaL_error and luaL_argcheck calls that report malformed input */
30
+
28
31
/*
29
32
** Integer type for decoded UTF-8 values; MAXUTF needs 31 bits.
30
33
*/
@@ -35,7 +38,8 @@ typedef unsigned long utfint;
35
38
#endif
36
39
37
40
38
- #define iscont (p ) ((*(p) & 0xC0) == 0x80)
41
+ #define iscont (c ) (((c) & 0xC0) == 0x80)  /* nonzero when byte value c has the bit pattern 10xxxxxx (a continuation byte); takes the byte itself, not a pointer. NOTE(review): as printed, the blank before '(' would make this an object-like macro in C - presumably an extraction artifact; confirm against the real file */
42
+ #define iscontp (p ) iscont(*(p))  /* pointer form: applies iscont to the byte that p points to. NOTE(review): as printed, the blank before '(' would make this an object-like macro in C - presumably an extraction artifact; confirm against the real file */
39
43
40
44
41
45
/* from strlib */
@@ -65,7 +69,7 @@ static const char *utf8_decode (const char *s, utfint *val, int strict) {
65
69
int count = 0 ; /* to count number of continuation bytes */
66
70
for (; c & 0x40 ; c <<= 1 ) { /* while it needs continuation bytes... */
67
71
unsigned int cc = (unsigned char )s [++ count ]; /* read next byte */
68
- if ((cc & 0xC0 ) != 0x80 ) /* not a continuation byte? */
72
+ if (! iscont (cc ) ) /* not a continuation byte? */
69
73
return NULL ; /* invalid byte sequence */
70
74
res = (res << 6 ) | (cc & 0x3F ); /* add lower 6 bits from cont. byte */
71
75
}
@@ -140,7 +144,7 @@ static int codepoint (lua_State *L) {
140
144
utfint code ;
141
145
s = utf8_decode (s , & code , !lax );
142
146
if (s == NULL )
143
- return luaL_error (L , "invalid UTF-8 code" );
147
+ return luaL_error (L , MSGInvalid );
144
148
lua_pushinteger (L , code );
145
149
n ++ ;
146
150
}
@@ -190,16 +194,16 @@ static int byteoffset (lua_State *L) {
190
194
"position out of bounds" );
191
195
if (n == 0 ) {
192
196
/* find beginning of current byte sequence */
193
- while (posi > 0 && iscont (s + posi )) posi -- ;
197
+ while (posi > 0 && iscontp (s + posi )) posi -- ;
194
198
}
195
199
else {
196
- if (iscont (s + posi ))
200
+ if (iscontp (s + posi ))
197
201
return luaL_error (L , "initial position is a continuation byte" );
198
202
if (n < 0 ) {
199
203
while (n < 0 && posi > 0 ) { /* move back */
200
204
do { /* find beginning of previous character */
201
205
posi -- ;
202
- } while (posi > 0 && iscont (s + posi ));
206
+ } while (posi > 0 && iscontp (s + posi ));
203
207
n ++ ;
204
208
}
205
209
}
@@ -208,7 +212,7 @@ static int byteoffset (lua_State *L) {
208
212
while (n > 0 && posi < (lua_Integer )len ) {
209
213
do { /* find beginning of next character */
210
214
posi ++ ;
211
- } while (iscont (s + posi )); /* (cannot pass final '\0') */
215
+ } while (iscontp (s + posi )); /* (cannot pass final '\0') */
212
216
n -- ;
213
217
}
214
218
}
@@ -226,15 +230,15 @@ static int iter_aux (lua_State *L, int strict) {
226
230
const char * s = luaL_checklstring (L , 1 , & len );
227
231
lua_Unsigned n = (lua_Unsigned )lua_tointeger (L , 2 );
228
232
if (n < len ) {
229
- while (iscont (s + n )) n ++ ; /* skip continuation bytes */
233
+ while (iscontp (s + n )) n ++ ; /* go to next character */
230
234
}
231
235
if (n >= len ) /* (also handles original 'n' being negative) */
232
236
return 0 ; /* no more codepoints */
233
237
else {
234
238
utfint code ;
235
239
const char * next = utf8_decode (s + n , & code , strict );
236
- if (next == NULL )
237
- return luaL_error (L , "invalid UTF-8 code" );
240
+ if (next == NULL || iscontp ( next ) )
241
+ return luaL_error (L , MSGInvalid );
238
242
lua_pushinteger (L , n + 1 );
239
243
lua_pushinteger (L , code );
240
244
return 2 ;
@@ -253,7 +257,8 @@ static int iter_auxlax (lua_State *L) {
253
257
254
258
static int iter_codes (lua_State * L ) {
255
259
int lax = lua_toboolean (L , 2 );
256
- luaL_checkstring (L , 1 );
260
+ const char * s = luaL_checkstring (L , 1 );
261
+ luaL_argcheck (L , !iscontp (s ), 1 , MSGInvalid );
257
262
lua_pushcfunction (L , lax ? iter_auxlax : iter_auxstrict );
258
263
lua_pushvalue (L , 1 );
259
264
lua_pushinteger (L , 0 );
0 commit comments