Skip to content

Commit fdd75f6

Browse files
committed
exposed tokenizer and padded to support basic C/C++
1 parent fbf3cd9 commit fdd75f6

File tree

4 files changed

+197
-33
lines changed

4 files changed

+197
-33
lines changed

ext/sgsmeta.c

+153
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ static int sgs_meta_opname( SGS_CTX )
255255
const char* str;
256256
sgs_Int op;
257257

258+
SGSFN( "meta_opname" );
258259
if( !sgs_LoadArgs( C, "i", &op ) )
259260
return 0;
260261

@@ -268,11 +269,163 @@ static int sgs_meta_opname( SGS_CTX )
268269
}
269270

270271

272+
/*
 * Printable text for every operator token, indexed by (token id - 200).
 * sgs_meta_tokens_parse looks entries up this way for tokens that satisfy
 * SGS_ST_ISOP, i.e. ids 200..244, so the table needs 45 entries in exactly
 * the numeric order of the SGS_ST_OP_* ids (the last four are
 * "~" = 241, "++" = 242, "--" = 243, "?" = 244).
 * Adding or reordering an operator id requires updating this table too.
 */
static const char* meta_operator_names[] =
273+
{
274+
"===",
275+
"!==",
276+
"==",
277+
"!=",
278+
"<=",
279+
">=",
280+
"<",
281+
">",
282+
"<=>",
283+
"+=",
284+
"-=",
285+
"*=",
286+
"/=",
287+
"%=",
288+
"&=",
289+
"|=",
290+
"^=",
291+
"<<=",
292+
">>=",
293+
"&&=",
294+
"||=",
295+
/* question marks are escaped so the literal is not read as a trigraph */
"\?\?=",
296+
"$=",
297+
"=",
298+
"@",
299+
"&&",
300+
"||",
301+
"??",
302+
"+",
303+
"-",
304+
"*",
305+
"/",
306+
"%",
307+
"&",
308+
"|",
309+
"^",
310+
"<<",
311+
">>",
312+
".",
313+
"$",
314+
"!",
315+
"~",
316+
"++",
317+
"--",
318+
"?",
319+
};
320+
321+
/*
 * Script function "meta_tokens_parse": tokenizes a source buffer and builds
 * one dict per token, then gathers the dicts into an array.
 *
 * Arguments are loaded with the "m" format into a buffer pointer and size.
 * Each token dict is built from the key/value items pushed for that token:
 *   "type"  - operator text, the special character itself, or one of
 *             "ident", "keyword", "real", "int", "ptr", "string", "<unknown>"
 *   "value" - only for ident/keyword/number/string tokens
 *   "line"  - result of sgsT_LineNum for the token
 *
 * Returns 0 when argument loading fails, otherwise 1.
 *
 * NOTE(review): the token list returned by sgsT_Gen is dereferenced without
 * a NULL check - confirm sgsT_Gen cannot return NULL on tokenizer errors.
 */
static int sgs_meta_tokens_parse( SGS_CTX )
322+
{
323+
const char* str = NULL;
324+
sgs_SizeVal sz = 0;
325+
/* t walks the list, tl keeps the head so it can be freed at the end */
sgs_TokenList t, tl;
326+
int count = 0;
327+
328+
SGSFN( "meta_tokens_parse" );
329+
if( !sgs_LoadArgs( C, "m", &str, &sz ) )
330+
return 0;
331+
332+
t = tl = sgsT_Gen( C, str, sz );
333+
/* a zero byte in the token-type position ends the loop */
while( *t != 0 )
334+
{
335+
/* remember the stack height so the dict item count can be derived below */
sgs_SizeVal ssz = sgs_StackSize( C );
336+
337+
sgs_PushStringLit( C, "type" );
338+
if( SGS_ST_ISOP( *t ) )
339+
{
340+
/* operator ids start at 200; the name table is indexed from 0 */
sgs_PushString( C, meta_operator_names[ *t - 200 ] );
341+
}
342+
else
343+
{
344+
switch( *t )
345+
{
346+
case SGS_ST_RBRKL:
347+
case SGS_ST_RBRKR:
348+
case SGS_ST_SBRKL:
349+
case SGS_ST_SBRKR:
350+
case SGS_ST_CBRKL:
351+
case SGS_ST_CBRKR:
352+
case SGS_ST_ARGSEP:
353+
case SGS_ST_STSEP:
354+
case SGS_ST_PICKSEP:
355+
case SGS_ST_HASH:
356+
/* special symbols: the token byte itself is pushed as a 1-char string */
case SGS_ST_BACKSLASH: sgs_PushStringBuf( C, (char*) t, 1 ); break;
357+
case SGS_ST_IDENT: sgs_PushStringLit( C, "ident" ); break;
358+
case SGS_ST_KEYWORD: sgs_PushStringLit( C, "keyword" ); break;
359+
case SGS_ST_NUMREAL: sgs_PushStringLit( C, "real" ); break;
360+
case SGS_ST_NUMINT: sgs_PushStringLit( C, "int" ); break;
361+
case SGS_ST_NUMPTR: sgs_PushStringLit( C, "ptr" ); break;
362+
case SGS_ST_STRING: sgs_PushStringLit( C, "string" ); break;
363+
default: sgs_PushStringLit( C, "<unknown>" ); break;
364+
}
365+
}
366+
367+
/* second pass over the type: push "value" only for tokens carrying a payload */
switch( *t )
368+
{
369+
case SGS_ST_IDENT:
370+
case SGS_ST_KEYWORD:
371+
sgs_PushStringLit( C, "value" );
372+
/* t[1] is the 1-byte length, the (not null-terminated) text starts at t+2 */
sgs_PushStringBuf( C, (char*) t + 2, t[1] );
373+
break;
374+
case SGS_ST_NUMREAL:
375+
sgs_PushStringLit( C, "value" );
376+
{
377+
sgs_Real val;
378+
/* the real value is read from the bytes right after the type byte */
SGS_AS_REAL( val, t+1 );
379+
sgs_PushReal( C, val );
380+
}
381+
break;
382+
case SGS_ST_NUMINT:
383+
sgs_PushStringLit( C, "value" );
384+
{
385+
sgs_Int val;
386+
SGS_AS_INTEGER( val, t+1 );
387+
sgs_PushInt( C, val );
388+
}
389+
break;
390+
case SGS_ST_NUMPTR:
391+
sgs_PushStringLit( C, "value" );
392+
{
393+
sgs_Int val;
394+
/* read as an integer, then converted to a pointer value for the script */
SGS_AS_INTEGER( val, t+1 );
395+
sgs_PushPtr( C, (void*) (intptr_t) val );
396+
}
397+
break;
398+
case SGS_ST_STRING:
399+
sgs_PushStringLit( C, "value" );
400+
{
401+
int32_t size;
402+
/* 32-bit length at t+1, string bytes follow at t+5 */
SGS_ST_READINT( size, t + 1 );
403+
sgs_PushStringBuf( C, (char*) t + 5, size );
404+
}
405+
break;
406+
}
407+
408+
sgs_PushStringLit( C, "line" );
409+
sgs_PushInt( C, sgsT_LineNum( t ) );
410+
411+
/* build the token dict from everything pushed since ssz was recorded */
sgs_CreateDict( C, NULL, sgs_StackSize( C ) - ssz );
412+
count++;
413+
414+
t = sgsT_Next( t );
415+
}
416+
417+
/* gather the count token dicts into the result array */
sgs_CreateArray( C, NULL, count );
418+
/* free from the saved head, since t has been advanced past the tokens */
sgsT_Free( C, tl );
419+
return 1;
420+
}
421+
422+
271423
/*
 * Table pairing each script-visible function name with its C implementation.
 * NOTE(review): presumably handed to the library's registration call, which
 * is outside this hunk - confirm against the module init code.
 */
static const sgs_RegFuncConst meta_funcs[] =
272424
{
273425
{ "meta_globals", sgs_meta_globals },
274426
{ "meta_unpack", sgs_meta_unpack },
275427
{ "meta_opname", sgs_meta_opname },
428+
{ "meta_tokens_parse", sgs_meta_tokens_parse },
276429
};
277430

278431

src/sgs_int.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ extern "C" {
6262
#define SGS_ST_STSEP ';'
6363
#define SGS_ST_PICKSEP ':'
6464
#define SGS_ST_HASH '#'
65+
#define SGS_ST_BACKSLASH '\\'
6566
/* other id additional data */
6667
#define SGS_ST_IDENT 'N' /* 1 byte (string size), N bytes (string), not null-terminated */
6768
#define SGS_ST_KEYWORD 'K' /* same as IDENT */
@@ -114,8 +115,9 @@ extern "C" {
114115
#define SGS_ST_OP_INV 241 /* ~ */
115116
#define SGS_ST_OP_INC 242 /* ++ */
116117
#define SGS_ST_OP_DEC 243 /* -- */
118+
#define SGS_ST_OP_QMARK 244 /* ? */
117119

118-
#define SGS_ST_ISOP( chr ) ( (chr) >= 200 && (chr) <= 243 )
120+
#define SGS_ST_ISOP( chr ) ( (chr) >= 200 && (chr) <= 244 )
119121
#define SGS_ST_OP_UNARY( chr ) ( (chr) == SGS_ST_OP_ERSUP || (chr) == SGS_ST_OP_ADD || \
120122
(chr) == SGS_ST_OP_SUB || ( (chr) >= SGS_ST_OP_NOT && (chr) <= SGS_ST_OP_DEC ) )
121123
#define SGS_ST_OP_BINARY( chr ) ( (chr) >= 200 && (chr) <= 239 && (chr) != 224 )
@@ -128,7 +130,7 @@ extern "C" {
128130
(chr) == SGS_ST_OP_BLAND || (chr) == SGS_ST_OP_BLOR )
129131
#define SGS_ST_OP_FNN( chr ) ( (chr) == SGS_ST_OP_NLOEQ || (chr) == SGS_ST_OP_NLOR )
130132

131-
#define SGS_ST_ISSPEC( chr ) sgs_isoneof( (chr), "()[]{},;:#" )
133+
#define SGS_ST_ISSPEC( chr ) sgs_isoneof( (chr), "()[]{},;:#\\" )
132134

133135
#define SGS_ST_READINT( tgt, pos ) SGS_AS_INT32( tgt, pos )
134136
#define SGS_ST_READLN( tgt, pos ) SGS_AS_( tgt, pos, sgs_LineNum )

src/sgs_tok.c

+32-30
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,6 @@ static int32_t string_inplace_fix( char* str, int32_t len )
110110
}
111111

112112

113-
#define STRLIT_LEN(lit) (sizeof(lit)-1)
114-
#define STRLIT_BUF(lit) lit, STRLIT_LEN(lit)
115-
116113
static int ident_equal( const char* ptr, int size, const char* what, int wlen )
117114
{
118115
return size == wlen && memcmp( ptr, what, (size_t) size ) == 0;
@@ -138,29 +135,29 @@ static void readident( SGS_CTX, sgs_MemBuf* out, const char* code, int32_t* at,
138135
}
139136
if( sz >= 255 ) sz = 255;
140137
out->ptr[ pos_rev + 1 ] = (char) sz;
141-
if( ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("var") ) ||
142-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("global") ) ||
143-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("thread") ) ||
144-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("subthread") ) ||
145-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("sync") ) ||
146-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("race") ) ||
147-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("null") ) ||
148-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("true") ) ||
149-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("false") ) ||
150-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("if") ) ||
151-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("else") ) ||
152-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("do") ) ||
153-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("while") ) ||
154-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("for") ) ||
155-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("foreach") ) ||
156-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("break") ) ||
157-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("continue") ) ||
158-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("function") ) ||
159-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("use") ) ||
160-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("return") ) ||
161-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("this") ) ||
162-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("new") ) ||
163-
ident_equal( out->ptr + pos_rev + 2, sz, STRLIT_BUF("defer") ) )
138+
if( ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("var") ) ||
139+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("global") ) ||
140+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("thread") ) ||
141+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("subthread") ) ||
142+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("sync") ) ||
143+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("race") ) ||
144+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("null") ) ||
145+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("true") ) ||
146+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("false") ) ||
147+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("if") ) ||
148+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("else") ) ||
149+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("do") ) ||
150+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("while") ) ||
151+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("for") ) ||
152+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("foreach") ) ||
153+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("break") ) ||
154+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("continue") ) ||
155+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("function") ) ||
156+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("use") ) ||
157+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("return") ) ||
158+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("this") ) ||
159+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("new") ) ||
160+
ident_equal( out->ptr + pos_rev + 2, sz, SGS_STRLITBUF("defer") ) )
164161
{
165162
out->ptr[ pos_rev ] = SGS_ST_KEYWORD;
166163
}
@@ -229,7 +226,7 @@ static void readstring( SGS_CTX, sgs_MemBuf* out, sgs_LineNum* line, const char*
229226

230227
static const char* sgs_opchars = "=<>+-*/%?!~&|^.$@";
231228
static const char* sgs_operators = "<=>;===;!==;==;!=;<=;>=;+=;-=;*=;/=;%=;&=;|=;^=;<<=;>>=;$=;..=;"
232-
"<<;>>;&&=;||=;?""?=;&&;||;?""?;..;<;>;=;++;--;+;-;*;/;%;&;|;^;.;$;!;~;@"; /* trigraphs detected */
229+
"<<;>>;&&=;||=;?""?=;&&;||;?""?;..;<;>;=;++;--;+;-;*;/;%;&;|;^;.;$;!;~;@;?"; /* trigraphs detected */
233230
static const sgs_TokenType sgs_optable[] =
234231
{
235232
SGS_ST_OP_RWCMP, SGS_ST_OP_SEQ, SGS_ST_OP_SNEQ, SGS_ST_OP_EQ, SGS_ST_OP_NEQ, SGS_ST_OP_LEQ, SGS_ST_OP_GEQ,
@@ -238,7 +235,7 @@ static const sgs_TokenType sgs_optable[] =
238235
SGS_ST_OP_LSH, SGS_ST_OP_RSH, SGS_ST_OP_BLAEQ, SGS_ST_OP_BLOEQ, SGS_ST_OP_NLOEQ, SGS_ST_OP_BLAND,
239236
SGS_ST_OP_BLOR, SGS_ST_OP_NLOR, SGS_ST_OP_CAT, SGS_ST_OP_LESS, SGS_ST_OP_GRTR, SGS_ST_OP_SET, SGS_ST_OP_INC, SGS_ST_OP_DEC,
240237
SGS_ST_OP_ADD, SGS_ST_OP_SUB, SGS_ST_OP_MUL, SGS_ST_OP_DIV, SGS_ST_OP_MOD, SGS_ST_OP_AND,
241-
SGS_ST_OP_OR, SGS_ST_OP_XOR, SGS_ST_OP_MMBR, SGS_ST_OP_CAT, SGS_ST_OP_NOT, SGS_ST_OP_INV, SGS_ST_OP_ERSUP
238+
SGS_ST_OP_OR, SGS_ST_OP_XOR, SGS_ST_OP_MMBR, SGS_ST_OP_CAT, SGS_ST_OP_NOT, SGS_ST_OP_INV, SGS_ST_OP_ERSUP, SGS_ST_OP_QMARK
242239
};
243240
static const char sgs_opsep = ';';
244241

@@ -333,7 +330,7 @@ sgs_TokenList sgsT_Gen( SGS_CTX, const char* code, size_t length )
333330
|| code[ i + 1 ] == '*' ) ) skipcomment( C, &line, code, &i, ilen );
334331

335332
/* special symbol */
336-
else if( sgs_isoneof( fc, "()[]{},;:#" ) ) sgs_membuf_appchr( &s, C, fc );
333+
else if( sgs_isoneof( fc, "()[]{},;:#\\" ) ) sgs_membuf_appchr( &s, C, fc );
337334

338335
/* identifier */
339336
else if( fc == '_' || sgs_isalpha( fc ) || ( fc == '$' && pcfg.ident_dollar_sign ) )
@@ -490,6 +487,8 @@ static void tp_token( SGS_CTX, sgs_MemBuf* out, sgs_TokenList t )
490487
case SGS_ST_ARGSEP:
491488
case SGS_ST_STSEP:
492489
case SGS_ST_PICKSEP:
490+
case SGS_ST_HASH:
491+
case SGS_ST_BACKSLASH:
493492
sgs_membuf_appchr( out, C, (char) *t );
494493
break;
495494
case SGS_ST_IDENT:
@@ -543,7 +542,7 @@ static void tp_token( SGS_CTX, sgs_MemBuf* out, sgs_TokenList t )
543542
}
544543
}
545544
break;
546-
#define OPR( op ) sgs_membuf_appbuf( out, C, STRLIT_BUF(op) )
545+
#define OPR( op ) sgs_membuf_appbuf( out, C, SGS_STRLITBUF(op) )
547546
case SGS_ST_OP_RWCMP: OPR( "<=>" ); break;
548547
case SGS_ST_OP_SEQ: OPR( "===" ); break;
549548
case SGS_ST_OP_SNEQ: OPR( "!==" ); break;
@@ -585,6 +584,7 @@ static void tp_token( SGS_CTX, sgs_MemBuf* out, sgs_TokenList t )
585584
case SGS_ST_OP_INV: OPR( "~" ); break;
586585
case SGS_ST_OP_INC: OPR( "++" ); break;
587586
case SGS_ST_OP_DEC: OPR( "--" ); break;
587+
case SGS_ST_OP_QMARK: OPR( "?" ); break;
588588
#undef OPR
589589
default:
590590
sgs_membuf_appbuf( out, C, "<error>", 7 );
@@ -653,6 +653,8 @@ void sgsT_DumpToken( SGS_CTX, sgs_TokenList tok )
653653
case SGS_ST_ARGSEP:
654654
case SGS_ST_STSEP:
655655
case SGS_ST_PICKSEP:
656+
case SGS_ST_HASH:
657+
case SGS_ST_BACKSLASH:
656658
sgs_ErrWritef( C, "%c", *tok );
657659
break;
658660
case SGS_ST_IDENT:

tests/830-meta_TF.sgs

+8-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ global ERRORS, CURFN;
33
global tests_failed, tests_ran;
44

55
ERRORS = "";
6-
include "math", "sgsmeta";
6+
include "io", "math", "sgsmeta";
77

88
testEqual( typeof(meta_unpack), "cfunc", "meta library is loaded" );
99
testEqual( ERRORS, "", "meta library loaded without errors" );
@@ -70,3 +70,10 @@ testFunction( subfn, "f1", 1, false, 0 );
7070
subfn = testFindFunction( subfn, "f2" );
7171
testFunction( subfn, "f2", 2, false, 0 );
7272

73+
74+
section( "meta_tokens_parse" );
75+
76+
ERRORS = "";
77+
tokens = meta_tokens_parse( io_file_read( "src/sgscript.h" ) );
78+
testEqual( ERRORS, "", "parse C header - no errors" );
79+

0 commit comments

Comments
 (0)