Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New lexer 2 — Electric Boogaloo #557

Merged
merged 59 commits into from
Oct 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
6dc4ce6
Implement infrastructure around new lexer
ISSOtm Jul 23, 2020
71f8871
Implement more functionality
ISSOtm Jul 23, 2020
4c9a929
Implement almost all functionality
ISSOtm Jul 28, 2020
e56c6cc
Fix PC's name not being passed to parser
ISSOtm Jul 28, 2020
2ec1001
Fix mmap read offset not being initialized
ISSOtm Jul 28, 2020
5ad7a93
Add EQUS expansion
ISSOtm Jul 28, 2020
61b2fd9
Add string expansion reporting
ISSOtm Jul 28, 2020
fed252b
Fix nested expansions being incorrectly handled
ISSOtm Jul 30, 2020
149db9a
Fix incorrect freeing of expansions
ISSOtm Jul 30, 2020
38bda7e
Fix string expansion reporting
ISSOtm Jul 31, 2020
7c895f8
Fix diagnostic formatting
ISSOtm Jul 31, 2020
e11f250
Add test for built-in file symbol
ISSOtm Jul 31, 2020
6e805cd
Implement macro args
ISSOtm Jul 31, 2020
81a77a9
Re-implement block copy to avoid expanding macro args
ISSOtm Aug 1, 2020
adcaf4c
Fix crash when no macro args are being used
ISSOtm Aug 2, 2020
df75fd2
Fix expansion reporting being incorrect
ISSOtm Aug 2, 2020
cd747d8
Fix many lexer bugs
ISSOtm Aug 14, 2020
d9ecaab
Add debug tracing code to lexer
ISSOtm Aug 15, 2020
b27b821
Fix RAW lexer length underflow
ISSOtm Aug 15, 2020
cf99216
Fix lexer capture sometimes not being reset
ISSOtm Aug 15, 2020
9081fea
Reinstate macro arg scan distance
ISSOtm Aug 15, 2020
08867b3
Enable catching invalid macro arg 0
ISSOtm Aug 15, 2020
3f5f9bc
Fix numeric constant overflow checks
ISSOtm Aug 16, 2020
e4f2fad
Support line continuations in main scope
ISSOtm Aug 16, 2020
62ecdce
Fix line-continuation-macro test
ISSOtm Aug 16, 2020
fd02ffb
Implement __FILE__ symbol
ISSOtm Aug 16, 2020
baeb180
Apply error reporting changes to tests
ISSOtm Aug 16, 2020
ae77893
Fix file name reporting
ISSOtm Aug 16, 2020
8d18b39
Support missing register tokens
ISSOtm Aug 16, 2020
35396e6
Fix files being unmapped when still referenced by macros
ISSOtm Aug 16, 2020
4d1333e
Fix incorrect error reporting of `INCLUDE`d files
ISSOtm Aug 17, 2020
d641972
Fix macro args not being restored when exiting macros
ISSOtm Aug 17, 2020
b83b982
Fix _NARG crashing outside of macros
ISSOtm Aug 17, 2020
aa76603
Add line+col trace info to lexer
ISSOtm Aug 17, 2020
f9b48c0
Fix else working incorrectly from macros
ISSOtm Aug 18, 2020
b7b03ee
Fix "REPT 0" not being a no-op
ISSOtm Aug 18, 2020
ece6853
Implement `opt b` and `opt g`
ISSOtm Aug 18, 2020
f7b7a97
Prevent expanding macro args in comments
ISSOtm Aug 18, 2020
615f107
Fix `readFractionalPart` never shifting characters
ISSOtm Aug 18, 2020
e33c2ad
Fix `INCLUDE` ignoring `-MG`
ISSOtm Aug 18, 2020
9e3d7a5
Handle comments in line continuations
ISSOtm Aug 18, 2020
ac011fe
Use common function to discard comments in macro args
ISSOtm Aug 18, 2020
71a0a42
Fix C2x use of `static_assert`
ISSOtm Aug 18, 2020
542b5d1
Fix possible capture buffer size overflow
ISSOtm Aug 22, 2020
b65ea64
Add newlines to all test output
ISSOtm Aug 22, 2020
c952dd8
Fix fixed-point constants not working correctly
ISSOtm Aug 23, 2020
dbef51b
Move `isWhitespace` to a place where it makes more sense
ISSOtm Aug 23, 2020
7381d7b
Remove unnecessarily nested symbol data union
ISSOtm Aug 23, 2020
b224cab
Harmonize printing `distance`
ISSOtm Aug 23, 2020
96cb5e1
Fix range-dependent dead code in recursion depth check
ISSOtm Aug 23, 2020
82469ac
Shim around `mmap` on Windows
ISSOtm Aug 31, 2020
1385235
Fix possible uninitialized read on Windows
ISSOtm Aug 31, 2020
8e7afb0
Move some MSVC-specific defines to `platform.h`
ISSOtm Aug 31, 2020
930080f
Mark not unmapping macro-containing files as okay
ISSOtm Sep 22, 2020
5a65188
Implement compact file stacks in object files
ISSOtm Sep 29, 2020
ee9e45b
Change assertion condition in __FILE__ buf dumping
ISSOtm Sep 29, 2020
423a7c4
Handle \\r better
ISSOtm Sep 29, 2020
c246942
Fix incomplete duplication of REPT nodes
ISSOtm Sep 30, 2020
2eca43c
Fix critical oversight in lexer buffer refilling
ISSOtm Oct 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ if(MSVC)
add_definitions(/D_CRT_SECURE_NO_WARNINGS)
else()
if(DEVELOP)
add_compile_options(-Werror -Wall -Wextra -pedantic
add_compile_options(-Werror -Wall -Wextra -pedantic -Wno-type-limits
-Wno-sign-compare -Wformat -Wformat-security -Wformat-overflow=2
-Wformat-truncation=1 -Wformat-y2k -Wswitch-enum -Wunused
-Wuninitialized -Wunknown-pragmas -Wstrict-overflow=5
Expand Down
7 changes: 3 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ rgbasm_obj := \
src/asm/asmy.o \
src/asm/charmap.o \
src/asm/fstack.o \
src/asm/globlex.o \
src/asm/lexer.o \
src/asm/macro.o \
src/asm/main.o \
Expand All @@ -73,7 +72,7 @@ rgbasm_obj := \
src/hashmap.o \
src/linkdefs.o

src/asm/globlex.o src/asm/lexer.o src/asm/constexpr.o: src/asm/asmy.h
src/asm/lexer.o src/asm/main.o: src/asm/asmy.h

rgblink_obj := \
src/link/assign.o \
Expand Down Expand Up @@ -187,7 +186,7 @@ checkpatch:
# compilation and make the continuous integration infrastructure return failure.

develop:
$Qenv $(MAKE) -j WARNFLAGS="-Werror -Wall -Wextra -Wpedantic \
$Qenv $(MAKE) -j WARNFLAGS="-Werror -Wall -Wextra -Wpedantic -Wno-type-limits \
-Wno-sign-compare -Wformat -Wformat-security -Wformat-overflow=2 \
-Wformat-truncation=1 -Wformat-y2k -Wswitch-enum -Wunused \
-Wuninitialized -Wunknown-pragmas -Wstrict-overflow=5 \
Expand All @@ -199,7 +198,7 @@ develop:
-fsanitize=unreachable -fsanitize=vla-bound \
-fsanitize=signed-integer-overflow -fsanitize=bounds \
-fsanitize=object-size -fsanitize=bool -fsanitize=enum \
-fsanitize=alignment -fsanitize=null -DDEVELOP" CFLAGS="-g -O0"
-fsanitize=alignment -fsanitize=null -DDEVELOP" CFLAGS="-ggdb3 -O0"

# Targets for the project maintainer to easily create Windows exes.
# This is not for Windows users!
Expand Down
7 changes: 0 additions & 7 deletions include/asm/asm.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,8 @@
#define MAXMACROARGS 99999
#define MAXINCPATHS 128

extern int32_t nLineNo;
extern uint32_t nTotalLines;
extern uint32_t nIFDepth;
extern bool skipElif;
extern char tzCurrentFileName[_MAX_PATH + 1];
extern struct Section *pCurrentSection;
extern bool oDontExpandStrings;

size_t symvaluetostring(char *dest, size_t maxLength, char *sym,
const char *mode);

#endif /* RGBDS_ASM_ASM_H */
79 changes: 51 additions & 28 deletions include/asm/fstack.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,36 +21,59 @@

#include "types.h"

struct MacroArgs;
/*
 * Common header of every file stack node. It is embedded as the first member of
 * `struct FileStackReptNode` and `struct FileStackNamedNode`; `type` tells which
 * of the two a given node actually is.
 */
struct FileStackNode {
struct FileStackNode *parent; /* Pointer to parent node, for error reporting */
/* Line at which the parent context was exited; meaningless for the root level */
uint32_t lineNo;

struct FileStackNode *next; /* Next node in the output linked list */
bool referenced; /* If referenced, don't free! */
/* NOTE: `ID` is unsigned, so the "-1" sentinel below is stored as UINT32_MAX */
uint32_t ID; /* Set only if referenced: ID within the object file, -1 if not output yet */

/* Discriminates the struct this header is embedded in */
enum {
/* Node is a `struct FileStackReptNode` */
NODE_REPT,
/* Node is a `struct FileStackNamedNode` whose name is a file name */
NODE_FILE,
/* Node is a `struct FileStackNamedNode` whose name is a file::macro name */
NODE_MACRO,
} type;
};

struct sContext {
YY_BUFFER_STATE FlexHandle;
struct Symbol const *pMacro;
struct sContext *next;
char tzFileName[_MAX_PATH + 1];
struct MacroArgs *macroArgs;
uint32_t uniqueID;
int32_t nLine;
uint32_t nStatus;
FILE *pFile;
char *pREPTBlock;
uint32_t nREPTBlockCount;
uint32_t nREPTBlockSize;
int32_t nREPTBodyFirstLine;
int32_t nREPTBodyLastLine;
/*
 * File stack node describing a REPT context.
 * `iters` is a flexible array member, so storage for its elements has to be
 * allocated past the end of the struct itself.
 */
struct FileStackReptNode { /* NODE_REPT */
struct FileStackNode node;
/* NOTE(review): presumably the number of entries in `iters` — confirm in fstack.c */
uint32_t reptDepth;
/* WARNING: if changing this type, change overflow check in `fstk_Init` */
uint32_t iters[]; /* REPT iteration counts since last named node, in reverse depth order */
};

extern unsigned int nMaxRecursionDepth;

void fstk_RunInclude(char *tzFileName);
void fstk_Init(char *s);
void fstk_Dump(void);
void fstk_DumpToStr(char *buf, size_t len);
void fstk_DumpStringExpansions(void);
void fstk_AddIncludePath(char *s);
void fstk_RunMacro(char *s, struct MacroArgs *args);
void fstk_RunRept(uint32_t count, int32_t nReptLineNo);
FILE *fstk_FindFile(char const *fname, char **incPathUsed);
int32_t fstk_GetLine(void);
/*
 * File stack node carrying a name: used for both files and macros.
 * `name` is a flexible array member, so storage for the characters has to be
 * allocated past the end of the struct itself.
 */
struct FileStackNamedNode { /* NODE_FILE, NODE_MACRO */
struct FileStackNode node;
char name[]; /* File name for files, file::macro name for macros */
};

extern size_t nMaxRecursionDepth;

struct MacroArgs;

void fstk_Dump(struct FileStackNode const *node, uint32_t lineNo);
void fstk_DumpCurrent(void);
struct FileStackNode *fstk_GetFileStack(void);
/* The lifetime of the returned chars is until reaching the end of that file */
char const *fstk_GetFileName(void);

void fstk_AddIncludePath(char const *s);
/**
* @param path The user-provided file name
* @param fullPath The address of a pointer, which will be made to point at the full path
* The pointer's value must be a valid argument to `realloc`, including NULL
* @param size Current size of the buffer, or 0 if the pointer is NULL
* @return True if the file was found, false if no path worked
*/
bool fstk_FindFile(char const *path, char **fullPath, size_t *size);

bool yywrap(void);
void fstk_RunInclude(char const *path);
void fstk_RunMacro(char const *macroName, struct MacroArgs *args);
void fstk_RunRept(uint32_t count, int32_t nReptLineNo, char *body, size_t size);

void fstk_Init(char const *mainPath, size_t maxRecursionDepth);

#endif /* RGBDS_ASM_FSTACK_H */
109 changes: 48 additions & 61 deletions include/asm/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,78 +9,65 @@
#ifndef RGBDS_ASM_LEXER_H
#define RGBDS_ASM_LEXER_H

#include <stdint.h>
#include <stdio.h>

#define LEXHASHSIZE (1 << 11)
#define MAXSTRLEN 255

struct sLexInitString {
char *tzName;
uint32_t nToken;
};
struct LexerState;
extern struct LexerState *lexerState;
extern struct LexerState *lexerStateEOL;

struct sLexFloat {
uint32_t (*Callback)(char *s, uint32_t size);
uint32_t nToken;
};
/* Returns the lexer state pointer currently stored in the global `lexerState` */
static inline struct LexerState *lexer_GetState(void)
{
return lexerState;
}

struct yy_buffer_state {
/* Actual starting address */
char *pBufferRealStart;
/* Address where the data is initially written after a safety margin */
char *pBufferStart;
char *pBuffer;
size_t nBufferSize;
uint32_t oAtLineStart;
};
/*
 * Stores `state` in the global `lexerState` (the value that `lexer_GetState`
 * returns); performs no other action.
 */
static inline void lexer_SetState(struct LexerState *state)
{
lexerState = state;
}

enum eLexerState {
LEX_STATE_NORMAL,
LEX_STATE_MACROARGS
};
/*
 * Stores `state` in the global `lexerStateEOL`; performs no other action.
 * NOTE(review): presumably the lexer switches to this state once it reaches the
 * end of the current line — confirm against lexer.c
 */
static inline void lexer_SetStateAtEOL(struct LexerState *state)
{
lexerStateEOL = state;
}

struct sStringExpansionPos {
char *tzName;
char *pBuffer;
char *pBufferPos;
struct sStringExpansionPos *pParent;
};
extern char const *binDigits;
extern char const *gfxDigits;

#define INITIAL 0
#define macroarg 3
/*
 * Stores `digits` in the global `binDigits`. Only the pointer is kept; the
 * characters it points to are not copied.
 * NOTE(review): presumably the characters accepted as binary digits (`opt b`)
 * — confirm against lexer.c
 */
static inline void lexer_SetBinDigits(char const *digits)
{
binDigits = digits;
}

typedef struct yy_buffer_state *YY_BUFFER_STATE;
/*
 * Stores `digits` in the global `gfxDigits`. Only the pointer is kept; the
 * characters it points to are not copied.
 * NOTE(review): presumably the characters accepted in graphics constants
 * (`opt g`) — confirm against lexer.c
 */
static inline void lexer_SetGfxDigits(char const *digits)
{
gfxDigits = digits;
}

void setup_lexer(void);
/*
* `path` is referenced, but not held onto..!
*/
struct LexerState *lexer_OpenFile(char const *path);
struct LexerState *lexer_OpenFileView(char *buf, size_t size, uint32_t lineNo);
void lexer_RestartRept(uint32_t lineNo);
void lexer_DeleteState(struct LexerState *state);
void lexer_Init(void);

void yy_set_state(enum eLexerState i);
YY_BUFFER_STATE yy_create_buffer(FILE *f);
YY_BUFFER_STATE yy_scan_bytes(char const *mem, uint32_t size);
void yy_delete_buffer(YY_BUFFER_STATE buf);
void yy_switch_to_buffer(YY_BUFFER_STATE buf);
uint32_t lex_FloatAlloc(const struct sLexFloat *tok);
void lex_FloatAddRange(uint32_t id, uint16_t start, uint16_t end);
void lex_FloatDeleteRange(uint32_t id, uint16_t start, uint16_t end);
void lex_FloatAddFirstRange(uint32_t id, uint16_t start, uint16_t end);
void lex_FloatDeleteFirstRange(uint32_t id, uint16_t start, uint16_t end);
void lex_FloatAddSecondRange(uint32_t id, uint16_t start, uint16_t end);
void lex_FloatDeleteSecondRange(uint32_t id, uint16_t start, uint16_t end);
void lex_Init(void);
void lex_AddStrings(const struct sLexInitString *lex);
void lex_SetBuffer(char *buffer, uint32_t len);
void lex_BeginStringExpansion(const char *tzName);
int yywrap(void);
int yylex(void);
void yyunput(char c);
void yyunputstr(const char *s);
void yyskipbytes(uint32_t count);
void yyunputbytes(uint32_t count);
/*
 * Modes that can be passed to `lexer_SetMode`.
 * NOTE(review): the per-mode notes below are inferred from the names only —
 * confirm against lexer.c
 */
enum LexerMode {
/* Presumably regular tokenization */
LEXER_NORMAL,
/* Presumably raw (untokenized) reading — TODO confirm */
LEXER_RAW,
/* Presumably skips input until a matching ELIF — TODO confirm */
LEXER_SKIP_TO_ELIF,
/* Presumably skips input until a matching ENDC — TODO confirm */
LEXER_SKIP_TO_ENDC
};

extern YY_BUFFER_STATE pCurrentBuffer;
extern struct sStringExpansionPos *pCurrentStringExpansion;
void lexer_SetMode(enum LexerMode mode);
void lexer_ToggleStringExpansion(bool enable);

void upperstring(char *s);
void lowerstring(char *s);
char const *lexer_GetFileName(void);
uint32_t lexer_GetLineNo(void);
uint32_t lexer_GetColNo(void);
void lexer_DumpStringExpansions(void);
int yylex(void);
void lexer_CaptureRept(char **capture, size_t *size);
void lexer_CaptureMacroBody(char **capture, size_t *size);

#endif /* RGBDS_ASM_LEXER_H */
1 change: 1 addition & 0 deletions include/asm/macro.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ char const *macro_GetArg(uint32_t i);
uint32_t macro_GetUniqueID(void);
char const *macro_GetUniqueIDStr(void);
void macro_SetUniqueID(uint32_t id);
uint32_t macro_UseNewUniqueID(void);
void macro_ShiftCurrentArgs(void);
uint32_t macro_NbArgs(void);

Expand Down
4 changes: 4 additions & 0 deletions include/asm/main.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ void opt_Push(void);
void opt_Pop(void);
void opt_Parse(char *s);

void upperstring(char *s);
void lowerstring(char *s);

/* TODO: are these really needed? */
#define YY_FATAL_ERROR fatalerror

#ifdef YYLMAX
Expand Down
2 changes: 2 additions & 0 deletions include/asm/output.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ struct Expression;
extern char *tzObjectname;
extern struct Section *pSectionList, *pCurrentSection;

void out_RegisterNode(struct FileStackNode *node);
void out_ReplaceNode(struct FileStackNode *node);
void out_SetFileName(char *s);
void out_CreatePatch(uint32_t type, struct Expression const *expr,
uint32_t ofs);
Expand Down
24 changes: 15 additions & 9 deletions include/asm/symbol.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,21 @@ struct Symbol {
bool isExported; /* Whether the symbol is to be exported */
bool isBuiltin; /* Whether the symbol is a built-in */
struct Section *section;
char fileName[_MAX_PATH + 1]; /* File where the symbol was defined. */
uint32_t fileLine; /* Line where the symbol was defined. */
struct FileStackNode *src; /* Where the symbol was defined */
uint32_t fileLine; /* Line where the symbol was defined */

bool hasCallback;
union {
struct { /* If sym_IsNumeric */
int32_t value;
int32_t (*callback)(void);
};
struct { /* For SYM_MACRO */
uint32_t macroSize;
/* If sym_IsNumeric */
int32_t value;
int32_t (*numCallback)(void);
/* For SYM_MACRO */
struct {
size_t macroSize;
char *macro;
};
/* For SYM_EQUS, TODO: separate "base" fields from SYM_MACRO */
char const *(*strCallback)(void); /* For SYM_EQUS */
};
ISSOtm marked this conversation as resolved.
Show resolved Hide resolved

uint32_t ID; /* ID of the symbol in the object file (-1 if none) */
Expand Down Expand Up @@ -101,6 +104,8 @@ static inline bool sym_IsExported(struct Symbol const *sym)
*/
static inline char const *sym_GetStringValue(struct Symbol const *sym)
{
	/* Symbols flagged `hasCallback` get their string from `strCallback`; the rest read `macro` */
	return sym->hasCallback ? sym->strCallback() : sym->macro;
}

Expand All @@ -114,9 +119,10 @@ void sym_Export(char const *symName);
struct Symbol *sym_AddEqu(char const *symName, int32_t value);
struct Symbol *sym_AddSet(char const *symName, int32_t value);
uint32_t sym_GetPCValue(void);
uint32_t sym_GetConstantSymValue(struct Symbol const *sym);
uint32_t sym_GetConstantValue(char const *s);
struct Symbol *sym_FindSymbol(char const *symName);
struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo);
struct Symbol *sym_AddMacro(char const *symName, int32_t defLineNo, char *body, size_t size);
struct Symbol *sym_Ref(char const *symName);
struct Symbol *sym_AddString(char const *symName, char const *value);
uint32_t sym_GetDefinedValue(char const *s);
Expand Down
1 change: 1 addition & 0 deletions include/asm/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <stdint.h>

uint32_t calchash(const char *s);
char const *print(int c);
size_t readUTF8Char(uint8_t *dest, char const *src);

#endif /* RGBDS_UTIL_H */
Loading