Skip to content

Commit

Permalink
Lexer: parse operators; include quotes in char/string tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanjermakov committed Jun 3, 2023
1 parent 2c817c6 commit dcc4ac0
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 18 deletions.
4 changes: 2 additions & 2 deletions src/grammar.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ operand ::= STRING
| type-expr
;

infix-operator ::= PLUS | MINUS | ASTERISK | SLASH | CARET | PERCENT | PERIOD | EQUALS | NOT-EQUALS | CLOSE-CHEVRON
| GREATER-EQ | OPEN-CHEVRON | LESS-EQ | AND | OR
infix-operator ::= PLUS | MINUS | ASTERISK | SLASH | CARET | PERCENT | PERIOD | EQUALS-OP | NOT-EQUALS-OP
| CLOSE-CHEVRON | GREATER-EQ | OPEN-CHEVRON | LESS-EQ | AND | OR
;

prefix-op ::= PLUS | MINUS | EXCL | SPREAD
Expand Down
31 changes: 28 additions & 3 deletions src/lexer/lexer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,41 @@ fn main(): Unit {

it('tokenize string literal', () => {
expect(tokenize(`"string 123 \n ok"`)).toEqual([
{ name: 'string', value: 'string 123 \n ok', location: { start: 0, end: 16 } },
{ name: 'string', value: `"string 123 \n ok"`, location: { start: 0, end: 16 } },
{ name: 'eof', value: '', location: { start: 17, end: 17 } }
])
})

it('tokenize char literal', () => {
expect(tokenize('\'?\'')).toEqual([
{ name: 'char', value: '?', location: { start: 0, end: 2 } },
expect(tokenize(`'?'`)).toEqual([
{ name: 'char', value: `'?'`, location: { start: 0, end: 2 } },
{ name: 'eof', value: '', location: { start: 3, end: 3 } }
])
})

it('tokenize expression', () => {
    // One expression mixing a number, an infix operator with no surrounding
    // whitespace, call chains, a string literal, and nested parentheses.
    const source = `1+call("str").ok() / (12 - a())`

    // Expected [name, value] pairs in source order; locations are not checked here.
    const expected = [
        ['number', '1'],
        ['plus', '+'],
        ['identifier', 'call'],
        ['open-paren', '('],
        ['string', '"str"'],
        ['close-paren', ')'],
        ['period', '.'],
        ['identifier', 'ok'],
        ['open-paren', '('],
        ['close-paren', ')'],
        ['slash', '/'],
        ['open-paren', '('],
        ['number', '12'],
        ['minus', '-'],
        ['identifier', 'a'],
        ['open-paren', '('],
        ['close-paren', ')'],
        ['close-paren', ')'],
        ['eof', '']
    ]

    const actual = tokenize(source).map(({ name, value }) => [name, value])
    expect(actual).toEqual(expected)
})

})
46 changes: 33 additions & 13 deletions src/lexer/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,15 @@ export const lexerTokenNames = <const>[
'close-brace',
'open-chevron',
'close-chevron',
'colon',
'comma',
'period',
'equals',

'plus',
'minus',
'asterisk',
'slash',
'caret',
'percent',
'not-equals',
'equals-op',
'not-equals-op',
'greater-eq',
'open-chevron',
'less-eq',
Expand All @@ -35,6 +32,11 @@ export const lexerTokenNames = <const>[
'excl',
'spread',

'colon',
'comma',
'period',
'equals',

// dynamic
'identifier',
'string',
Expand Down Expand Up @@ -73,10 +75,26 @@ export const constTokenMap: Map<LexerTokenName, string> = new Map([
['close-brace', '}'],
['open-chevron', '<'],
['close-chevron', '>'],

['equals-op', '=='],
['plus', '+'],
['minus', '-'],
['asterisk', '*'],
['slash', '/'],
['caret', '^'],
['percent', '%'],
['not-equals-op', '!='],
['greater-eq', '>='],
['less-eq', '<='],
['and', '&&'],
['or', '||'],
['excl', '!'],
['spread', '..'],

['colon', ':'],
['comma', ','],
['period', '.'],
['equals', '=']
['equals', '='],
])

export const tokenize = (code: String): LexerToken[] => {
Expand Down Expand Up @@ -173,17 +191,18 @@ const parseNumberLiteral = (chars: string[], tokens: LexerToken[], pos: { pos: n
* @param pos
*/
const parseCharLiteral = (chars: string[], tokens: LexerToken[], pos: { pos: number }): boolean => {
if (chars[pos.pos] === `'`) {
const quote = `'`
if (chars[pos.pos] === quote) {
const start = pos.pos
pos.pos++
const charLiteral: string[] = []
while (chars[pos.pos] !== `'`) {
while (chars[pos.pos] !== quote) {
charLiteral.push(chars[pos.pos])
pos.pos++
}
pos.pos++
// TODO: verify literal
tokens.push(createToken('char', charLiteral.join(''), pos, start))
tokens.push(createToken('char', quote + charLiteral.join('') + quote, pos, start))
return true
}
return false
Expand All @@ -197,20 +216,21 @@ const parseCharLiteral = (chars: string[], tokens: LexerToken[], pos: { pos: num
* @param pos
*/
const parseStringLiteral = (chars: string[], tokens: LexerToken[], pos: { pos: number }): boolean => {
if (chars[pos.pos] === '"') {
const quote = '"'
if (chars[pos.pos] === quote) {
const start = pos.pos
pos.pos++
const stringLiteral: string[] = []
while (chars[pos.pos] !== '"') {
while (chars[pos.pos] !== quote) {
if (chars.length === pos.pos) {
throw Error('no matching `"`')
throw Error(`no matching \`${quote}\``)
}
stringLiteral.push(chars[pos.pos])
pos.pos++
}
pos.pos++
// TODO: verify literal
tokens.push(createToken('string', stringLiteral.join(''), pos, start))
tokens.push(createToken('string', quote + stringLiteral.join('') + quote, pos, start))
return true
}
return false
Expand Down

0 comments on commit dcc4ac0

Please sign in to comment.