Commit 1051c25

Lexer pretty error; parser refactoring

ivanjermakov committed Jun 6, 2023
1 parent 909dead commit 1051c25

Showing 7 changed files with 99 additions and 26 deletions.
9 changes: 6 additions & 3 deletions src/error.ts

@@ -1,17 +1,20 @@
 import { TokenName } from './parser/parser'
 import { indexToLocation, LocationRange, prettyIndex, prettyLocation } from './location'
 import { Source } from './source'
+import { LexerToken } from './lexer/lexer'
 
 export interface SyntaxErrorInfo {
-    tokenChain: TokenName[],
     expected: TokenName[],
     got: TokenName,
     location: LocationRange
 }
 
+export const prettyLexerError = (token: LexerToken): string => {
+    return `lexer error: unknown token \`${token.value}\``
+}
+
 export const prettySyntaxError = (error: SyntaxErrorInfo): string => {
-    const chain = error.tokenChain.filter(n => !n.endsWith('_')).slice(-2).join('/')
-    return `syntax error: expected \`${error.expected}\`, got \`${error.got}\`, while parsing \`${chain}\``
+    return `syntax error: expected \`${error.expected}\`, got \`${error.got}\``
 }
 
 export const prettySourceMessage = (message: string, index: number, source: Source): string => {
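For illustration, the two helpers now render messages along these lines (a sketch: the token and error values below are invented, and the `expected` array is stringified by JavaScript's default comma join):

    // hypothetical inputs, following the shapes defined above
    prettyLexerError({ name: 'unknown', value: '§', location: { start: 4, end: 4 } })
    // "lexer error: unknown token `§`"
    prettySyntaxError({ expected: ['eof'], got: 'identifier', location: { start: 10, end: 12 } })
    // "syntax error: expected `eof`, got `identifier`"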
11 changes: 8 additions & 3 deletions src/index.ts

@@ -2,7 +2,8 @@ import { compactToken, flattenToken, parse } from './parser/parser'
 import { tokenize } from './lexer/lexer'
 import { readFileSync } from 'fs'
 import { join, resolve } from 'path'
-import { prettySourceMessage, prettySyntaxError } from './error'
+import { prettyLexerError, prettySourceMessage, prettySyntaxError } from './error'
+
 
 const version = JSON.parse(readFileSync(join(__dirname, '..', 'package.json')).toString()).version
 
@@ -16,10 +17,14 @@ if (!path) {
     console.log(usage)
     process.exit()
 }
 
 const source = { str: readFileSync(resolve(path)).toString(), filename: path }
-
-const token = parse(tokenize(source.str))
+const tokens = tokenize(source.str)
+if ('name' in tokens) {
+    console.error(prettySourceMessage(prettyLexerError(tokens), tokens.location.start, source))
+    process.exit(1)
+}
+const token = parse(tokens)
 if ('expected' in token) {
     console.error(prettySourceMessage(prettySyntaxError(token), token.location.start, source))
     process.exit(1)
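The `'name' in tokens` check doubles as a TypeScript type guard: `tokenize` now returns either a `LexerToken[]` on success or a single `LexerToken` (the offending `unknown` token) on failure, and a plain array has no `name` property. A minimal sketch of the same narrowing, with a hypothetical input string:

    import { tokenize } from './lexer/lexer'

    const result = tokenize('let x = 4')   // hypothetical input
    if ('name' in result) {
        // narrowed to LexerToken: lexing stopped at an unrecognized character
        console.error(`unknown token at index ${result.location.start}`)
    } else {
        // narrowed to LexerToken[]: the full stream, terminated by `eof`
        console.log(result.map(t => t.name))
    }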
22 changes: 16 additions & 6 deletions src/lexer/lexer.spec.ts

@@ -1,13 +1,23 @@
-import { tokenize } from './lexer'
+import { LexerToken, tokenize } from './lexer'
 import { expect } from '@jest/globals'
+import { prettyLexerError } from '../error'
 
 describe('lexer', () => {
+
+    const testTokenize = (code: string): LexerToken[] => {
+        const tokens = tokenize(code)
+        if ('name' in tokens) {
+            throw Error(prettyLexerError(tokens))
+        }
+        return tokens
+    }
+
     it('tokenize basic', () => {
         const code = `\
 let main = (): Unit {
     print(4)
 }`
-        const tokens = tokenize(code)
+        const tokens = testTokenize(code)
         expect(tokens.map(t => [t.name, t.value])).toEqual([
             ['let-keyword_', 'let'],
             ['identifier', 'main'],
@@ -29,28 +39,28 @@
     })
 
     it('tokenize number literal simple', () => {
-        expect(tokenize('14')).toEqual([
+        expect(testTokenize('14')).toEqual([
             { name: 'number', value: '14', location: { start: 0, end: 1 } },
             { name: 'eof', value: '', location: { start: 2, end: 2 } }
         ])
     })
 
     it('tokenize string literal', () => {
-        expect(tokenize(`"string 123 \n ok"`)).toEqual([
+        expect(testTokenize(`"string 123 \n ok"`)).toEqual([
             { name: 'string', value: `"string 123 \n ok"`, location: { start: 0, end: 16 } },
             { name: 'eof', value: '', location: { start: 17, end: 17 } }
         ])
     })
 
     it('tokenize char literal', () => {
-        expect(tokenize(`'?'`)).toEqual([
+        expect(testTokenize(`'?'`)).toEqual([
             { name: 'char', value: `'?'`, location: { start: 0, end: 2 } },
             { name: 'eof', value: '', location: { start: 3, end: 3 } }
         ])
     })
 
     it('tokenize expression', () => {
-        const tokens = tokenize(`1+call("str").ok() / (12 - a())`)
+        const tokens = testTokenize(`1+call("str").ok() / (12 - a())`)
         expect(tokens.map(t => [t.name, t.value])).toEqual([
             ['number', '1'],
             ['plus', '+'],
9 changes: 4 additions & 5 deletions src/lexer/lexer.ts

@@ -40,7 +40,6 @@ export const lexerTokenNames = <const>[
     'colon_',
     'comma_',
     'equals_',
-    'semicolon_',
 
     // dynamic
     'identifier',
@@ -102,7 +101,7 @@ export const isWhitespace = (char: string): boolean => char === ' ' || char ===
 
 export const isNewline = (char: string): boolean => char === '\n' || char === '\r'
 
-export const tokenize = (code: String): LexerToken[] => {
+export const tokenize = (code: String): LexerToken[] | LexerToken => {
     const pos = { pos: 0 }
     const chars = code.split('')
     const tokens: LexerToken[] = []
@@ -127,11 +126,11 @@ export const tokenize = (code: String): LexerToken[] => {
         if (parseStringLiteral(chars, tokens, pos)) {
             continue
         }
-        throw Error(`unknown token \`${chars[pos.pos]}\``)
+        return createToken(<LexerTokenName>'unknown', chars[pos.pos], pos)
     }
 
     pos.pos++
-    tokens.push(createToken('eof', '', pos))
+    tokens.push(createToken('eof', '', pos, pos.pos - 1))
 
     return tokens
 }
@@ -244,7 +243,7 @@ const parseStringLiteral = (chars: string[], tokens: LexerToken[], pos: { pos: n
 const createToken = (
     name: LexerTokenName,
     value: string, pos: { pos: number },
-    start: number = pos.pos - 1
+    start: number = pos.pos
 ): LexerToken => {
     return { name, value, location: { start, end: pos.pos - 1 } }
 }
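The `createToken` change reads as an off-by-one realignment: the default `start` now points at the current character, which suits the new `unknown` error token (returned before `pos` advances), while the `eof` call passes `pos.pos - 1` explicitly to keep its previous location. An assumed trace under that reading, using the locations the spec above asserts for `tokenize('14')`:

    // assumed trace for tokenize('14'):
    // after consuming both digits:      pos.pos === 2
    // number token:                     { start: 0, end: pos.pos - 1 }  ->  { start: 0, end: 1 }
    // trailing pos.pos++:               pos.pos === 3
    // eof with explicit pos.pos - 1:    { start: 2, end: 2 }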
50 changes: 50 additions & 0 deletions src/parser/locate.ts

@@ -0,0 +1,50 @@
+import { LexerTokenName } from '../lexer/lexer'
+import { ParseBranch, ParserTokenName, rules, TokenName, Transform } from './parser'
+
+export const firstTokens = (token: TokenName): Set<LexerTokenName> => {
+    const rule = rules.get(<ParserTokenName>token)
+    if (rule) {
+        return new Set(rule.branches.flatMap(b => [...transformFirstTokens({
+            name: <ParserTokenName>token,
+            branch: b
+        })]))
+    } else {
+        return new Set([<LexerTokenName>token])
+    }
+}
+
+const transformFirstTokens = (transform: Transform, index: number = 0): Set<LexerTokenName> => {
+    if (index >= transform.branch.length) {
+        return new Set()
+    }
+    const tokens: Set<LexerTokenName> = new Set()
+    const t = <LexerTokenName>transform.branch[index]
+    const rule = rules.get(<ParserTokenName>t)
+    if (rule) {
+        if (canMatchEmpty(rule.name)) {
+            transformFirstTokens(transform, index + 1).forEach(t => tokens.add(t))
+        }
+        firstTokens(rule.name).forEach(t => tokens.add(t))
+    } else {
+        tokens.add(t)
+    }
+    return tokens
+}
+
+const canMatchEmpty = (token: TokenName): boolean => {
+    const rule = rules.get(<ParserTokenName>token)
+    if (rule) {
+        return rule.branches.some(b => branchCanMatchEmpty(b))
+    } else {
+        return token === 'e'
+    }
+}
+
+const branchCanMatchEmpty = (branch: ParseBranch): boolean => {
+    for (let t of branch) {
+        if (!canMatchEmpty(t)) {
+            return false
+        }
+    }
+    return true
+}
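`firstTokens` is a FIRST-set computation: for a parser rule it collects every lexer token that can begin a match, skipping past nullable prefixes (empty branches are spelled `'e'` in the grammar) via `canMatchEmpty`. A self-contained sketch of the same idea over a toy grammar — the rule names and branches below are invented; the real ones come from grammar.json:

    // Toy grammar: expr -> prefix number ; prefix -> minus | e
    type Branch = string[]
    const toyRules = new Map<string, Branch[]>([
        ['expr', [['prefix', 'number']]],
        ['prefix', [['minus'], ['e']]],
    ])

    const canBeEmpty = (token: string): boolean => {
        const branches = toyRules.get(token)
        if (!branches) return token === 'e'          // leaf: only epsilon is nullable
        return branches.some(b => b.every(canBeEmpty))
    }

    const first = (token: string): Set<string> => {
        const branches = toyRules.get(token)
        if (!branches) return new Set(token === 'e' ? [] : [token])   // leaf token
        const tokens = new Set<string>()
        for (const branch of branches) {
            for (const t of branch) {
                first(t).forEach(x => tokens.add(x))
                if (!canBeEmpty(t)) break            // later symbols can't start the match
            }
        }
        return tokens
    }

    console.log(first('expr'))   // Set(2) { 'minus', 'number' }: prefix is nullable

Note the real implementation recomputes these sets recursively on each call; the `firstTokensMap` added to parser.ts below precomputes them once per rule.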
5 changes: 4 additions & 1 deletion src/parser/parser.spec.ts

@@ -1,12 +1,15 @@
 import { tokenize } from '../lexer/lexer'
 import { expect } from '@jest/globals'
 import { compactToken, flattenToken, parse, ParserTokenName, Token } from './parser'
-import { prettySyntaxError } from '../error'
+import { prettyLexerError, prettySyntaxError } from '../error'
 
 describe('parser', () => {
 
     const parseToken = (source: string, root: ParserTokenName = 'program'): Token => {
         const tokens = tokenize(source)
+        if ('name' in tokens) {
+            throw Error(prettyLexerError(tokens))
+        }
         const token = parse(tokens, root)
         if ('expected' in token) {
             throw Error(prettySyntaxError(token))
19 changes: 11 additions & 8 deletions src/parser/parser.ts

@@ -3,6 +3,7 @@ import { readFileSync } from 'fs'
 import { join } from 'path'
 import { SyntaxErrorInfo } from '../error'
 import { LocationRange } from '../location'
+import { firstTokens } from './locate'
 
 export const parserTokenNames = <const>[
     'program',
@@ -53,33 +54,37 @@ export type ParseBranch = TokenName[]
 const rawRules = JSON.parse(readFileSync(join(__dirname, '..', 'grammar.json')).toString()).rules
 export const rules: Map<ParserTokenName, Rule> = new Map(rawRules.map((r: Rule) => [r.name, r]))
 
+export const firstTokensMap: Map<ParserTokenName, Set<LexerTokenName>> =
+    new Map([...rules.keys()].map((n) => [<ParserTokenName>n, firstTokens(n)]))
+
 export const parse = (tokens: LexerToken[], node: TokenName = 'program'): Token | SyntaxErrorInfo => {
     const token = parseToken(tokens, node)
     if (token === true) {
-        return { tokenChain: [node], expected: [node], got: tokens[0].name, location: tokens[0].location }
+        return { expected: [node], got: tokens[0].name, location: tokens[0].location }
     }
     if ('expected' in token) {
         return token
     }
+    console.dir(compactToken(flattenToken(token)), { depth: null, colors: true, compact: true })
     if (tokenSize(token) === tokens.length - 1) {
         return token
     } else {
-        return { tokenChain: [node], expected: ['eof'], got: tokens[0].name, location: tokens[0].location }
+        const lastParsed = tokens[tokenSize(token)]
+        return { expected: ['eof'], got: lastParsed.name, location: lastParsed.location }
    }
 }
 
 export const parseToken = (tokens: LexerToken[],
                            node: TokenName = 'program',
-                           index: number = 0,
-                           tokenChain: TokenName[] = [node]
+                           index: number = 0
 ): Token | SyntaxErrorInfo | true => {
     const rule = rules.get(<ParserTokenName>node)!
     if (rule) {
         let syntaxError: SyntaxErrorInfo | undefined
         for (const branch of rule.branches) {
             if (isEmptyBranch(branch)) return true
             const transform = { name: <ParserTokenName>node, branch }
-            const branchToken = parseTransform(transform, tokens, index, tokenChain)
+            const branchToken = parseTransform(transform, tokens, index)
             if ('name' in branchToken) {
                 return branchToken
             } else {
@@ -91,7 +96,6 @@ export const parseToken = (tokens: LexerToken[],
         return syntaxError!
     } else {
         const error: SyntaxErrorInfo = {
-            tokenChain,
            expected: [node],
            got: tokens[index].name,
            location: tokens[index].location
@@ -103,11 +107,10 @@ export const parseToken = (tokens: LexerToken[],
 const parseTransform = (transform: Transform,
                         tokens: LexerToken[],
                         index: number,
-                        tokenChain: TokenName[]
 ): Token | SyntaxErrorInfo => {
     const nodes = []
     for (const branchTokenName of transform.branch) {
-        const branchToken = parseToken(tokens, branchTokenName, index, [...tokenChain, transform.name])
+        const branchToken = parseToken(tokens, branchTokenName, index)
         if (branchToken === true) continue
         if ('expected' in branchToken) return branchToken
         nodes.push(branchToken)
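Nothing in the hunks shown consults `firstTokensMap` yet; it appears to be groundwork for pruning branches before attempting them. A hypothetical use, not part of this commit:

    // skip a branch whose leading rule can never start with the next token
    const viable = (node: ParserTokenName, next: LexerTokenName): boolean =>
        firstTokensMap.get(node)?.has(next) ?? true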
