Skip to content

Commit

Permalink
Lexer: optimize
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanjermakov committed Feb 12, 2024
1 parent 3c9ad15 commit ad964db
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 49 deletions.
9 changes: 5 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
"description": "Statically typed programming language for the web",
"scripts": {
"run": "bun run src/index.ts",
"build": "npm run build:compile && npm run build:std && npm run build:prepublish",
"build": "npm run build:compile && npm run build:std",
"build:compile": "tsc",
"build:std": "bun --target=node build-std-index.ts && cp -r src/std dist",
"build:prepublish": "cp package.json dist && find dist -name \"*.spec.*\" -delete",
"build:node": "npm run build && npm run build:node:imports",
"build:node": "npm run clean && npm run build && npm run build:node:imports",
"build:node:imports": "bun --target=node node-import-transform.ts",
"prepublish": "cp package.json dist && find dist -name \"*.spec.*\" -delete",
"publish": "npm run build && npm run prepublish",
"run:node": "node dist",
"test": "bun test",
"ci": "npm run test",
"clean": "rm -r dist"
"clean": "rm -rf dist"
},
"type": "module",
"main": "dist/index.js",
Expand Down
2 changes: 1 addition & 1 deletion src/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ describe('nois', () => {
const parser = new Parser(tokens)
parseModule(parser)

expect(parser.errors.length).toEqual(0)
expect(parser.errors).toEqual([])

const root = parser.buildTree()

Expand Down
77 changes: 33 additions & 44 deletions src/lexer/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ export interface LexerToken {
span: Span
}

export const lexerKeywordMap: Map<TokenKind, string> = new Map([
export const lexerKeywordMap: [TokenKind, string][] = [
['use-keyword', 'use'],
['type-keyword', 'type'],
['trait-keyword', 'trait'],
Expand Down Expand Up @@ -115,9 +115,13 @@ export const lexerKeywordMap: Map<TokenKind, string> = new Map([
['equals', '='],
['underscore', '_'],
['at', '@']
])
]

// Boolean literal keywords. Fed to parseConstToken separately from
// lexerKeywordMap so both literals lex as kind 'bool'; registered before
// parseName in the lexer's fn list so 'true'/'false' win over plain names.
export const boolMap: [TokenKind, string][] = [
    ['bool', 'true'],
    ['bool', 'false']
]

const boolRegex = /^(true|false)/
const floatRegex = /^((\d+(\.\d*)?e[+-]?\d+)|(\d+\.\d*)|(\d*\.\d+))/
const escapeCharReg = /(\\[btnvfr\\'"])/
const unicodeCharReg = /(\\u{[0-9a-fA-F]{1,4}})/
Expand Down Expand Up @@ -160,11 +164,11 @@ export const tokenize = (code: string): LexerToken[] => {
}

const fns = [
parseBool,
parseFloat,
parseInt,
parseComment,
parseConstToken,
parseConstToken(lexerKeywordMap),
parseConstToken(boolMap),
parseName,
parseCharLiteral,
parseStringLiteral
Expand Down Expand Up @@ -202,6 +206,7 @@ export const tokenize = (code: string): LexerToken[] => {
}

const parseComment = (chars: string[], tokens: LexerToken[], pos: { pos: number }): boolean => {
if (chars[pos.pos] !== '/') return false
if (chars.slice(pos.pos, pos.pos + 2).join('') === '//') {
const start = pos.pos
const buffer: string[] = []
Expand All @@ -215,49 +220,32 @@ const parseComment = (chars: string[], tokens: LexerToken[], pos: { pos: number
return false
}

const parseConstToken = (chars: string[], tokens: LexerToken[], pos: { pos: number }): boolean => {
const codeLeft = chars.slice(pos.pos).join('')
const pair = [...lexerKeywordMap.entries()].find(([, v]) => {
if (!isAlpha(v[0])) {
return codeLeft.startsWith(v)
} else {
const trailingChar = codeLeft.at(v.length)
return codeLeft.startsWith(v) && (!trailingChar || !isAlpha(trailingChar))
const parseConstToken =
(constMap: [TokenKind, string][]) =>
(chars: string[], tokens: LexerToken[], pos: { pos: number }): boolean => {
for (const [kind, value] of constMap) {
if (chars[pos.pos] !== value[0]) continue
const codeLeft = chars.slice(pos.pos).join('')
const trailingChar = codeLeft.at(value.length)
if (codeLeft.startsWith(value) && (!isAlpha(value[0]) || !trailingChar || !isAlpha(trailingChar))) {
const start = pos.pos
pos.pos += value.length
tokens.push(createToken(kind, value, pos, start))
return true
}
}
})
if (pair) {
const [kind, value] = pair
const start = pos.pos
pos.pos += value.length
tokens.push(createToken(kind, value, pos, start))
return true
return false
}
return false
}

/**
 * Lex an identifier: an alphabetic first character followed by any run of
 * alphanumeric characters.
 *
 * On success pushes a 'name' token, advances pos past the identifier, and
 * returns true; otherwise leaves pos untouched and returns false.
 */
const parseName = (chars: string[], tokens: LexerToken[], pos: { pos: number }): boolean => {
    // identifiers must start with an alphabetic character
    if (!isAlpha(chars[pos.pos])) {
        return false
    }
    const start = pos.pos
    let name = ''
    while (isAlpha(chars[pos.pos]) || isNumeric(chars[pos.pos])) {
        name += chars[pos.pos]
        pos.pos++
    }
    tokens.push(createToken('name', name, pos, start))
    return true
}

const parseBool = (chars: string[], tokens: LexerToken[], pos: { pos: number }): boolean => {
const leftCode = chars.slice(pos.pos).join('')
const match = leftCode.match(boolRegex)
if (!match) return false

const bool = match[0]
if (!isAlpha(chars[pos.pos])) return false
const start = pos.pos
pos.pos += bool.length
tokens.push(createToken('bool', bool, pos, start))
const name: string[] = []
while (isAlpha(chars[pos.pos]) || isNumeric(chars[pos.pos])) {
name.push(chars[pos.pos])
pos.pos++
}
tokens.push(createToken('name', name.join(''), pos, start))
return true
}

Expand All @@ -275,6 +263,7 @@ const parseFloat = (chars: string[], tokens: LexerToken[], pos: { pos: number })
}

const parseInt = (chars: string[], tokens: LexerToken[], pos: { pos: number }): boolean => {
if (!isNumeric(chars[pos.pos])) return false
const start = pos.pos
let int = ''
while (isNumeric(chars[pos.pos])) {
Expand Down Expand Up @@ -351,7 +340,7 @@ const parseUnterminatedString = (chars: string[], tokens: LexerToken[], pos: { p
return
}

const createToken = (name: TokenKind, value: string, pos: { pos: number }, start: number = pos.pos): LexerToken => {
// Build a LexerToken of the given kind/value whose span covers
// [start, pos.pos) — callers advance pos past the lexeme before calling.
const createToken = (name: TokenKind, value: string, pos: { pos: number }, start: number): LexerToken => ({
    kind: name,
    value,
    span: { start, end: pos.pos }
})

Expand Down

0 comments on commit ad964db

Please sign in to comment.