Skip to content

Commit 77519e4

Browse files
authored
Merge PR #277: Nearley integration
2 parents f8069cf + 22bd711 commit 77519e4

File tree

15 files changed

+1309
-707
lines changed

15 files changed

+1309
-707
lines changed

.eslintignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
/coverage
55
/vscode
66
webpack.*.js
7+
/src/parser/grammar.ts

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ node_modules
44
.DS_Store
55
coverage
66
.eslintcache
7+
src/parser/grammar.ts

.prettierignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33
/lib
44
/node_modules
55
yarn.lock
6-
LICENSE
6+
LICENSE
7+
/src/parser/grammar.ts

package.json

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,16 @@
8282
"pretty": "prettier --write .",
8383
"pretty:check": "prettier --check .",
8484
"fix": "yarn pretty && eslint --fix .",
85-
"test": "jest",
85+
"test": "yarn grammar && jest",
8686
"test:watch": "yarn test -- --watch",
8787
"check": "yarn ts:check && yarn pretty:check && yarn lint && yarn test",
88-
"prepare": "yarn clean && yarn fix && yarn check && yarn build",
88+
"prepare": "yarn clean && yarn grammar && yarn fix && yarn check && yarn build",
8989
"pre-commit": "npm-run-all --parallel ts:changes lint:changes",
90+
"grammar": "nearleyc src/parser/grammar.ne -o src/parser/grammar.ts",
9091
"build:babel": "babel src --out-dir lib --extensions .ts --source-maps",
9192
"build:types": "ttsc --module commonjs --emitDeclarationOnly --isolatedModules",
9293
"build:minified": "webpack --config webpack.prod.js",
93-
"build": "npm-run-all --parallel build:babel build:types build:minified",
94+
"build": "yarn grammar && npm-run-all --parallel build:babel build:types build:minified",
9495
"release": "release-it"
9596
},
9697
"repository": {
@@ -101,7 +102,8 @@
101102
"url": "https://github.com/sql-formatter-org/sql-formatter/issues"
102103
},
103104
"dependencies": {
104-
"argparse": "^2.0.1"
105+
"argparse": "^2.0.1",
106+
"nearley": "^2.20.1"
105107
},
106108
"devDependencies": {
107109
"@babel/cli": "^7.10.4",
@@ -112,6 +114,7 @@
112114
"@jest/globals": "^28.1.2",
113115
"@types/babel__core": "^7.1.15",
114116
"@types/jest": "^28.1.4",
117+
"@types/nearley": "^2.11.2",
115118
"@typescript-eslint/eslint-plugin": "^5.21.0",
116119
"@typescript-eslint/parser": "^5.21.0",
117120
"@zerollup/ts-transform-paths": "^1.7.18",

src/formatter/Formatter.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { indentString } from 'src/formatter/config';
33
import Params from 'src/formatter/Params';
44
import Tokenizer from 'src/lexer/Tokenizer';
55

6-
import Parser from 'src/parser/Parser';
6+
import { createParser } from 'src/parser/createParser';
77
import { Statement } from 'src/parser/ast';
88

99
import formatCommaPositions from './formatCommaPositions';
@@ -54,8 +54,7 @@ export default class Formatter {
5454
}
5555

5656
private parse(query: string): Statement[] {
57-
const tokens = this.cachedTokenizer().tokenize(query, this.cfg.paramTypes || {});
58-
return new Parser(tokens).parse();
57+
return createParser(this.cachedTokenizer()).parse(query, this.cfg.paramTypes || {});
5958
}
6059

6160
private formatAst(statements: Statement[]): string {

src/lexer/disambiguateTokens.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import { Token, TokenType } from 'src/lexer/token';
2+
3+
/**
4+
* Ensures that all RESERVED_FUNCTION_NAME tokens are followed by "(".
5+
* If they're not, converts the token to RESERVED_KEYWORD.
6+
*
7+
* When an IDENTIFIER or RESERVED_KEYWORD token is followed by "[",
8+
* converts it to ARRAY_IDENTIFIER or ARRAY_KEYWORD accordingly.
9+
*
10+
* This is needed to avoid ambiguity in the parser, which expects function names
11+
* to always be followed by open-paren, and to distinguish between
12+
* array accessor `foo[1]` and array literal `[1, 2, 3]`.
13+
*/
14+
export function disambiguateTokens(tokens: Token[]): Token[] {
15+
return tokens.map((token, i) => {
16+
if (token.type === TokenType.RESERVED_FUNCTION_NAME) {
17+
const nextToken = tokens[i + 1];
18+
if (!nextToken || !isOpenParen(nextToken)) {
19+
return { ...token, type: TokenType.RESERVED_KEYWORD };
20+
}
21+
}
22+
if (token.type === TokenType.IDENTIFIER) {
23+
const nextToken = tokens[i + 1];
24+
if (nextToken && isOpenBracket(nextToken)) {
25+
return { ...token, type: TokenType.ARRAY_IDENTIFIER };
26+
}
27+
}
28+
if (token.type === TokenType.RESERVED_KEYWORD) {
29+
const nextToken = tokens[i + 1];
30+
if (nextToken && isOpenBracket(nextToken)) {
31+
return { ...token, type: TokenType.ARRAY_KEYWORD };
32+
}
33+
}
34+
return token;
35+
});
36+
}
37+
38+
const isOpenParen = (t: Token): boolean => t.type === TokenType.OPEN_PAREN && t.text === '(';
39+
40+
const isOpenBracket = (t: Token): boolean => t.type === TokenType.OPEN_PAREN && t.text === '[';

src/lexer/token.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ export enum TokenType {
1212
RESERVED_COMMAND = 'RESERVED_COMMAND',
1313
RESERVED_SELECT = 'RESERVED_SELECT',
1414
RESERVED_JOIN = 'RESERVED_JOIN',
15+
ARRAY_IDENTIFIER = 'ARRAY_IDENTIFIER', // IDENTIFIER token in front of [
16+
ARRAY_KEYWORD = 'ARRAY_KEYWORD', // RESERVED_KEYWORD token in front of [
1517
CASE = 'CASE',
1618
END = 'END',
1719
LIMIT = 'LIMIT',
@@ -83,6 +85,7 @@ export const isReserved = (token: Token): boolean =>
8385
token.type === TokenType.RESERVED_SELECT ||
8486
token.type === TokenType.RESERVED_SET_OPERATION ||
8587
token.type === TokenType.RESERVED_JOIN ||
88+
token.type === TokenType.ARRAY_KEYWORD ||
8689
token.type === TokenType.CASE ||
8790
token.type === TokenType.END ||
8891
token.type === TokenType.LIMIT ||

src/parser/LexerAdapter.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import { Token, TokenType } from 'src/lexer/token';
2+
3+
// The Nearley type definitions say that a Token must have a value field,
4+
// which is wrong, however: Nearley actually expects a text field.
5+
type NearleyToken = Token & { value: string };
6+
7+
export default class LexerAdapter {
8+
private index = 0;
9+
private tokens: Token[] = [];
10+
11+
constructor(private tokenize: (chunk: string) => Token[]) {}
12+
13+
reset(chunk: string, _info: any) {
14+
this.index = 0;
15+
this.tokens = this.tokenize(chunk);
16+
}
17+
18+
next(): NearleyToken | undefined {
19+
return this.tokens[this.index++] as NearleyToken | undefined;
20+
}
21+
22+
save(): any {}
23+
24+
formatError(token: NearleyToken) {
25+
return `Parse error at token: ${token.text}`;
26+
}
27+
28+
has(name: string): boolean {
29+
return name in TokenType;
30+
}
31+
}

src/parser/Parser.ts

Lines changed: 0 additions & 211 deletions
This file was deleted.

0 commit comments

Comments
 (0)