sql-formatter-org
diff --git a/‎.eslintignore‎
Lines changed: 1 addition & 0 deletions b/‎.eslintignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.prettierignore‎
Lines changed: 2 additions & 1 deletion b/‎.prettierignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎package.json‎
Lines changed: 7 additions & 4 deletions b/‎package.json‎
Lines changed: 7 additions & 4 deletions
diff --git a/‎src/formatter/Formatter.ts‎
Lines changed: 2 additions & 3 deletions b/‎src/formatter/Formatter.ts‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎src/lexer/disambiguateTokens.ts‎
Lines changed: 40 additions & 0 deletions b/‎src/lexer/disambiguateTokens.ts‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎src/lexer/token.ts‎
Lines changed: 3 additions & 0 deletions b/‎src/lexer/token.ts‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/parser/LexerAdapter.ts‎
Lines changed: 31 additions & 0 deletions b/‎src/parser/LexerAdapter.ts‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎src/parser/Parser.ts‎
Lines changed: 0 additions & 211 deletions b/‎src/parser/Parser.ts‎
Lines changed: 0 additions & 211 deletions
@@ -4,3 +4,4 @@
 /coverage
 /vscode
 webpack.*.js
+/src/parser/grammar.ts
@@ -4,3 +4,4 @@ node_modules
 .DS_Store
 coverage
 .eslintcache
+src/parser/grammar.ts
@@ -3,4 +3,5 @@
 /lib
 /node_modules
 yarn.lock
-LICENSE
+LICENSE
+/src/parser/grammar.ts
@@ -82,15 +82,16 @@
     "pretty": "prettier --write .",
     "pretty:check": "prettier --check .",
     "fix": "yarn pretty && eslint --fix .",
-    "test": "jest",
+    "test": "yarn grammar && jest",
     "test:watch": "yarn test -- --watch",
     "check": "yarn ts:check && yarn pretty:check && yarn lint && yarn test",
-    "prepare": "yarn clean && yarn fix && yarn check && yarn build",
+    "prepare": "yarn clean && yarn grammar && yarn fix && yarn check && yarn build",
     "pre-commit": "npm-run-all --parallel ts:changes lint:changes",
+    "grammar": "nearleyc src/parser/grammar.ne -o src/parser/grammar.ts",
     "build:babel": "babel src --out-dir lib --extensions .ts --source-maps",
     "build:types": "ttsc --module commonjs --emitDeclarationOnly --isolatedModules",
     "build:minified": "webpack --config webpack.prod.js",
-    "build": "npm-run-all --parallel build:babel build:types build:minified",
+    "build": "yarn grammar && npm-run-all --parallel build:babel build:types build:minified",
     "release": "release-it"
   },
   "repository": {
@@ -101,7 +102,8 @@
     "url": "https://github.com/sql-formatter-org/sql-formatter/issues"
   },
   "dependencies": {
-    "argparse": "^2.0.1"
+    "argparse": "^2.0.1",
+    "nearley": "^2.20.1"
   },
   "devDependencies": {
     "@babel/cli": "^7.10.4",
@@ -112,6 +114,7 @@
     "@jest/globals": "^28.1.2",
     "@types/babel__core": "^7.1.15",
     "@types/jest": "^28.1.4",
+    "@types/nearley": "^2.11.2",
     "@typescript-eslint/eslint-plugin": "^5.21.0",
     "@typescript-eslint/parser": "^5.21.0",
     "@zerollup/ts-transform-paths": "^1.7.18",
 
@@ -3,7 +3,7 @@ import { indentString } from 'src/formatter/config';
 import Params from 'src/formatter/Params';
 import Tokenizer from 'src/lexer/Tokenizer';
 
-import Parser from 'src/parser/Parser';
+import { createParser } from 'src/parser/createParser';
 import { Statement } from 'src/parser/ast';
 
 import formatCommaPositions from './formatCommaPositions';
@@ -54,8 +54,7 @@ export default class Formatter {
   }
 
   private parse(query: string): Statement[] {
-    const tokens = this.cachedTokenizer().tokenize(query, this.cfg.paramTypes || {});
-    return new Parser(tokens).parse();
+    return createParser(this.cachedTokenizer()).parse(query, this.cfg.paramTypes || {});
   }
 
   private formatAst(statements: Statement[]): string {
 
@@ -0,0 +1,40 @@
+import { Token, TokenType } from 'src/lexer/token';
+
+/**
+ * Ensures that all RESERVED_FUNCTION_NAME tokens are followed by "(".
+ * If they're not, converts the token to RESERVED_KEYWORD.
+ *
+ * When an IDENTIFIER or RESERVED_KEYWORD token is followed by "[",
+ * converts it to ARRAY_IDENTIFIER or ARRAY_KEYWORD accordingly.
+ *
+ * This is needed to avoid ambiguity in the parser, which expects function names
+ * to always be followed by open-paren, and to distinguish between
+ * array accessor `foo[1]` and array literal `[1, 2, 3]`.
+ */
+export function disambiguateTokens(tokens: Token[]): Token[] {
+  return tokens.map((token, i) => {
+    if (token.type === TokenType.RESERVED_FUNCTION_NAME) {
+      const nextToken = tokens[i + 1];
+      if (!nextToken || !isOpenParen(nextToken)) {
+        return { ...token, type: TokenType.RESERVED_KEYWORD };
+      }
+    }
+    if (token.type === TokenType.IDENTIFIER) {
+      const nextToken = tokens[i + 1];
+      if (nextToken && isOpenBracket(nextToken)) {
+        return { ...token, type: TokenType.ARRAY_IDENTIFIER };
+      }
+    }
+    if (token.type === TokenType.RESERVED_KEYWORD) {
+      const nextToken = tokens[i + 1];
+      if (nextToken && isOpenBracket(nextToken)) {
+        return { ...token, type: TokenType.ARRAY_KEYWORD };
+      }
+    }
+    return token;
+  });
+}
+
+const isOpenParen = (t: Token): boolean => t.type === TokenType.OPEN_PAREN && t.text === '(';
+
+const isOpenBracket = (t: Token): boolean => t.type === TokenType.OPEN_PAREN && t.text === '[';
@@ -12,6 +12,8 @@ export enum TokenType {
   RESERVED_COMMAND = 'RESERVED_COMMAND',
   RESERVED_SELECT = 'RESERVED_SELECT',
   RESERVED_JOIN = 'RESERVED_JOIN',
+  ARRAY_IDENTIFIER = 'ARRAY_IDENTIFIER', // IDENTIFIER token in front of [
+  ARRAY_KEYWORD = 'ARRAY_KEYWORD', // RESERVED_KEYWORD token in front of [
   CASE = 'CASE',
   END = 'END',
   LIMIT = 'LIMIT',
@@ -83,6 +85,7 @@ export const isReserved = (token: Token): boolean =>
   token.type === TokenType.RESERVED_SELECT ||
   token.type === TokenType.RESERVED_SET_OPERATION ||
   token.type === TokenType.RESERVED_JOIN ||
+  token.type === TokenType.ARRAY_KEYWORD ||
   token.type === TokenType.CASE ||
   token.type === TokenType.END ||
   token.type === TokenType.LIMIT ||
 
@@ -0,0 +1,31 @@
+import { Token, TokenType } from 'src/lexer/token';
+
+// Nearley type definitions say that Token must have a value field,
+// which however is wrong. Instead Nearley expects a text field.
+type NearleyToken = Token & { value: string };
+
+export default class LexerAdapter {
+  private index = 0;
+  private tokens: Token[] = [];
+
+  constructor(private tokenize: (chunk: string) => Token[]) {}
+
+  reset(chunk: string, _info: any) {
+    this.index = 0;
+    this.tokens = this.tokenize(chunk);
+  }
+
+  next(): NearleyToken | undefined {
+    return this.tokens[this.index++] as NearleyToken | undefined;
+  }
+
+  save(): any {}
+
+  formatError(token: NearleyToken) {
+    return `Parse error at token: ${token.text}`;
+  }
+
+  has(name: string): boolean {
+    return name in TokenType;
+  }
+}
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@ import { indentString } from 'src/formatter/config';`
`3`	`3`	`import Params from 'src/formatter/Params';`
`4`	`4`	`import Tokenizer from 'src/lexer/Tokenizer';`
`5`	`5`
`6`		`-import Parser from 'src/parser/Parser';`
	`6`	`+import { createParser } from 'src/parser/createParser';`
`7`	`7`	`import { Statement } from 'src/parser/ast';`
`8`	`8`
`9`	`9`	`import formatCommaPositions from './formatCommaPositions';`
`@@ -54,8 +54,7 @@ export default class Formatter {`
`54`	`54`	`}`
`55`	`55`
`56`	`56`	`private parse(query: string): Statement[] {`
`57`		`- const tokens = this.cachedTokenizer().tokenize(query, this.cfg.paramTypes \|\| {});`
`58`		`- return new Parser(tokens).parse();`
	`57`	`+ return createParser(this.cachedTokenizer()).parse(query, this.cfg.paramTypes \|\| {});`
`59`	`58`	`}`
`60`	`59`
`61`	`60`	`private formatAst(statements: Statement[]): string {`