From fe5ac1747a8a16055daf88a16d3a2eb07059c2cc Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Sat, 21 Sep 2024 21:57:20 +0900
Subject: [PATCH] Add SplitRawStatements() (#102)

* Add statement separator

* Update separator*.go

* Add test cases

* Fix ineffassign lint error

* Add test cases for separator_test.go

* Update test desc

* Rename to splitter and some change

* Simplify control flow

* Apply review comments

* Fix test function name
---
 split.go      | 48 +++++++++++++++++++++++++++++++++++++++++++
 split_test.go | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 split.go
 create mode 100644 split_test.go

diff --git a/split.go b/split.go
new file mode 100644
index 00000000..9f0193ce
--- /dev/null
+++ b/split.go
@@ -0,0 +1,48 @@
+package memefish
+
+import "github.com/cloudspannerecosystem/memefish/token"
+
+// SplitRawStatements splits an input string into statement strings at terminating semicolons without parsing.
+// Statements are terminated by `;`, `<eof>` or `;<eof>`, and the minimum output is []string{""}.
+// See [terminating semicolons].
+// This function does not panic; it returns a nil slice and the error if the lexer reports one.
+// filepath is stored as the FilePath of the lexed token.File and can be used in error messages.
+//
+// [terminating semicolons]: https://cloud.google.com/spanner/docs/reference/standard-sql/lexical#terminating_semicolons
+func SplitRawStatements(filepath, s string) ([]string, error) {
+	lex := &Lexer{
+		File: &token.File{
+			FilePath: filepath,
+			Buffer:   s,
+		},
+	}
+
+	var result []string
+	var firstPos token.Pos // start offset in s of the statement currently being scanned
+	for {
+		if lex.Token.Kind == ";" {
+			result = append(result, s[firstPos:lex.Token.Pos]) // the slice stops before the semicolon
+			if err := lex.NextToken(); err != nil {
+				return nil, err
+			}
+			firstPos = lex.Token.Pos // the next statement starts at the token after the semicolon
+			continue
+		}
+
+		err := lex.NextToken()
+		if err != nil {
+			return nil, err
+		}
+
+		if lex.Token.Kind == token.TokenEOF {
+			if lex.Token.Pos != firstPos { // nothing to emit when EOF directly follows the last start position
+				result = append(result, s[firstPos:lex.Token.Pos])
+			}
+			break
+		}
+	}
+	if len(result) == 0 {
+		return []string{""}, nil // no statement was collected: report a single empty statement
+	}
+	return result, nil
+}
diff --git a/split_test.go b/split_test.go
new file mode 100644
index 00000000..d9b8a5b6
--- /dev/null
+++ b/split_test.go
@@ -0,0 +1,57 @@
+package memefish_test
+
+import (
+	"github.com/cloudspannerecosystem/memefish"
+	"github.com/google/go-cmp/cmp"
+	"regexp"
+	"testing"
+)
+
+func TestSplitRawStatements(t *testing.T) {
+	for _, test := range []struct {
+		desc  string
+		input string
+		errRe *regexp.Regexp
+		want  []string
+	}{
+		// SplitRawStatements treats only lexical structures, so the test cases can be invalid statements.
+		{desc: "empty input", input: "", want: []string{""}},
+		{desc: "single statement ends with semicolon", input: `SELECT "123";`, want: []string{`SELECT "123"`}},
+		{desc: "single statement ends with EOF", input: `SELECT "123"`, want: []string{`SELECT "123"`}},
+		{desc: "two statement ends with semicolon", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}},
+		{desc: "two statement ends with EOF", input: `SELECT "123"; SELECT "456"`, want: []string{`SELECT "123"`, `SELECT "456"`}},
+		{desc: "second statement is empty", input: `SELECT 1; ;`, want: []string{`SELECT 1`, ``}},
+		{desc: "two statement with new lines", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}},
+		{desc: "single statement with line comment", input: `SELECT 1//
+`, want: []string{"SELECT 1//\n"}},
+		{desc: "semicolon in line comment", input: "SELECT 1 //;\n + 2", want: []string{"SELECT 1 //;\n + 2"}},
+		{desc: "semicolon in multi-line comment", input: "SELECT 1 /*;\n*/ + 2", want: []string{"SELECT 1 /*;\n*/ + 2"}},
+		{desc: "semicolon in double-quoted string", input: `SELECT "1;2;3";`, want: []string{`SELECT "1;2;3"`}},
+		{desc: "semicolon in single-quoted string", input: `SELECT '1;2;3';`, want: []string{`SELECT '1;2;3'`}},
+		{desc: "semicolon in back-quote", input: "SELECT `1;2;3`;", want: []string{"SELECT `1;2;3`"}},
+		// `$` may become a valid token in the future, but it's reasonable to check its current behavior.
+		{desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)},
+	} {
+		t.Run(test.desc, func(t *testing.T) {
+			stmts, err := memefish.SplitRawStatements("", test.input)
+			if err != nil {
+				if test.errRe == nil {
+					t.Errorf("should success, but %v", err)
+					return
+				}
+				if !test.errRe.MatchString(err.Error()) {
+					t.Errorf("error message should match %q, but %q", test.errRe, err)
+					return
+				}
+			}
+			if err == nil && test.errRe != nil {
+				t.Errorf("success, but should fail %q", test.errRe)
+				return
+			}
+			if diff := cmp.Diff(stmts, test.want); diff != "" {
+				t.Errorf("differs: %v", diff)
+				return
+			}
+		})
+	}
+}