From 0c0eb5eb9eb73e04a153331dd439cca9e07d4234 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Fri, 20 Sep 2024 18:35:35 +0900 Subject: [PATCH 01/10] Add statement separator --- separator.go | 41 +++++++++++++++++++++++++++++++++++++++ separator_test.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 separator.go create mode 100644 separator_test.go diff --git a/separator.go b/separator.go new file mode 100644 index 00000000..784413d5 --- /dev/null +++ b/separator.go @@ -0,0 +1,41 @@ +package memefish + +import "github.com/cloudspannerecosystem/memefish/token" + +func SeparateRawStatements(filepath, s string) ([]string, error) { + lex := &Lexer{ + File: &token.File{ + FilePath: filepath, + Buffer: s, + }, + } + + var result []string + var firstPos token.Pos + for { + if lex.Token.Kind == ";" { + result = append(result, s[firstPos:lex.Token.Pos]) + lex.nextToken() + firstPos = lex.Token.Pos + continue + } + + err := lex.NextToken() + if err != nil { + return nil, err + } + + if lex.Token.Kind == token.TokenEOF { + if lex.Token.Pos == firstPos { + break + } + result = append(result, s[firstPos:lex.Token.Pos]) + firstPos = lex.Token.Pos + break + } + } + if len(result) == 0 { + return []string{""}, nil + } + return result, nil +} diff --git a/separator_test.go b/separator_test.go new file mode 100644 index 00000000..71f253dc --- /dev/null +++ b/separator_test.go @@ -0,0 +1,49 @@ +package memefish_test + +import ( + "github.com/cloudspannerecosystem/memefish" + "github.com/google/go-cmp/cmp" + "regexp" + "testing" +) + +func TestSeparateRawStatements(t *testing.T) { + for _, test := range []struct { + desc string + input string + errRe *regexp.Regexp + want []string + }{ + {desc: "empty input", input: "", want: []string{""}}, + {desc: "single statement ", input: `SELECT "123";`, want: []string{`SELECT "123"`}}, + {desc: "two statement", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}}, + {desc: "second statement is empty", input: `SELECT 1; ;`, want: []string{`SELECT 1`, ``}}, + {desc: "two statement", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}}, + {desc: "single statement with line comment", input: `SELECT 1// +`, want: []string{"SELECT 1//\n"}}, + {desc: "semicolon in double-quoted string", input: `SELECT "1;2;3";`, want: []string{`SELECT "1;2;3"`}}, + {desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)}, + } { + t.Run(test.desc, func(t *testing.T) { + stmts, err := memefish.SeparateRawStatements("", test.input) + if err != nil { + if test.errRe == nil { + t.Errorf("should success, but %v", err) + return + } + if !test.errRe.MatchString(err.Error()) { + t.Errorf("error message should match %q, but %q", test.errRe, err) + return + } + } + if err == nil && test.errRe != nil { + t.Errorf("success, but should fail %q", test.errRe) + return + } + if diff := cmp.Diff(stmts, test.want); diff != "" { + t.Errorf("differs: %v", diff) + return + } + }) + } +} From 7973a97199cf1a5fd91b5c20f71b9847be5f3080 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Fri, 20 Sep 2024 23:00:17 +0900 Subject: [PATCH 02/10] Update separator*.go --- separator.go | 12 +++++++++++- separator_test.go | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/separator.go b/separator.go index 784413d5..90b425e8 100644 --- a/separator.go +++ b/separator.go @@ -2,6 +2,13 @@ package memefish import "github.com/cloudspannerecosystem/memefish/token" +// SeparateRawStatements separates s to statements without parsing. +// Statements are terminated by `;`, `` or `;` and the minimum output will be []string{""}. +// See [terminating semicolons]. +// This function won't panic but return error if lexer become error state. +// filepath can be used in error message. +// +// [terminating semicolons]: https://cloud.google.com/spanner/docs/reference/standard-sql/lexical#terminating_semicolons func SeparateRawStatements(filepath, s string) ([]string, error) { lex := &Lexer{ File: &token.File{ @@ -15,7 +22,10 @@ func SeparateRawStatements(filepath, s string) ([]string, error) { for { if lex.Token.Kind == ";" { result = append(result, s[firstPos:lex.Token.Pos]) - lex.nextToken() + err := lex.NextToken() + if err != nil { + return nil, err + } firstPos = lex.Token.Pos continue } diff --git a/separator_test.go b/separator_test.go index 71f253dc..c470cfe7 100644 --- a/separator_test.go +++ b/separator_test.go @@ -22,6 +22,7 @@ func TestSeparateRawStatements(t *testing.T) { {desc: "single statement with line comment", input: `SELECT 1// `, want: []string{"SELECT 1//\n"}}, {desc: "semicolon in double-quoted string", input: `SELECT "1;2;3";`, want: []string{`SELECT "1;2;3"`}}, + // $` may become a valid token in the future, but it's reasonable to check its current behavior. {desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)}, } { t.Run(test.desc, func(t *testing.T) { From 9b1c1ad8a9b94f552a9958868cdd813888ecb481 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Fri, 20 Sep 2024 23:22:31 +0900 Subject: [PATCH 03/10] Add test cases --- separator_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/separator_test.go b/separator_test.go index c470cfe7..f868a055 100644 --- a/separator_test.go +++ b/separator_test.go @@ -14,6 +14,7 @@ func TestSeparateRawStatements(t *testing.T) { errRe *regexp.Regexp want []string }{ + // SeparateRawStatements treats only lexical structures, so the test cases can be invalid statements. {desc: "empty input", input: "", want: []string{""}}, {desc: "single statement ", input: `SELECT "123";`, want: []string{`SELECT "123"`}}, {desc: "two statement", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}}, @@ -21,7 +22,11 @@ func TestSeparateRawStatements(t *testing.T) { {desc: "two statement", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}}, {desc: "single statement with line comment", input: `SELECT 1// `, want: []string{"SELECT 1//\n"}}, + {desc: "semicolon in line comment", input: "SELECT 1 //;\n + 2", want: []string{"SELECT 1 //;\n + 2"}}, + {desc: "semicolon in multi-line comment", input: "SELECT 1 /*;\n*/ + 2", want: []string{"SELECT 1 /*;\n*/ + 2"}}, {desc: "semicolon in double-quoted string", input: `SELECT "1;2;3";`, want: []string{`SELECT "1;2;3"`}}, + {desc: "semicolon in single-quoted string", input: `SELECT '1;2;3';`, want: []string{`SELECT '1;2;3'`}}, + {desc: "semicolon in back-quote", input: "SELECT `1;2;3`;", want: []string{"SELECT `1;2;3`"}}, // $` may become a valid token in the future, but it's reasonable to check its current behavior. {desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)}, } { From a4b895d89598d44067944340b027a0c34aeb6065 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Fri, 20 Sep 2024 23:23:47 +0900 Subject: [PATCH 04/10] Fix ineffassign lint error --- separator.go | 1 - 1 file changed, 1 deletion(-) diff --git a/separator.go b/separator.go index 90b425e8..a1e3545d 100644 --- a/separator.go +++ b/separator.go @@ -40,7 +40,6 @@ func SeparateRawStatements(filepath, s string) ([]string, error) { break } result = append(result, s[firstPos:lex.Token.Pos]) - firstPos = lex.Token.Pos break } } From 91f2e58577c74db21ff4eb33861185362f94ddb9 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Fri, 20 Sep 2024 23:27:53 +0900 Subject: [PATCH 05/10] Add test cases for separator_test.go --- separator_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/separator_test.go b/separator_test.go index f868a055..2aa63f2c 100644 --- a/separator_test.go +++ b/separator_test.go @@ -16,8 +16,10 @@ func TestSeparateRawStatements(t *testing.T) { }{ // SeparateRawStatements treats only lexical structures, so the test cases can be invalid statements. {desc: "empty input", input: "", want: []string{""}}, - {desc: "single statement ", input: `SELECT "123";`, want: []string{`SELECT "123"`}}, - {desc: "two statement", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}}, + {desc: "single statement ends with semicolon", input: `SELECT "123";`, want: []string{`SELECT "123"`}}, + {desc: "single statement ends with EOF", input: `SELECT "123"`, want: []string{`SELECT "123"`}}, + {desc: "two statement ends with semicolon", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}}, + {desc: "two statement ends with EOF", input: `SELECT "123"; SELECT "456"`, want: []string{`SELECT "123"`, `SELECT "456"`}}, {desc: "second statement is empty", input: `SELECT 1; ;`, want: []string{`SELECT 1`, ``}}, {desc: "two statement", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}}, {desc: "single statement with line comment", input: `SELECT 1// From 838545d184c5fd19a001184104b65ac250d54293 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Fri, 20 Sep 2024 23:28:44 +0900 Subject: [PATCH 06/10] Update test desc --- separator_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/separator_test.go b/separator_test.go index 2aa63f2c..3d8252dc 100644 --- a/separator_test.go +++ b/separator_test.go @@ -21,7 +21,7 @@ func TestSeparateRawStatements(t *testing.T) { {desc: "two statement ends with semicolon", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}}, {desc: "two statement ends with EOF", input: `SELECT "123"; SELECT "456"`, want: []string{`SELECT "123"`, `SELECT "456"`}}, {desc: "second statement is empty", input: `SELECT 1; ;`, want: []string{`SELECT 1`, ``}}, - {desc: "two statement", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}}, + {desc: "two statement with new lines", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}}, {desc: "single statement with line comment", input: `SELECT 1// `, want: []string{"SELECT 1//\n"}}, {desc: "semicolon in line comment", input: "SELECT 1 //;\n + 2", want: []string{"SELECT 1 //;\n + 2"}}, From 0e6b0029e29eea8eabf6e940ccfe632b1cec0f47 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Sat, 21 Sep 2024 20:00:09 +0900 Subject: [PATCH 07/10] Rename to splitter and some change --- separator.go => splitter.go | 10 +++++----- separator_test.go => splitter_test.go | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) rename separator.go => splitter.go (76%) rename separator_test.go => splitter_test.go (93%) diff --git a/separator.go b/splitter.go similarity index 76% rename from separator.go rename to splitter.go index a1e3545d..929a28d4 100644 --- a/separator.go +++ b/splitter.go @@ -2,18 +2,18 @@ package memefish import "github.com/cloudspannerecosystem/memefish/token" -// SeparateRawStatements separates s to statements without parsing. +// SplitRawStatements splits input to statement strings at terminating semicolons without parsing. // Statements are terminated by `;`, `` or `;` and the minimum output will be []string{""}. // See [terminating semicolons]. // This function won't panic but return error if lexer become error state. // filepath can be used in error message. // // [terminating semicolons]: https://cloud.google.com/spanner/docs/reference/standard-sql/lexical#terminating_semicolons -func SeparateRawStatements(filepath, s string) ([]string, error) { +func SplitRawStatements(filepath, input string) ([]string, error) { lex := &Lexer{ File: &token.File{ FilePath: filepath, - Buffer: s, + Buffer: input, }, } @@ -21,7 +21,7 @@ func SeparateRawStatements(filepath, s string) ([]string, error) { var firstPos token.Pos for { if lex.Token.Kind == ";" { - result = append(result, s[firstPos:lex.Token.Pos]) + result = append(result, input[firstPos:lex.Token.Pos]) err := lex.NextToken() if err != nil { return nil, err @@ -39,7 +39,7 @@ func SeparateRawStatements(filepath, s string) ([]string, error) { if lex.Token.Pos == firstPos { break } - result = append(result, s[firstPos:lex.Token.Pos]) + result = append(result, input[firstPos:lex.Token.Pos]) break } } diff --git a/separator_test.go b/splitter_test.go similarity index 93% rename from separator_test.go rename to splitter_test.go index 3d8252dc..31f31bfa 100644 --- a/separator_test.go +++ b/splitter_test.go @@ -14,7 +14,7 @@ func TestSeparateRawStatements(t *testing.T) { errRe *regexp.Regexp want []string }{ - // SeparateRawStatements treats only lexical structures, so the test cases can be invalid statements. + // SplitRawStatements treats only lexical structures, so the test cases can be invalid statements. {desc: "empty input", input: "", want: []string{""}}, {desc: "single statement ends with semicolon", input: `SELECT "123";`, want: []string{`SELECT "123"`}}, {desc: "single statement ends with EOF", input: `SELECT "123"`, want: []string{`SELECT "123"`}}, @@ -33,7 +33,7 @@ func TestSeparateRawStatements(t *testing.T) { {desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)}, } { t.Run(test.desc, func(t *testing.T) { - stmts, err := memefish.SeparateRawStatements("", test.input) + stmts, err := memefish.SplitRawStatements("", test.input) if err != nil { if test.errRe == nil { t.Errorf("should success, but %v", err) From b75c307e06e4440477b5cca5c8d796e39d4e5782 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Sat, 21 Sep 2024 20:13:50 +0900 Subject: [PATCH 08/10] Simplify control flow --- splitter.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/splitter.go b/splitter.go index 929a28d4..4d15e5a9 100644 --- a/splitter.go +++ b/splitter.go @@ -22,8 +22,7 @@ func SplitRawStatements(filepath, input string) ([]string, error) { for { if lex.Token.Kind == ";" { result = append(result, input[firstPos:lex.Token.Pos]) - err := lex.NextToken() - if err != nil { + if err := lex.NextToken(); err != nil { return nil, err } firstPos = lex.Token.Pos @@ -36,10 +35,9 @@ func SplitRawStatements(filepath, input string) ([]string, error) { } if lex.Token.Kind == token.TokenEOF { - if lex.Token.Pos == firstPos { - break + if lex.Token.Pos != firstPos { + result = append(result, input[firstPos:lex.Token.Pos]) } - result = append(result, input[firstPos:lex.Token.Pos]) break } } From 387eeb57b09721d8264b9e7de411707f5a2dc9c6 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Sat, 21 Sep 2024 21:46:34 +0900 Subject: [PATCH 09/10] Apply review comments --- splitter.go => split.go | 10 +++++----- splitter_test.go => split_test.go | 0 2 files changed, 5 insertions(+), 5 deletions(-) rename splitter.go => split.go (75%) rename splitter_test.go => split_test.go (100%) diff --git a/splitter.go b/split.go similarity index 75% rename from splitter.go rename to split.go index 4d15e5a9..9f0193ce 100644 --- a/splitter.go +++ b/split.go @@ -2,18 +2,18 @@ package memefish import "github.com/cloudspannerecosystem/memefish/token" -// SplitRawStatements splits input to statement strings at terminating semicolons without parsing. +// SplitRawStatements splits an input string to statement strings at terminating semicolons without parsing. // Statements are terminated by `;`, `` or `;` and the minimum output will be []string{""}. // See [terminating semicolons]. // This function won't panic but return error if lexer become error state. // filepath can be used in error message. // // [terminating semicolons]: https://cloud.google.com/spanner/docs/reference/standard-sql/lexical#terminating_semicolons -func SplitRawStatements(filepath, input string) ([]string, error) { +func SplitRawStatements(filepath, s string) ([]string, error) { lex := &Lexer{ File: &token.File{ FilePath: filepath, - Buffer: input, + Buffer: s, }, } @@ -21,7 +21,7 @@ func SplitRawStatements(filepath, input string) ([]string, error) { var firstPos token.Pos for { if lex.Token.Kind == ";" { - result = append(result, input[firstPos:lex.Token.Pos]) + result = append(result, s[firstPos:lex.Token.Pos]) if err := lex.NextToken(); err != nil { return nil, err } @@ -36,7 +36,7 @@ func SplitRawStatements(filepath, input string) ([]string, error) { if lex.Token.Kind == token.TokenEOF { if lex.Token.Pos != firstPos { - result = append(result, input[firstPos:lex.Token.Pos]) + result = append(result, s[firstPos:lex.Token.Pos]) } break } diff --git a/splitter_test.go b/split_test.go similarity index 100% rename from splitter_test.go rename to split_test.go From 070c041a5685ec4e34f8baa2d81e1f01801a0aa1 Mon Sep 17 00:00:00 2001 From: apstndb <803393+apstndb@users.noreply.github.com> Date: Sat, 21 Sep 2024 21:47:27 +0900 Subject: [PATCH 10/10] Fix test function name --- split_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/split_test.go b/split_test.go index 31f31bfa..d9b8a5b6 100644 --- a/split_test.go +++ b/split_test.go @@ -7,7 +7,7 @@ import ( "testing" ) -func TestSeparateRawStatements(t *testing.T) { +func TestSplitRawStatements(t *testing.T) { for _, test := range []struct { desc string input string