-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add statement separator * Update separator*.go * Add test cases * Fix ineffassign lint error * Add test cases for separator_test.go * Update test desc * Rename to splitter and some change * Simplify control flow * Apply review comments * Fix test function name
- Loading branch information
Showing
2 changed files
with
105 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
package memefish | ||
|
||
import "github.com/cloudspannerecosystem/memefish/token" | ||
|
||
// SplitRawStatements splits an input string to statement strings at terminating semicolons without parsing. | ||
// Statements are terminated by `;`, `<eof>` or `;<eof>` and the minimum output will be []string{""}. | ||
// See [terminating semicolons]. | ||
// This function won't panic but return error if lexer become error state. | ||
// filepath can be used in error message. | ||
// | ||
// [terminating semicolons]: https://cloud.google.com/spanner/docs/reference/standard-sql/lexical#terminating_semicolons | ||
func SplitRawStatements(filepath, s string) ([]string, error) { | ||
lex := &Lexer{ | ||
File: &token.File{ | ||
FilePath: filepath, | ||
Buffer: s, | ||
}, | ||
} | ||
|
||
var result []string | ||
var firstPos token.Pos | ||
for { | ||
if lex.Token.Kind == ";" { | ||
result = append(result, s[firstPos:lex.Token.Pos]) | ||
if err := lex.NextToken(); err != nil { | ||
return nil, err | ||
} | ||
firstPos = lex.Token.Pos | ||
continue | ||
} | ||
|
||
err := lex.NextToken() | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
if lex.Token.Kind == token.TokenEOF { | ||
if lex.Token.Pos != firstPos { | ||
result = append(result, s[firstPos:lex.Token.Pos]) | ||
} | ||
break | ||
} | ||
} | ||
if len(result) == 0 { | ||
return []string{""}, nil | ||
} | ||
return result, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
package memefish_test | ||
|
||
import ( | ||
"github.com/cloudspannerecosystem/memefish" | ||
"github.com/google/go-cmp/cmp" | ||
"regexp" | ||
"testing" | ||
) | ||
|
||
func TestSplitRawStatements(t *testing.T) { | ||
for _, test := range []struct { | ||
desc string | ||
input string | ||
errRe *regexp.Regexp | ||
want []string | ||
}{ | ||
// SplitRawStatements treats only lexical structures, so the test cases can be invalid statements. | ||
{desc: "empty input", input: "", want: []string{""}}, | ||
{desc: "single statement ends with semicolon", input: `SELECT "123";`, want: []string{`SELECT "123"`}}, | ||
{desc: "single statement ends with EOF", input: `SELECT "123"`, want: []string{`SELECT "123"`}}, | ||
{desc: "two statement ends with semicolon", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}}, | ||
{desc: "two statement ends with EOF", input: `SELECT "123"; SELECT "456"`, want: []string{`SELECT "123"`, `SELECT "456"`}}, | ||
{desc: "second statement is empty", input: `SELECT 1; ;`, want: []string{`SELECT 1`, ``}}, | ||
{desc: "two statement with new lines", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}}, | ||
{desc: "single statement with line comment", input: `SELECT 1// | ||
`, want: []string{"SELECT 1//\n"}}, | ||
{desc: "semicolon in line comment", input: "SELECT 1 //;\n + 2", want: []string{"SELECT 1 //;\n + 2"}}, | ||
{desc: "semicolon in multi-line comment", input: "SELECT 1 /*;\n*/ + 2", want: []string{"SELECT 1 /*;\n*/ + 2"}}, | ||
{desc: "semicolon in double-quoted string", input: `SELECT "1;2;3";`, want: []string{`SELECT "1;2;3"`}}, | ||
{desc: "semicolon in single-quoted string", input: `SELECT '1;2;3';`, want: []string{`SELECT '1;2;3'`}}, | ||
{desc: "semicolon in back-quote", input: "SELECT `1;2;3`;", want: []string{"SELECT `1;2;3`"}}, | ||
// $` may become a valid token in the future, but it's reasonable to check its current behavior. | ||
{desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)}, | ||
} { | ||
t.Run(test.desc, func(t *testing.T) { | ||
stmts, err := memefish.SplitRawStatements("", test.input) | ||
if err != nil { | ||
if test.errRe == nil { | ||
t.Errorf("should success, but %v", err) | ||
return | ||
} | ||
if !test.errRe.MatchString(err.Error()) { | ||
t.Errorf("error message should match %q, but %q", test.errRe, err) | ||
return | ||
} | ||
} | ||
if err == nil && test.errRe != nil { | ||
t.Errorf("success, but should fail %q", test.errRe) | ||
return | ||
} | ||
if diff := cmp.Diff(stmts, test.want); diff != "" { | ||
t.Errorf("differs: %v", diff) | ||
return | ||
} | ||
}) | ||
} | ||
} |