Skip to content

Commit

Permalink
Add SplitRawStatements() (#102)
Browse files Browse the repository at this point in the history
* Add statement separator

* Update separator*.go

* Add test cases

* Fix ineffassign lint error

* Add test cases for separator_test.go

* Update test desc

* Rename to splitter and some change

* Simplify control flow

* Apply review comments

* Fix test function name
  • Loading branch information
apstndb authored Sep 21, 2024
1 parent 64f802f commit fe5ac17
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 0 deletions.
48 changes: 48 additions & 0 deletions split.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package memefish

import "github.com/cloudspannerecosystem/memefish/token"

// SplitRawStatements cuts s into raw statement strings at terminating semicolons.
// It relies on the lexer only; the statements themselves are never parsed.
//
// A statement ends at `;`, at `<eof>`, or at `;<eof>`; see [terminating semicolons].
// The result always holds at least one element: an empty input yields []string{""}.
//
// The first statement starts at offset 0 of s and every later statement starts at
// the position of the first token following the preceding semicolon. Each statement
// runs up to, but does not include, its terminating semicolon or the end-of-file token.
//
// Lexer failures are reported through the returned error instead of a panic.
// filepath is stored in the token.File handed to the lexer and can be used in error messages.
//
// [terminating semicolons]: https://cloud.google.com/spanner/docs/reference/standard-sql/lexical#terminating_semicolons
func SplitRawStatements(filepath, s string) ([]string, error) {
	lex := &Lexer{File: &token.File{FilePath: filepath, Buffer: s}}

	var (
		stmts []string
		start token.Pos // offset in s where the statement being scanned begins
	)
	for {
		if lex.Token.Kind != ";" {
			// Not sitting on a terminator: advance by one token and look for the end of input.
			if err := lex.NextToken(); err != nil {
				return nil, err
			}
			if lex.Token.Kind != token.TokenEOF {
				continue
			}

			// End of input: flush the trailing statement unless nothing lies between
			// the current statement start and the end-of-file token.
			if end := lex.Token.Pos; end != start {
				stmts = append(stmts, s[start:end])
			}
			if len(stmts) == 0 {
				return []string{""}, nil
			}
			return stmts, nil
		}

		// The current token is a terminating semicolon: emit everything before it,
		// then begin the next statement at the token that follows.
		stmts = append(stmts, s[start:lex.Token.Pos])
		if err := lex.NextToken(); err != nil {
			return nil, err
		}
		start = lex.Token.Pos
	}
}
57 changes: 57 additions & 0 deletions split_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package memefish_test

import (
"github.com/cloudspannerecosystem/memefish"
"github.com/google/go-cmp/cmp"
"regexp"
"testing"
)

// TestSplitRawStatements runs memefish.SplitRawStatements over a table of inputs.
// A case with a non-nil errRe must fail with an error whose message matches errRe;
// every other case must succeed and return exactly the statements listed in want.
func TestSplitRawStatements(t *testing.T) {
	for _, test := range []struct {
		desc  string
		input string
		errRe *regexp.Regexp
		want  []string
	}{
		// SplitRawStatements treats only lexical structures, so the test cases can be invalid statements.
		{desc: "empty input", input: "", want: []string{""}},
		{desc: "single statement ends with semicolon", input: `SELECT "123";`, want: []string{`SELECT "123"`}},
		{desc: "single statement ends with EOF", input: `SELECT "123"`, want: []string{`SELECT "123"`}},
		{desc: "two statement ends with semicolon", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}},
		{desc: "two statement ends with EOF", input: `SELECT "123"; SELECT "456"`, want: []string{`SELECT "123"`, `SELECT "456"`}},
		{desc: "second statement is empty", input: `SELECT 1; ;`, want: []string{`SELECT 1`, ``}},
		{desc: "two statement with new lines", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}},
		{desc: "single statement with line comment", input: `SELECT 1//
`, want: []string{"SELECT 1//\n"}},
		{desc: "semicolon in line comment", input: "SELECT 1 //;\n + 2", want: []string{"SELECT 1 //;\n + 2"}},
		{desc: "semicolon in multi-line comment", input: "SELECT 1 /*;\n*/ + 2", want: []string{"SELECT 1 /*;\n*/ + 2"}},
		{desc: "semicolon in double-quoted string", input: `SELECT "1;2;3";`, want: []string{`SELECT "1;2;3"`}},
		{desc: "semicolon in single-quoted string", input: `SELECT '1;2;3';`, want: []string{`SELECT '1;2;3'`}},
		{desc: "semicolon in back-quote", input: "SELECT `1;2;3`;", want: []string{"SELECT `1;2;3`"}},
		// `$` may become a valid token in the future, but it's reasonable to check its current behavior.
		{desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)},
	} {
		t.Run(test.desc, func(t *testing.T) {
			got, err := memefish.SplitRawStatements("", test.input)

			// Expected-failure cases only assert on the error; the returned
			// statements carry no meaning when err is non-nil.
			if test.errRe != nil {
				switch {
				case err == nil:
					t.Errorf("succeeded, but should fail with an error matching %q", test.errRe)
				case !test.errRe.MatchString(err.Error()):
					t.Errorf("error message should match %q, but got %q", test.errRe, err)
				}
				return
			}

			if err != nil {
				t.Errorf("should succeed, but got error: %v", err)
				return
			}
			// cmp.Diff(x, y) marks x with '-' and y with '+', hence the (-want +got) label.
			if diff := cmp.Diff(test.want, got); diff != "" {
				t.Errorf("statements differ (-want +got):\n%s", diff)
			}
		})
	}
}

0 comments on commit fe5ac17

Please sign in to comment.