From 0c0eb5eb9eb73e04a153331dd439cca9e07d4234 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Fri, 20 Sep 2024 18:35:35 +0900
Subject: [PATCH 01/10] Add statement separator

---
 separator.go      | 41 +++++++++++++++++++++++++++++++++++++++
 separator_test.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 separator.go
 create mode 100644 separator_test.go
diff --git a/separator.go b/separator.go
new file mode 100644
index 00000000..784413d5
--- /dev/null
+++ b/separator.go
@@ -0,0 +1,41 @@
+package memefish
+
+import "github.com/cloudspannerecosystem/memefish/token"
+
+func SeparateRawStatements(filepath, s string) ([]string, error) {
+	lex := &Lexer{
+		File: &token.File{
+			FilePath: filepath,
+			Buffer:   s,
+		},
+	}
+
+	var result []string
+	var firstPos token.Pos
+	for {
+		if lex.Token.Kind == ";" {
+			result = append(result, s[firstPos:lex.Token.Pos])
+			lex.nextToken()
+			firstPos = lex.Token.Pos
+			continue
+		}
+
+		err := lex.NextToken()
+		if err != nil {
+			return nil, err
+		}
+
+		if lex.Token.Kind == token.TokenEOF {
+			if lex.Token.Pos == firstPos {
+				break
+			}
+			result = append(result, s[firstPos:lex.Token.Pos])
+			firstPos = lex.Token.Pos
+			break
+		}
+	}
+	if len(result) == 0 {
+		return []string{""}, nil
+	}
+	return result, nil
+}
diff --git a/separator_test.go b/separator_test.go
new file mode 100644
index 00000000..71f253dc
--- /dev/null
+++ b/separator_test.go
@@ -0,0 +1,49 @@
+package memefish_test
+
+import (
+	"github.com/cloudspannerecosystem/memefish"
+	"github.com/google/go-cmp/cmp"
+	"regexp"
+	"testing"
+)
+
+func TestSeparateRawStatements(t *testing.T) {
+	for _, test := range []struct {
+		desc  string
+		input string
+		errRe *regexp.Regexp
+		want  []string
+	}{
+		{desc: "empty input", input: "", want: []string{""}},
+		{desc: "single statement ", input: `SELECT "123";`, want: []string{`SELECT "123"`}},
+		{desc: "two statement", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}},
+		{desc: "second statement is empty", input: `SELECT 1; ;`, want: []string{`SELECT 1`, ``}},
+		{desc: "two statement", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}},
+		{desc: "single statement with line comment", input: `SELECT 1//
+`, want: []string{"SELECT 1//\n"}},
+		{desc: "semicolon in double-quoted string", input: `SELECT "1;2;3";`, want: []string{`SELECT "1;2;3"`}},
+		{desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)},
+	} {
+		t.Run(test.desc, func(t *testing.T) {
+			stmts, err := memefish.SeparateRawStatements("", test.input)
+			if err != nil {
+				if test.errRe == nil {
+					t.Errorf("should success, but %v", err)
+					return
+				}
+				if !test.errRe.MatchString(err.Error()) {
+					t.Errorf("error message should match %q, but %q", test.errRe, err)
+					return
+				}
+			}
+			if err == nil && test.errRe != nil {
+				t.Errorf("success, but should fail %q", test.errRe)
+				return
+			}
+			if diff := cmp.Diff(stmts, test.want); diff != "" {
+				t.Errorf("differs: %v", diff)
+				return
+			}
+		})
+	}
+}

From 7973a97199cf1a5fd91b5c20f71b9847be5f3080 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Fri, 20 Sep 2024 23:00:17 +0900
Subject: [PATCH 02/10] Document SeparateRawStatements and return lexer errors

---
 separator.go      | 12 +++++++++++-
 separator_test.go |  1 +
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/separator.go b/separator.go
index 784413d5..90b425e8 100644
--- a/separator.go
+++ b/separator.go
@@ -2,6 +2,13 @@ package memefish
 
 import "github.com/cloudspannerecosystem/memefish/token"
 
+// SeparateRawStatements separates s to statements without parsing.
+// Statements are terminated by `;`, `<eof>` or `;<eof>` and the minimum output will be []string{""}.
+// See [terminating semicolons].
+// This function won't panic but return error if lexer become error state.
+// filepath can be used in error message.
+//
+// [terminating semicolons]: https://cloud.google.com/spanner/docs/reference/standard-sql/lexical#terminating_semicolons
 func SeparateRawStatements(filepath, s string) ([]string, error) {
 	lex := &Lexer{
 		File: &token.File{
@@ -15,7 +22,10 @@ func SeparateRawStatements(filepath, s string) ([]string, error) {
 	for {
 		if lex.Token.Kind == ";" {
 			result = append(result, s[firstPos:lex.Token.Pos])
-			lex.nextToken()
+			err := lex.NextToken()
+			if err != nil {
+				return nil, err
+			}
 			firstPos = lex.Token.Pos
 			continue
 		}
diff --git a/separator_test.go b/separator_test.go
index 71f253dc..c470cfe7 100644
--- a/separator_test.go
+++ b/separator_test.go
@@ -22,6 +22,7 @@ func TestSeparateRawStatements(t *testing.T) {
 		{desc: "single statement with line comment", input: `SELECT 1//
 `, want: []string{"SELECT 1//\n"}},
 		{desc: "semicolon in double-quoted string", input: `SELECT "1;2;3";`, want: []string{`SELECT "1;2;3"`}},
+		// $` may become a valid token in the future, but it's reasonable to check its current behavior.
 		{desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)},
 	} {
 		t.Run(test.desc, func(t *testing.T) {

From 9b1c1ad8a9b94f552a9958868cdd813888ecb481 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Fri, 20 Sep 2024 23:22:31 +0900
Subject: [PATCH 03/10] Add test cases

---
 separator_test.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/separator_test.go b/separator_test.go
index c470cfe7..f868a055 100644
--- a/separator_test.go
+++ b/separator_test.go
@@ -14,6 +14,7 @@ func TestSeparateRawStatements(t *testing.T) {
 		errRe *regexp.Regexp
 		want  []string
 	}{
+		// SeparateRawStatements treats only lexical structures, so the test cases can be invalid statements.
 		{desc: "empty input", input: "", want: []string{""}},
 		{desc: "single statement ", input: `SELECT "123";`, want: []string{`SELECT "123"`}},
 		{desc: "two statement", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}},
@@ -21,7 +22,11 @@ func TestSeparateRawStatements(t *testing.T) {
 		{desc: "two statement", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}},
 		{desc: "single statement with line comment", input: `SELECT 1//
 `, want: []string{"SELECT 1//\n"}},
+		{desc: "semicolon in line comment", input: "SELECT 1 //;\n + 2", want: []string{"SELECT 1 //;\n + 2"}},
+		{desc: "semicolon in multi-line comment", input: "SELECT 1 /*;\n*/ + 2", want: []string{"SELECT 1 /*;\n*/ + 2"}},
 		{desc: "semicolon in double-quoted string", input: `SELECT "1;2;3";`, want: []string{`SELECT "1;2;3"`}},
+		{desc: "semicolon in single-quoted string", input: `SELECT '1;2;3';`, want: []string{`SELECT '1;2;3'`}},
+		{desc: "semicolon in back-quote", input: "SELECT `1;2;3`;", want: []string{"SELECT `1;2;3`"}},
 		// $` may become a valid token in the future, but it's reasonable to check its current behavior.
 		{desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)},
 	} {

From a4b895d89598d44067944340b027a0c34aeb6065 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Fri, 20 Sep 2024 23:23:47 +0900
Subject: [PATCH 04/10] Fix ineffassign lint error

---
 separator.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/separator.go b/separator.go
index 90b425e8..a1e3545d 100644
--- a/separator.go
+++ b/separator.go
@@ -40,7 +40,6 @@ func SeparateRawStatements(filepath, s string) ([]string, error) {
 				break
 			}
 			result = append(result, s[firstPos:lex.Token.Pos])
-			firstPos = lex.Token.Pos
 			break
 		}
 	}

From 91f2e58577c74db21ff4eb33861185362f94ddb9 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Fri, 20 Sep 2024 23:27:53 +0900
Subject: [PATCH 05/10] Add test cases for separator_test.go

---
 separator_test.go | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/separator_test.go b/separator_test.go
index f868a055..2aa63f2c 100644
--- a/separator_test.go
+++ b/separator_test.go
@@ -16,8 +16,10 @@ func TestSeparateRawStatements(t *testing.T) {
 	}{
 		// SeparateRawStatements treats only lexical structures, so the test cases can be invalid statements.
 		{desc: "empty input", input: "", want: []string{""}},
-		{desc: "single statement ", input: `SELECT "123";`, want: []string{`SELECT "123"`}},
-		{desc: "two statement", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}},
+		{desc: "single statement ends with semicolon", input: `SELECT "123";`, want: []string{`SELECT "123"`}},
+		{desc: "single statement ends with EOF", input: `SELECT "123"`, want: []string{`SELECT "123"`}},
+		{desc: "two statement ends with semicolon", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}},
+		{desc: "two statement ends with EOF", input: `SELECT "123"; SELECT "456"`, want: []string{`SELECT "123"`, `SELECT "456"`}},
 		{desc: "second statement is empty", input: `SELECT 1; ;`, want: []string{`SELECT 1`, ``}},
 		{desc: "two statement", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}},
 		{desc: "single statement with line comment", input: `SELECT 1//

From 838545d184c5fd19a001184104b65ac250d54293 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Fri, 20 Sep 2024 23:28:44 +0900
Subject: [PATCH 06/10] Update test desc

---
 separator_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/separator_test.go b/separator_test.go
index 2aa63f2c..3d8252dc 100644
--- a/separator_test.go
+++ b/separator_test.go
@@ -21,7 +21,7 @@ func TestSeparateRawStatements(t *testing.T) {
 		{desc: "two statement ends with semicolon", input: `SELECT "123"; SELECT "456";`, want: []string{`SELECT "123"`, `SELECT "456"`}},
 		{desc: "two statement ends with EOF", input: `SELECT "123"; SELECT "456"`, want: []string{`SELECT "123"`, `SELECT "456"`}},
 		{desc: "second statement is empty", input: `SELECT 1; ;`, want: []string{`SELECT 1`, ``}},
-		{desc: "two statement", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}},
+		{desc: "two statement with new lines", input: "SELECT 1;\n SELECT 2;\n", want: []string{"SELECT 1", "SELECT 2"}},
 		{desc: "single statement with line comment", input: `SELECT 1//
 `, want: []string{"SELECT 1//\n"}},
 		{desc: "semicolon in line comment", input: "SELECT 1 //;\n + 2", want: []string{"SELECT 1 //;\n + 2"}},

From 0e6b0029e29eea8eabf6e940ccfe632b1cec0f47 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Sat, 21 Sep 2024 20:00:09 +0900
Subject: [PATCH 07/10] Rename separator to splitter and parameter s to input

---
 separator.go => splitter.go           | 10 +++++-----
 separator_test.go => splitter_test.go |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)
 rename separator.go => splitter.go (76%)
 rename separator_test.go => splitter_test.go (93%)

diff --git a/separator.go b/splitter.go
similarity index 76%
rename from separator.go
rename to splitter.go
index a1e3545d..929a28d4 100644
--- a/separator.go
+++ b/splitter.go
@@ -2,18 +2,18 @@ package memefish
 
 import "github.com/cloudspannerecosystem/memefish/token"
 
-// SeparateRawStatements separates s to statements without parsing.
+// SplitRawStatements splits input to statement strings at terminating semicolons without parsing.
 // Statements are terminated by `;`, `<eof>` or `;<eof>` and the minimum output will be []string{""}.
 // See [terminating semicolons].
 // This function won't panic but return error if lexer become error state.
 // filepath can be used in error message.
 //
 // [terminating semicolons]: https://cloud.google.com/spanner/docs/reference/standard-sql/lexical#terminating_semicolons
-func SeparateRawStatements(filepath, s string) ([]string, error) {
+func SplitRawStatements(filepath, input string) ([]string, error) {
 	lex := &Lexer{
 		File: &token.File{
 			FilePath: filepath,
-			Buffer:   s,
+			Buffer:   input,
 		},
 	}
 
@@ -21,7 +21,7 @@ func SeparateRawStatements(filepath, s string) ([]string, error) {
 	var firstPos token.Pos
 	for {
 		if lex.Token.Kind == ";" {
-			result = append(result, s[firstPos:lex.Token.Pos])
+			result = append(result, input[firstPos:lex.Token.Pos])
 			err := lex.NextToken()
 			if err != nil {
 				return nil, err
@@ -39,7 +39,7 @@ func SeparateRawStatements(filepath, s string) ([]string, error) {
 			if lex.Token.Pos == firstPos {
 				break
 			}
-			result = append(result, s[firstPos:lex.Token.Pos])
+			result = append(result, input[firstPos:lex.Token.Pos])
 			break
 		}
 	}
diff --git a/separator_test.go b/splitter_test.go
similarity index 93%
rename from separator_test.go
rename to splitter_test.go
index 3d8252dc..31f31bfa 100644
--- a/separator_test.go
+++ b/splitter_test.go
@@ -14,7 +14,7 @@ func TestSeparateRawStatements(t *testing.T) {
 		errRe *regexp.Regexp
 		want  []string
 	}{
-		// SeparateRawStatements treats only lexical structures, so the test cases can be invalid statements.
+		// SplitRawStatements treats only lexical structures, so the test cases can be invalid statements.
 		{desc: "empty input", input: "", want: []string{""}},
 		{desc: "single statement ends with semicolon", input: `SELECT "123";`, want: []string{`SELECT "123"`}},
 		{desc: "single statement ends with EOF", input: `SELECT "123"`, want: []string{`SELECT "123"`}},
@@ -33,7 +33,7 @@ func TestSeparateRawStatements(t *testing.T) {
 		{desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)},
 	} {
 		t.Run(test.desc, func(t *testing.T) {
-			stmts, err := memefish.SeparateRawStatements("", test.input)
+			stmts, err := memefish.SplitRawStatements("", test.input)
 			if err != nil {
 				if test.errRe == nil {
 					t.Errorf("should success, but %v", err)

From b75c307e06e4440477b5cca5c8d796e39d4e5782 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Sat, 21 Sep 2024 20:13:50 +0900
Subject: [PATCH 08/10] Simplify control flow

---
 splitter.go | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/splitter.go b/splitter.go
index 929a28d4..4d15e5a9 100644
--- a/splitter.go
+++ b/splitter.go
@@ -22,8 +22,7 @@ func SplitRawStatements(filepath, input string) ([]string, error) {
 	for {
 		if lex.Token.Kind == ";" {
 			result = append(result, input[firstPos:lex.Token.Pos])
-			err := lex.NextToken()
-			if err != nil {
+			if err := lex.NextToken(); err != nil {
 				return nil, err
 			}
 			firstPos = lex.Token.Pos
@@ -36,10 +35,9 @@ func SplitRawStatements(filepath, input string) ([]string, error) {
 		}
 
 		if lex.Token.Kind == token.TokenEOF {
-			if lex.Token.Pos == firstPos {
-				break
+			if lex.Token.Pos != firstPos {
+				result = append(result, input[firstPos:lex.Token.Pos])
 			}
-			result = append(result, input[firstPos:lex.Token.Pos])
 			break
 		}
 	}

From 387eeb57b09721d8264b9e7de411707f5a2dc9c6 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Sat, 21 Sep 2024 21:46:34 +0900
Subject: [PATCH 09/10] Apply review comments

---
 splitter.go => split.go           | 10 +++++-----
 splitter_test.go => split_test.go |  0
 2 files changed, 5 insertions(+), 5 deletions(-)
 rename splitter.go => split.go (75%)
 rename splitter_test.go => split_test.go (100%)

diff --git a/splitter.go b/split.go
similarity index 75%
rename from splitter.go
rename to split.go
index 4d15e5a9..9f0193ce 100644
--- a/splitter.go
+++ b/split.go
@@ -2,18 +2,18 @@ package memefish
 
 import "github.com/cloudspannerecosystem/memefish/token"
 
-// SplitRawStatements splits input to statement strings at terminating semicolons without parsing.
+// SplitRawStatements splits an input string to statement strings at terminating semicolons without parsing.
 // Statements are terminated by `;`, `<eof>` or `;<eof>` and the minimum output will be []string{""}.
 // See [terminating semicolons].
 // This function won't panic but return error if lexer become error state.
 // filepath can be used in error message.
 //
 // [terminating semicolons]: https://cloud.google.com/spanner/docs/reference/standard-sql/lexical#terminating_semicolons
-func SplitRawStatements(filepath, input string) ([]string, error) {
+func SplitRawStatements(filepath, s string) ([]string, error) {
 	lex := &Lexer{
 		File: &token.File{
 			FilePath: filepath,
-			Buffer:   input,
+			Buffer:   s,
 		},
 	}
 
@@ -21,7 +21,7 @@ func SplitRawStatements(filepath, input string) ([]string, error) {
 	var firstPos token.Pos
 	for {
 		if lex.Token.Kind == ";" {
-			result = append(result, input[firstPos:lex.Token.Pos])
+			result = append(result, s[firstPos:lex.Token.Pos])
 			if err := lex.NextToken(); err != nil {
 				return nil, err
 			}
@@ -36,7 +36,7 @@ func SplitRawStatements(filepath, input string) ([]string, error) {
 
 		if lex.Token.Kind == token.TokenEOF {
 			if lex.Token.Pos != firstPos {
-				result = append(result, input[firstPos:lex.Token.Pos])
+				result = append(result, s[firstPos:lex.Token.Pos])
 			}
 			break
 		}
diff --git a/splitter_test.go b/split_test.go
similarity index 100%
rename from splitter_test.go
rename to split_test.go

From 070c041a5685ec4e34f8baa2d81e1f01801a0aa1 Mon Sep 17 00:00:00 2001
From: apstndb <803393+apstndb@users.noreply.github.com>
Date: Sat, 21 Sep 2024 21:47:27 +0900
Subject: [PATCH 10/10] Fix test function name

---
 split_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/split_test.go b/split_test.go
index 31f31bfa..d9b8a5b6 100644
--- a/split_test.go
+++ b/split_test.go
@@ -7,7 +7,7 @@ import (
 	"testing"
 )
 
-func TestSeparateRawStatements(t *testing.T) {
+func TestSplitRawStatements(t *testing.T) {
 	for _, test := range []struct {
 		desc  string
 		input string