From 0b50f6ac441aa9996f2ee29be0a1ee0ebf3806ac Mon Sep 17 00:00:00 2001
From: Ville Vesilehto <ville@vesilehto.fi>
Date: Wed, 30 Oct 2024 21:35:08 +0200
Subject: [PATCH] feat: add fuzz and bench tests (#2)

---
 Makefile         |  10 ++-
 thespine.go      |  42 +++++++++----
 thespine_test.go | 154 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 193 insertions(+), 13 deletions(-)

diff --git a/Makefile b/Makefile
index b527747..cc0049c 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,17 @@ lit: lint
 vet:
 	go vet $$(go list ./...)
 
-test:
+test: test-unit test-fuzz test-bench
+
+test-unit:
 	go test -v -race -run ^Test -parallel=8 ./...
 
+# -run '^$$' matches no test, so only the benchmarks execute here.
+test-bench:
+	go test -v -benchmem -run '^$$' -bench ^Benchmark -parallel=8 ./...
+
+# Runs each fuzz target against its seed corpus only; no -fuzz flag is passed.
+test-fuzz:
+	go test -v -race -run ^Fuzz -parallel=8 ./...
+
-.PHONY: fmt lint test
\ No newline at end of file
+.PHONY: fmt lint test test-unit test-bench test-fuzz
\ No newline at end of file
diff --git a/thespine.go b/thespine.go
index fa18d10..20a9f59 100644
--- a/thespine.go
+++ b/thespine.go
@@ -78,46 +78,64 @@ func Encode(s string) (string, error) {
 // EncodeText takes a UTF-8 string as an input, splits it by whitespace and runs an anagram for each word.
 // Error returned in case of an invalid UTF-8 string.
 func EncodeText(s string) (string, error) {
-	o := ""
+	if s == "" {
+		return "", nil
+	}
+
+	var builder strings.Builder
 	ws := strings.Split(s, " ")
 	for i, w := range ws {
+		if w == "" {
+			continue // Empty fields come from leading or repeated spaces; dropping them collapses the run.
+		}
+
 		ew, err := Encode(w)
 		if err != nil {
 			return "", err
 		}
-		o += ew
+
+		builder.WriteString(ew)
 		if i != len(ws)-1 {
-			o += " "
+			builder.WriteString(" ")
 		}
 	}
 
-	return o, nil
+	return strings.TrimSuffix(builder.String(), " "), nil // s ending in spaces leaves one dangling separator
 }
 
 // DecodeText takes a UTF-8 string as an input, splits it by whitespace and decodes each anagram word-by-word.
 // Error returned in case of an invalid UTF-8 string.
 func DecodeText(s string) (string, error) {
-	o := ""
+	if s == "" {
+		return "", nil
+	}
+
+	var builder strings.Builder
 	ws := strings.Split(s, " ")
 	for i, w := range ws {
+		if w == "" {
+			continue // Empty fields come from leading or repeated spaces; dropping them collapses the run.
+		}
+
 		ew, err := Decode(w)
 		if err != nil {
 			return "", err
 		}
-		o += ew
+
+		builder.WriteString(ew)
 		if i < len(ws)-1 {
-			o += " "
+			builder.WriteString(" ")
 		}
 	}
 
-	return o, nil
+	return strings.TrimSuffix(builder.String(), " "), nil // s ending in spaces leaves one dangling separator
 }
 
 func runestring(r [][]rune) string {
-	var s string
-	for _, r := range r {
-		s += string(r)
+	var sb strings.Builder
+	for _, group := range r {
+		sb.WriteString(string(group)) // inner slices are appended in order, with nothing between them
 	}
 
-	return s
+	return sb.String()
 }
diff --git a/thespine_test.go b/thespine_test.go
index fcbb49e..5a1884d 100644
--- a/thespine_test.go
+++ b/thespine_test.go
@@ -3,7 +3,9 @@ package thespine
 import (
 	"fmt"
 	"log"
+	"strings"
 	"testing"
+	"unicode/utf8"
 )
 
 func ExampleDecode() {
@@ -252,3 +254,155 @@ func Test_DecodeText(t *testing.T) {
 		})
 	}
 }
+
+// FuzzEncodeDecodeComprehensive fuzzes Encode and Decode on whole strings: a successful Encode must be
+// undone by Decode, must keep the rune count, and must keep doing so over three repeated cycles.
+func FuzzEncodeDecodeComprehensive(f *testing.F) {
+	// Seed corpus: empty, short, multi-byte, whitespace-only and long inputs.
+	corpus := []string{
+		"",                        // Empty string
+		"a",                       // Single char
+		"ab",                      // Two chars
+		"abc",                     // Three chars (group size)
+		"abcd",                    // More than group size
+		"Hello, 世界!",              // Mixed ASCII and Unicode
+		"🌍🌎🌏",                     // Only emojis
+		"     ",                   // Multiple spaces
+		"a\nb\tc",                 // Special whitespace
+		"a  b    c",               // Multiple consecutive spaces
+		strings.Repeat("a", 1000), // Long string
+		"ᚠᛇᚻ᛫ᛒᛦᚦ",                 // Runic script
+		"\u200B\u200C\u200D",      // Zero-width characters
+		"a\u0300\u0301b\u0302c",   // Combining diacritical marks
+	}
+
+	for _, entry := range corpus {
+		f.Add(entry)
+	}
+
+	f.Fuzz(func(t *testing.T, input string) {
+		// Inputs that are not valid UTF-8 are out of scope here.
+		if !utf8.ValidString(input) {
+			return
+		}
+
+		// Check 1: one Encode followed by one Decode gives back the input.
+		enc, err := Encode(input)
+		if err != nil {
+			// An Encode error is tolerated, not reported.
+			return
+		}
+		dec, err := Decode(enc)
+		if err != nil {
+			t.Errorf("Failed to decode encoded string: %v", err)
+
+			return
+		}
+		if dec != input {
+			t.Errorf("Roundtrip failed: input=%q, got=%q", input, dec)
+		}
+
+		// Check 2: encoding leaves the number of runes unchanged.
+		if want, got := utf8.RuneCountInString(input), utf8.RuneCountInString(enc); want != got {
+			t.Errorf("Length mismatch: input=%d, encoded=%d", want, got)
+		}
+
+		// Check 3: the roundtrip also holds when run three times in a row.
+		current := input
+		for cycle := 0; cycle < 3; cycle++ {
+			again, err := Encode(current)
+			if err != nil {
+				t.Errorf("Failed at cycle %d: %v", cycle, err)
+
+				return
+			}
+			back, err := Decode(again)
+			if err != nil {
+				t.Errorf("Failed at cycle %d: %v", cycle, err)
+
+				return
+			}
+			if back != current {
+				t.Errorf("Cycle %d failed: expected=%q, got=%q", cycle, current, back)
+			}
+			current = back
+		}
+	})
+}
+
+// FuzzEncodeDecodeText fuzzes the EncodeText->DecodeText roundtrip on valid UTF-8 input.
+func FuzzEncodeDecodeText(f *testing.F) {
+	seeds := []string{
+		"",
+		"hello world",
+		"  spaced  words  ",
+		"one two three four",
+		"Hello,\nWorld!",
+		"Tab\there",
+		"Mixed 世界 Unicode",
+		"🌍 Earth 🌎 Globe 🌏",
+		strings.Repeat("word ", 100),
+	}
+
+	for _, seed := range seeds {
+		f.Add(seed)
+	}
+
+	// words returns the non-empty fields of s separated by the ASCII space. strings.Fields
+	// would also split on tabs and newlines, which EncodeText keeps inside a word.
+	words := func(s string) []string {
+		return strings.FieldsFunc(s, func(r rune) bool { return r == ' ' })
+	}
+
+	f.Fuzz(func(t *testing.T, input string) {
+		// Skip invalid UTF-8
+		if !utf8.ValidString(input) {
+			return
+		}
+
+		// Test 1: EncodeText->DecodeText roundtrip
+		encoded, err := EncodeText(input)
+		if err != nil {
+			return
+		}
+		decoded, err := DecodeText(encoded)
+		if err != nil {
+			t.Errorf("Failed to decode encoded text: %v", err)
+
+			return
+		}
+
+		// Runs of spaces are collapsed by EncodeText, so compare the space-normalized forms.
+		normalizedInput := strings.Join(words(input), " ")
+		normalizedDecoded := strings.Join(words(decoded), " ")
+		if normalizedDecoded != normalizedInput {
+			t.Errorf("Roundtrip failed:\ninput=%q\ngot=%q", normalizedInput, normalizedDecoded)
+		}
+
+		// Test 2: Check word boundaries are preserved
+		if in, out := len(words(input)), len(words(encoded)); in != out {
+			t.Errorf("Word count mismatch: input=%d, encoded=%d", in, out)
+		}
+	})
+}
+
+// BenchmarkEncode times Encode on ASCII inputs of 5, 500 and 5000 bytes and on one short multi-script string.
+func BenchmarkEncode(b *testing.B) {
+	inputs := []struct{ name, str string }{
+		{"small", "hello"},
+		{"medium", strings.Repeat("hello", 100)},
+		{"large", strings.Repeat("hello", 1000)},
+		{"unicode", "Hello, 世界! 🌍"},
+	}
+
+	for _, input := range inputs {
+		b.Run(input.name, func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				// Fail loudly instead of silently timing an error path.
+				if _, err := Encode(input.str); err != nil {
+					b.Fatal(err)
+				}
+			}
+		})
+	}
+}