From 0b50f6ac441aa9996f2ee29be0a1ee0ebf3806ac Mon Sep 17 00:00:00 2001
From: Ville Vesilehto
Date: Wed, 30 Oct 2024 21:35:08 +0200
Subject: [PATCH] feat: add fuzz and bench tests (#2)

---
Review notes (this text sits between the separator above and the first
file diff, so "git am" does not record it in the commit):

* Makefile: the new targets test-unit, test-bench and test-fuzz are not
  added to .PHONY, so a file or directory with one of those names would
  stop the target from running.
* Makefile: test-fuzz selects the fuzz targets with -run, which only
  replays their seed corpus; generating new inputs needs "-fuzz <name>"
  for a single target.
* thespine.go: EncodeText and DecodeText now skip empty tokens, so leading
  and repeated spaces are dropped, but the separator is still decided by
  the token index. An input such as "a " therefore keeps one trailing
  space while " a" loses its leading one. The fuzz test normalises
  whitespace before comparing and so does not notice.
* thespine_test.go: "for i := range 3" needs Go 1.22 or newer.
* The line structure of this patch was rebuilt from the hunk counts. The
  three blank context lines in the thespine.go hunk are assumed to sit
  before each return statement, and the seeds documented as "multiple
  spaces" were restored with an assumed number of spaces. Confirm both
  against the repository before applying.

 Makefile         |  10 ++-
 thespine.go      |  42 +++++++++----
 thespine_test.go | 154 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 193 insertions(+), 13 deletions(-)

diff --git a/Makefile b/Makefile
index b527747..cc0049c 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,15 @@ lit: lint
 vet:
 	go vet $$(go list ./...)
 
-test:
+test: test-unit test-fuzz test-bench
+
+test-unit:
 	go test -v -race -run ^Test -parallel=8 ./...
 
+test-bench:
+	go test -v -benchmem -bench ^Benchmark -parallel=8 ./...
+
+test-fuzz:
+	go test -v -race -run ^Fuzz -parallel=8 ./...
+
 .PHONY: fmt lint test
\ No newline at end of file
diff --git a/thespine.go b/thespine.go
index fa18d10..20a9f59 100644
--- a/thespine.go
+++ b/thespine.go
@@ -78,46 +78,64 @@ func Encode(s string) (string, error) {
 // EncodeText takes a UTF-8 string as an input, splits it by whitespace and runs an anagram for each word.
 // Error returned in case of an invalid UTF-8 string.
 func EncodeText(s string) (string, error) {
-	o := ""
+	if s == "" {
+		return "", nil
+	}
+
+	var builder strings.Builder
 	ws := strings.Split(s, " ")
 	for i, w := range ws {
+		if w == "" {
+			continue // Skip empty strings or preserve them, depending on requirements
+		}
+
 		ew, err := Encode(w)
 		if err != nil {
 			return "", err
 		}
-		o += ew
+
+		builder.WriteString(ew)
 		if i != len(ws)-1 {
-			o += " "
+			builder.WriteString(" ")
 		}
 	}
 
-	return o, nil
+	return builder.String(), nil
 }
 
 // DecodeText takes a UTF-8 string as an input, splits it by whitespace and decodes each anagram word-by-word.
 // Error returned in case of an invalid UTF-8 string.
 func DecodeText(s string) (string, error) {
-	o := ""
+	if s == "" {
+		return "", nil
+	}
+
+	var builder strings.Builder
 	ws := strings.Split(s, " ")
 	for i, w := range ws {
+		if w == "" {
+			continue // Skip empty strings or preserve them, depending on requirements
+		}
+
 		ew, err := Decode(w)
 		if err != nil {
 			return "", err
 		}
-		o += ew
+
+		builder.WriteString(ew)
 		if i < len(ws)-1 {
-			o += " "
+			builder.WriteString(" ")
 		}
 	}
 
-	return o, nil
+	return builder.String(), nil
 }
 
 func runestring(r [][]rune) string {
-	var s string
-	for _, r := range r {
-		s += string(r)
+	var builder strings.Builder
+	for _, runes := range r {
+		builder.WriteString(string(runes))
 	}
 
-	return s
+	return builder.String()
 }
diff --git a/thespine_test.go b/thespine_test.go
index fcbb49e..5a1884d 100644
--- a/thespine_test.go
+++ b/thespine_test.go
@@ -3,7 +3,9 @@ package thespine
 import (
 	"fmt"
 	"log"
+	"strings"
 	"testing"
+	"unicode/utf8"
 )
 
 func ExampleDecode() {
@@ -252,3 +254,155 @@ func Test_DecodeText(t *testing.T) {
 		})
 	}
 }
+
+func FuzzEncodeDecodeComprehensive(f *testing.F) {
+	// Add seed corpus
+	seeds := []string{
+		"",                        // Empty string
+		"a",                       // Single char
+		"ab",                      // Two chars
+		"abc",                     // Three chars (group size)
+		"abcd",                    // More than group size
+		"Hello, 世界!",              // Mixed ASCII and Unicode
+		"🌍🌎🌏",                     // Only emojis
+		"   ",                     // Multiple spaces
+		"a\nb\tc",                 // Special whitespace
+		"a  b  c",                 // Multiple consecutive spaces
+		strings.Repeat("a", 1000), // Long string
+		"ᚠᛇᚻ᛫ᛒᛦᚦ",                 // Runes
+		"\u200B\u200C\u200D",      // Zero-width characters
+		"a\u0300\u0301b\u0302c",   // Combining diacritical marks
+	}
+
+	for _, seed := range seeds {
+		f.Add(seed)
+	}
+
+	f.Fuzz(func(t *testing.T, input string) {
+		// Skip invalid UTF-8
+		if !utf8.ValidString(input) {
+			return
+		}
+
+		// Test 1: Encode->Decode roundtrip
+		encoded, err := Encode(input)
+		if err != nil {
+			// Some inputs might legitimately fail to encode
+			return
+		}
+		decoded, err := Decode(encoded)
+		if err != nil {
+			t.Errorf("Failed to decode encoded string: %v", err)
+
+			return
+		}
+		if decoded != input {
+			t.Errorf("Roundtrip failed: input=%q, got=%q", input, decoded)
+		}
+
+		// Test 2: Check encoded string properties
+		inputRunes := []rune(input)
+		encodedRunes := []rune(encoded)
+		if len(inputRunes) != len(encodedRunes) {
+			t.Errorf("Length mismatch: input=%d, encoded=%d", len(inputRunes), len(encodedRunes))
+		}
+
+		// Test 3: Multiple encode/decode cycles
+		current := input
+		for i := range 3 {
+			encoded, err := Encode(current)
+			if err != nil {
+				t.Errorf("Failed at cycle %d: %v", i, err)
+
+				return
+			}
+			decoded, err := Decode(encoded)
+			if err != nil {
+				t.Errorf("Failed at cycle %d: %v", i, err)
+
+				return
+			}
+			if decoded != current {
+				t.Errorf("Cycle %d failed: expected=%q, got=%q", i, current, decoded)
+			}
+			current = decoded
+		}
+	})
+}
+
+func FuzzEncodeDecodeText(f *testing.F) {
+	seeds := []string{
+		"",
+		"hello world",
+		" spaced words ",
+		"one two three four",
+		"Hello,\nWorld!",
+		"Tab\there",
+		"Mixed 世界 Unicode",
+		"🌍 Earth 🌎 Globe 🌏",
+		strings.Repeat("word ", 100),
+	}
+
+	for _, seed := range seeds {
+		f.Add(seed)
+	}
+
+	f.Fuzz(func(t *testing.T, input string) {
+		// Skip invalid UTF-8
+		if !utf8.ValidString(input) {
+			return
+		}
+
+		// Test 1: EncodeText->DecodeText roundtrip
+		encoded, err := EncodeText(input)
+		if err != nil {
+			return
+		}
+		decoded, err := DecodeText(encoded)
+		if err != nil {
+			t.Errorf("Failed to decode encoded text: %v", err)
+
+			return
+		}
+
+		// Normalize spaces for comparison since that's part of the spec
+		normalizeSpaces := func(s string) string {
+			return strings.Join(strings.Fields(s), " ")
+		}
+
+		normalizedInput := normalizeSpaces(input)
+		normalizedDecoded := normalizeSpaces(decoded)
+
+		if normalizedDecoded != normalizedInput {
+			t.Errorf("Roundtrip failed:\ninput=%q\ngot=%q", normalizedInput, normalizedDecoded)
+		}
+
+		// Test 2: Check word boundaries are preserved
+		inputWords := strings.Fields(input)
+		encodedWords := strings.Fields(encoded)
+		if len(inputWords) != len(encodedWords) {
+			t.Errorf("Word count mismatch: input=%d, encoded=%d", len(inputWords), len(encodedWords))
+		}
+	})
+}
+
+// Add benchmark tests.
+func BenchmarkEncode(b *testing.B) {
+	inputs := []struct {
+		name string
+		str  string
+	}{
+		{"small", "hello"},
+		{"medium", strings.Repeat("hello", 100)},
+		{"large", strings.Repeat("hello", 1000)},
+		{"unicode", "Hello, 世界! 🌍"},
+	}
+
+	for _, input := range inputs {
+		b.Run(input.name, func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				_, _ = Encode(input.str)
+			}
+		})
+	}
+}