Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly handle lengths of serialized strings #39

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ test:
go test -v ./...
go test -bench .

bench:
go test -bench .

clean:
rm -rf ${BUILDDIR}

Expand Down
22 changes: 22 additions & 0 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,25 @@ func TestMultipleReplaceWithoutNewlineAtEOF(t *testing.T) {
expected := "Space, the final frontier!\nCheck out: warp://ncc-1701-d.space/decks/10/areas/forward"
doMainTest(t, input, expected, mainArgs)
}

// TestSerializedReplaceWithCss hands doMainTest a serialized array whose
// "css" value embeds the searched URL. The expected output carries the
// replacement URL and a declared string length of 214 instead of 216.
func TestSerializedReplaceWithCss(t *testing.T) {
	const (
		searchURL  = "https://uss-enterprise.com"
		replaceURL = "https://ncc-1701-d.space"
	)

	input := `a:2:{s:3:\"key\";s:5:\"value\";s:3:\"css\";s:216:\"body { color: #123456;\r\nborder-bottom: none; }\r\ndiv.bg { background: url('https://uss-enterprise.com/wp-content/uploads/main-bg.gif');\r\n background-position: left center;\r\n background-repeat: no-repeat; }\";}`
	expected := `a:2:{s:3:\"key\";s:5:\"value\";s:3:\"css\";s:214:\"body { color: #123456;\r\nborder-bottom: none; }\r\ndiv.bg { background: url('https://ncc-1701-d.space/wp-content/uploads/main-bg.gif');\r\n background-position: left center;\r\n background-repeat: no-repeat; }\";}`

	doMainTest(t, input, expected, []string{searchURL, replaceURL})
}

// TestSerializedReplaceWithCssAndUnrelatedSerializationMarker is the CSS
// fixture again, with an extra rule containing an escaped, quoted "▼" ahead
// of the searched URL. The expected output carries the replacement URL and a
// declared string length of 247 instead of 249.
func TestSerializedReplaceWithCssAndUnrelatedSerializationMarker(t *testing.T) {
	const (
		searchURL  = "https://uss-enterprise.com"
		replaceURL = "https://ncc-1701-d.space"
	)

	input := `a:2:{s:3:\"key\";s:5:\"value\";s:3:\"css\";s:249:\"body { color: #123456;\r\nborder-bottom: none; }\r\nbody:after{ content: \"▼\"; }\r\ndiv.bg { background: url('https://uss-enterprise.com/wp-content/uploads/main-bg.gif');\r\n background-position: left center;\r\n background-repeat: no-repeat; }\";}`
	expected := `a:2:{s:3:\"key\";s:5:\"value\";s:3:\"css\";s:247:\"body { color: #123456;\r\nborder-bottom: none; }\r\nbody:after{ content: \"▼\"; }\r\ndiv.bg { background: url('https://ncc-1701-d.space/wp-content/uploads/main-bg.gif');\r\n background-position: left center;\r\n background-repeat: no-repeat; }\";}`

	doMainTest(t, input, expected, []string{searchURL, replaceURL})
}
95 changes: 94 additions & 1 deletion search-replace.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ func main() {

go func(line *[]byte) {
defer wg.Done()
line = replaceAndFix(line, replacements)
line = fixLine(line, replacements)
ch <- *line
}(&line)
}
Expand All @@ -129,6 +129,99 @@ func main() {
}
}

// debugMode toggles the diagnostic output produced by Debugf.
var debugMode = false

// Debugf prints a formatted diagnostic message to standard output when
// debugMode is enabled, and does nothing otherwise.
//
// format and args follow the fmt.Printf conventions.
func Debugf(format string, args ...interface{}) {
	// Guard first: the previous version returned unconditionally, which left
	// the debugMode check unreachable and the flag without any effect.
	if !debugMode {
		return
	}
	fmt.Printf(format, args...)
}

// fixLine applies every replacement to line and returns the pointer to the
// updated bytes.
//
// Lines containing the "s:" marker are first handed to fixSerializedContent.
// Afterwards each replacement is applied, in order, to the whole line; the
// result is written back through the pointer, so the caller's slice is
// updated as well.
func fixLine(line *[]byte, replacements []*Replacement) *[]byte {
	result := line
	if hasMarker := bytes.Contains(*result, []byte("s:")); hasMarker {
		result = fixSerializedContent(result, replacements)
	}

	Debugf("Doing global replacements: %s\n", string(*result))

	// Whole-line pass: picks up every occurrence still present in the line.
	for i := range replacements {
		from, to := replacements[i].From, replacements[i].To
		*result = bytes.ReplaceAll(*result, from, to)
		Debugf("After global replacement (from: %s | to: %s): %s\n", from, to, string(*result))
	}

	Debugf("All done: %s\n", string(*result))

	return result
}

// serializedStringPrefixRegexp matches the header of a serialized string as
// it appears with escaped quotes: `s:<digits>:\"`. The capture group is the
// declared length.
var serializedStringPrefixRegexp = regexp.MustCompile(`s:(\d+):\\"`)

// fixSerializedContent walks every `s:<length>:\"` header in line, applies
// replacements to the <length> bytes that follow it and, when those bytes
// changed, rewrites the declared length to the new byte count.
//
// Text outside those windows is left untouched. The result is stored back
// through line and the same pointer is returned.
//
// Headers whose declared length cannot be parsed or reaches past the end of
// the line are left as they are.
//
// NOTE(review): the window is measured on the bytes as they appear in the
// line. If the declared length counts unescaped bytes (presumably the case
// for escaped dumps), the window can end before the real terminator, which is
// why the trailing `\";` is not verified here — confirm against real data.
func fixSerializedContent(line *[]byte, replacements []*Replacement) *[]byte {
	startIndex := 0
	for startIndex < len(*line) {
		Debugf("Start of loop, startIndex: %d\n", startIndex)
		match := serializedStringPrefixRegexp.FindSubmatchIndex((*line)[startIndex:])
		if match == nil {
			break
		}

		// Absolute offsets of the length digits and of the first content byte.
		lengthStart := startIndex + match[2]
		lengthEnd := startIndex + match[3]
		contentStart := startIndex + match[1]

		length, err := strconv.Atoi(string((*line)[lengthStart:lengthEnd]))
		if err != nil {
			// Only reachable when the digits overflow int. Jump past the whole
			// header instead of re-scanning the same match one byte at a time.
			startIndex = contentStart
			continue
		}
		Debugf("Match found, length: %d\n", length)

		// A declared length reaching past the end of the line is broken
		// content; slicing it would run out of range, so skip this header.
		if length > len(*line)-contentStart {
			Debugf("Declared length %d exceeds remaining line; skipping\n", length)
			startIndex = contentStart
			continue
		}
		contentEnd := contentStart + length
		Debugf("Content boundaries, start: %d, end: %d\n", contentStart, contentEnd)

		serializedContent := (*line)[contentStart:contentEnd]
		Debugf("Content before: %s\n", serializedContent)
		updatedContent := replaceInSerializedBytes(serializedContent, replacements)
		Debugf("Content after: %s\n\n", updatedContent)

		if bytes.Equal(serializedContent, updatedContent) {
			// Nothing changed: resume right after the window. The next search
			// finds the following header on its own, so no bytes are skipped
			// on the assumption that a terminator follows.
			startIndex = contentEnd
			Debugf("No replacements made; skipping to %d\n", startIndex)
			continue
		}

		newLengthStr := strconv.Itoa(len(updatedContent))
		Debugf("Replaced content new length: %d\n", len(updatedContent))

		// Rebuild the line from slices of the ORIGINAL line in a single pass.
		// Updating the length digits in place first would shift the content
		// whenever the digit count changes (e.g. 9 -> 11) and invalidate
		// contentStart/contentEnd.
		rebuilt := make([]byte, 0, len(*line)+len(updatedContent)-length+len(newLengthStr))
		rebuilt = append(rebuilt, (*line)[:lengthStart]...)
		rebuilt = append(rebuilt, newLengthStr...)
		rebuilt = append(rebuilt, (*line)[lengthEnd:contentStart]...)
		rebuilt = append(rebuilt, updatedContent...)
		rebuilt = append(rebuilt, (*line)[contentEnd:]...)
		*line = rebuilt
		Debugf("After updating content: %s\n", string(*line))

		// Continue right after the rewritten content in the rebuilt line.
		startIndex = lengthStart + len(newLengthStr) + (contentStart - lengthEnd) + len(updatedContent)
		Debugf("New startIndex: %d\n", startIndex)
	}

	return line
}

// replaceInSerializedBytes applies each replacement, in order, to serialized
// and returns the resulting bytes. Later replacements operate on the output
// of earlier ones. With no replacements the input slice is returned as is.
func replaceInSerializedBytes(serialized []byte, replacements []*Replacement) []byte {
	out := serialized
	for i := 0; i < len(replacements); i++ {
		rule := replacements[i]
		// n = -1 replaces every occurrence.
		out = bytes.Replace(out, rule.From, rule.To, -1)
	}
	return out
}

func replaceAndFix(line *[]byte, replacements []*Replacement) *[]byte {
for _, replacement := range replacements {
if !bytes.Contains(*line, replacement.From) {
Expand Down
82 changes: 78 additions & 4 deletions search-replace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,35 @@ func BenchmarkFix(b *testing.B) {
}
}

func BenchmarkSimpleReplace(b *testing.B) {
func BenchmarkNoReplaceOld(b *testing.B) {
line := []byte("http://automattic.com")
from := []byte("bananas")
to := []byte("apples")
for i := 0; i < b.N; i++ {
replaceAndFix(&line, []*Replacement{
{
From: from,
To: to,
},
})
}
}

func BenchmarkNoReplaceNew(b *testing.B) {
line := []byte("http://automattic.com")
from := []byte("bananas")
to := []byte("apples")
for i := 0; i < b.N; i++ {
fixLine(&line, []*Replacement{
{
From: from,
To: to,
},
})
}
}

func BenchmarkSimpleReplaceOld(b *testing.B) {
line := []byte("http://automattic.com")
from := []byte("http:")
to := []byte("https:")
Expand All @@ -26,7 +54,21 @@ func BenchmarkSimpleReplace(b *testing.B) {
}
}

func BenchmarkSerializedReplace(b *testing.B) {
func BenchmarkSimpleReplaceNew(b *testing.B) {
line := []byte("http://automattic.com")
from := []byte("http:")
to := []byte("https:")
for i := 0; i < b.N; i++ {
fixLine(&line, []*Replacement{
{
From: from,
To: to,
},
})
}
}

func BenchmarkSerializedReplaceOld(b *testing.B) {
line := []byte(`s:0:\"http://automattic.com\";`)
from := []byte("http://automattic.com")
to := []byte("https://automattic.com")
Expand All @@ -40,6 +82,20 @@ func BenchmarkSerializedReplace(b *testing.B) {
}
}

func BenchmarkSerializedReplaceNew(b *testing.B) {
line := []byte(`s:0:\"http://automattic.com\";`)
from := []byte("http://automattic.com")
to := []byte("https://automattic.com")
for i := 0; i < b.N; i++ {
fixLine(&line, []*Replacement{
{
From: from,
To: to,
},
})
}
}

func TestReplace(t *testing.T) {
var tests = []struct {
testName string
Expand Down Expand Up @@ -81,7 +137,7 @@ func TestReplace(t *testing.T) {
from: []byte("http://🖖.com"),
to: []byte("https://spock.com"),

in: []byte(`s:12:\"http://🖖.com\";`),
in: []byte(`s:15:\"http://🖖.com\";`),
out: []byte(`s:17:\"https://spock.com\";`),
},
{
Expand All @@ -93,11 +149,29 @@ func TestReplace(t *testing.T) {
in: []byte(`s:17:\"https://spock.com\";`),
out: []byte(`s:15:\"http://🖖.com\";`),
},
{
testName: "search and replace with different lengths",

from: []byte("hello"),
to: []byte("goodbye"),

in: []byte(`s:11:\"hello-world\";`),
out: []byte(`s:13:\"goodbye-world\";`),
},
{
testName: "search and replace with different lengths",

from: []byte("bbbbbbbbbb"),
to: []byte("ccccccccccccccc"),

in: []byte(`s:20:\"aaaaabbbbbbbbbbaaaaa\";`),
out: []byte(`s:25:\"aaaaacccccccccccccccaaaaa\";`),
},
}

for _, test := range tests {
t.Run(test.testName, func(t *testing.T) {
replaced := replaceAndFix(&test.in, []*Replacement{
replaced := fixLine(&test.in, []*Replacement{
{
From: test.from,
To: test.to,
Expand Down