Skip to content

Commit

Permalink
fix: use common lineHash to share indices between text1 and text2 (#5)
Browse files Browse the repository at this point in the history
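Use a common cache of line contents (`lineHash`) for both texts in `DiffLinesToChars` so that line diffs are computed correctly. Previously, each text was processed with its own `lineHash`, so a line that appears in both texts was not looked up in a shared table and did not reuse the index assigned while processing the other text.
In some cases, line diffs could not be retrieved correctly with the standard line-mode approach (https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs#line-mode).
In the case below, line diffs were not computed correctly before this fix.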

```go:main.go
package main

import (
	"fmt"

	"github.com/sergi/go-diff/diffmatchpatch"
)

// text1 and text2 are the two multi-line inputs that main diffs line by line.
// Relative to text1, text2 replaces "arrayitem1" with "arrayitem4", adds
// "arrayitem3" after "arrayitem2", and changes step22 from -93 to -92; every
// other line is identical in both texts.
const (
	text1 = `hoge:
  step11:
  - arrayitem1
  - arrayitem2
  step12:
    step21: hoge
    step22: -93
fuga: flatitem
`
	text2 = `hoge:
  step11:
  - arrayitem4
  - arrayitem2
  - arrayitem3
  step12:
    step21: hoge
    step22: -92
fuga: flatitem
`
)

// main performs a line-mode diff of text1 against text2 and prints the
// resulting diffs to standard output.
func main() {
	differ := diffmatchpatch.New()

	// Convert both texts to their line-indexed encodings; lines is the array
	// needed to map those encodings back to the original line contents.
	chars1, chars2, lines := differ.DiffLinesToChars(text1, text2)

	// Diff the encoded texts, then expand the result back into real lines.
	lineDiffs := differ.DiffCharsToLines(differ.DiffMain(chars1, chars2, false), lines)
	// lineDiffs = differ.DiffCleanupSemantic(lineDiffs)

	fmt.Println(lineDiffs)
}
```

```text:output
[{Insert hoge:
  step11:
hoge:
} {Equal hoge:
} {Insert hoge:
} {Equal   step11:
} {Insert hoge:
} {Equal   - arrayitem1
} {Insert hoge:
} {Equal   - arrayitem2
} {Insert hoge:
} {Equal   step12:
} {Insert hoge:
} {Equal     step21: hoge
} {Insert hoge:
} {Equal     step22: -93
} {Delete fuga: flatitem
}]
```

Note: This fix matches the behavior of the JavaScript implementation.
(ref: https://github.com/google/diff-match-patch/blob/62f2e689f498f9c92dbc588c58750addec9b1654/javascript/diff_match_patch_uncompressed.js#L466)

Co-authored-by: nrnrk <[email protected]>
  • Loading branch information
gkampitakis and nrnrk committed Feb 18, 2023
1 parent dd597d9 commit 68bad34
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
12 changes: 8 additions & 4 deletions diffmatchpatch/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -1338,19 +1338,23 @@ func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, stri
// So we'll insert a junk entry to avoid generating a null character.
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'

lineHash := make(map[string]int)
// Each string has the index of lineArray which it points to
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray, lineHash)
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray, lineHash)

return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
}

// diffLinesToStringsMunge splits a text into lines and reduces the text to a []uint32 of indices into lineArray.
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(
text string,
lineArray *[]string,
lineHash map[string]int,
) []uint32 {
// Walk the text, pulling out a substring for each line. text.split('\n')
// would temporarily double our memory footprint.
// Modifying text would create many large strings to garbage collect.
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
lineStart := 0
lineEnd := -1
strs := []uint32{}
Expand Down
2 changes: 2 additions & 0 deletions diffmatchpatch/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ func TestDiffLinesToChars(t *testing.T) {
{"a", "b", "1", "2", []string{"", "a", "b"}},
// Omit final newline.
{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
// Same lines in Text1 and Text2
{"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "1,2,3", "1,4,3,5", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
} {
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
assertEqual(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
Expand Down

0 comments on commit 68bad34

Please sign in to comment.