Skip to content

Commit

Permalink
fix: revert regressions
Browse files Browse the repository at this point in the history
  • Loading branch information
gkampitakis committed Sep 4, 2022
1 parent 46eaa6f commit d7cda67
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 164,647 deletions.
115 changes: 56 additions & 59 deletions diffmatchpatch/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ const (
DiffInsert Operation = 1
// DiffEqual item represents an equal diff.
DiffEqual Operation = 0
// IndexSeparator is used to separate the array indexes in an index string.
IndexSeparator = ","
)

// Diff represents one diff operation
Expand Down Expand Up @@ -205,7 +203,7 @@ func (dmp *DiffMatchPatch) diffCompute(
// then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff {
// Scan the text on a line-by-line basis first.
text1, text2, linearray := dmp.DiffLinesToRunes(string(text1), string(text2))
text1, text2, linearray := dmp.diffLinesToRunes(text1, text2)

diffs := dmp.diffMainRunes(text1, text2, false, deadline)

Expand Down Expand Up @@ -406,28 +404,73 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
// a string of hashes where each Unicode character represents one line.
// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
return chars1, chars2, lineArray
chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2)
return string(chars1), string(chars2), lineArray
}

// DiffLinesToRunes splits two texts into a list of runes.
// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line.
func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
return []rune(chars1), []rune(chars2), lineArray
// '\x00' is a valid character, but various debuggers don't like it.
// So we'll insert a junk entry to avoid generating a null character.
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4

chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash)
chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash)

return chars1, chars2, lineArray
}

// diffLinesToRunes is the []rune-input variant of DiffLinesToRunes: both inputs
// are converted to strings and handed to the exported method unchanged.
func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) {
	first, second := string(text1), string(text2)
	return dmp.DiffLinesToRunes(first, second)
}

// diffLinesToRunesMunge encodes text as a []rune holding one rune per line.
// The rune's numeric value is the line's index in *lineArray. Lines that are
// not yet known are appended to *lineArray and recorded in lineHash; known
// lines reuse the index stored in lineHash. A line keeps its trailing "\n";
// the last line of text may end without one.
// The input is a string rather than a []rune mainly because a []rune cannot
// be used as a map key.
func (dmp *DiffMatchPatch) diffLinesToRunesMunge(
	text string,
	lineArray *[]string,
	lineHash map[string]int,
) []rune {
	// Lines are sliced out of text one at a time instead of splitting it up
	// front, which would temporarily double the memory footprint.
	encoded := make([]rune, 0)
	last := len(text) - 1

	for start, end := 0, -1; end < last; {
		end = indexOf(text, "\n", start)
		if end == -1 {
			// No further newline: the remainder of text is the final line.
			end = last
		}

		line := text[start : end+1]
		start = end + 1

		idx, seen := lineHash[line]
		if !seen {
			*lineArray = append(*lineArray, line)
			idx = len(*lineArray) - 1
			lineHash[line] = idx
		}
		encoded = append(encoded, rune(idx))
	}

	return encoded
}

// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
hydrated := make([]Diff, 0, len(diffs))
for _, aDiff := range diffs {
chars := strings.Split(aDiff.Text, IndexSeparator)
chars := aDiff.Text
text := make([]string, len(chars))

for i, r := range chars {
i1, err := strconv.Atoi(r)
if err == nil {
text[i] = lineArray[i1]
}
text[i] = lineArray[r]
}

aDiff.Text = strings.Join(text, "")
Expand Down Expand Up @@ -1331,49 +1374,3 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1, delta string) (diffs []Diff, err

return diffs, nil
}

// diffLinesToStrings reduces both texts to index strings, one index per line,
// and returns them together with the array of distinct lines the indexes
// refer to (e.g. lineArray[4] == "Hello\n").
func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
	// Slot 0 is a junk entry so that no real line is ever encoded as 0:
	// '\x00' is a valid character, but various debuggers don't like it.
	lines := []string{""}

	// Each element is the position in lines of the corresponding text line.
	indexes1 := dmp.diffLinesToStringsMunge(text1, &lines)
	indexes2 := dmp.diffLinesToStringsMunge(text2, &lines)

	encoded1 := intArrayToString(indexes1)
	encoded2 := intArrayToString(indexes2)

	return encoded1, encoded2, lines
}

// diffLinesToStringsMunge splits text into lines and returns, for every line in
// order, the index of that line in *lineArray. Lines that are not yet present
// are appended to *lineArray. A line keeps its trailing "\n"; the last line of
// text may end without one.
//
// Lines already stored in *lineArray (for example by an earlier call for the
// other text of a diff) are reused, so identical lines always map to the same
// index across calls that share one lineArray.
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
	// Seed the lookup table from the lines that are already known. Without
	// this, every call would start from an empty table and hand out fresh
	// indexes for lines that a previous call had already registered.
	// Walking backwards lets the lowest index win should a line be stored twice.
	lineHash := make(map[string]int, len(*lineArray)) // e.g. lineHash["Hello\n"] == 4
	for i := len(*lineArray) - 1; i >= 0; i-- {
		lineHash[(*lineArray)[i]] = i
	}

	// Walk the text, pulling out a substring for each line. Splitting on "\n"
	// up front would temporarily double our memory footprint, and modifying
	// text would create many large strings to garbage collect.
	lineStart := 0
	lineEnd := -1
	strs := []uint32{}

	for lineEnd < len(text)-1 {
		lineEnd = indexOf(text, "\n", lineStart)
		if lineEnd == -1 {
			// No further newline: the remainder of text is the final line.
			lineEnd = len(text) - 1
		}

		line := text[lineStart : lineEnd+1]
		lineStart = lineEnd + 1

		lineValue, ok := lineHash[line]
		if !ok {
			*lineArray = append(*lineArray, line)
			lineValue = len(*lineArray) - 1
			lineHash[line] = lineValue
		}
		strs = append(strs, uint32(lineValue))
	}

	return strs
}
59 changes: 12 additions & 47 deletions diffmatchpatch/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ package diffmatchpatch

import (
"fmt"
"io"
"os"
"reflect"
"strconv"
"strings"
Expand Down Expand Up @@ -302,10 +300,10 @@ func TestDiffLinesToChars(t *testing.T) {
dmp := New()

for i, tc := range []TestCase{
{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "1,2,3,3", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
{"a", "b", "1", "2", []string{"", "a", "b"}},
{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\u0001\u0002\u0003\u0003", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
{"a", "b", "\u0001", "\u0002", []string{"", "a", "b"}},
// Omit final newline.
{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
{"alpha\nbeta\nalpha", "", "\u0001\u0002\u0003", "", []string{"", "alpha\n", "beta\n", "alpha"}},
} {
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
assertEqual(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
Expand All @@ -318,14 +316,14 @@ func TestDiffLinesToChars(t *testing.T) {
lineList := []string{
"", // Account for the initial empty element of the lines array.
}
var charList []string
var charList []rune
for x := 1; x < n+1; x++ {
lineList = append(lineList, strconv.Itoa(x)+"\n")
charList = append(charList, strconv.Itoa(x))
charList = append(charList, rune(x))
}
lines := strings.Join(lineList, "")
chars := strings.Join(charList[:], ",")
assertEqual(t, n, len(strings.Split(chars, ",")))
chars := string(charList)
assertEqual(t, n, utf8.RuneCountInString(chars))

actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
assertEqual(t, chars, actualChars1)
Expand All @@ -345,8 +343,8 @@ func TestDiffCharsToLines(t *testing.T) {
for i, tc := range []TestCase{
{
Diffs: []Diff{
{DiffEqual, "1,2,1"},
{DiffInsert, "2,1,2"},
{DiffEqual, "\u0001\u0002\u0001"},
{DiffInsert, "\u0002\u0001\u0002"},
},
Lines: []string{"", "alpha\n", "beta\n"},

Expand All @@ -365,15 +363,14 @@ func TestDiffCharsToLines(t *testing.T) {
lineList := []string{
"", // Account for the initial empty element of the lines array.
}
charList := []string{}
charList := []rune{}
for x := 1; x <= n; x++ {
lineList = append(lineList, strconv.Itoa(x)+"\n")
charList = append(charList, strconv.Itoa(x))
charList = append(charList, rune(x))
}
assertEqual(t, n, len(charList))
chars := strings.Join(charList[:], ",")

actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, chars}}, lineList)
actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, string(charList)}}, lineList)
assertEqual(t, []Diff{{DiffDelete, strings.Join(lineList, "")}}, actual)
}

Expand Down Expand Up @@ -1507,19 +1504,6 @@ func TestDiffMainWithCheckLines(t *testing.T) {
}
}

// TestMassiveRuneDiffConversion pushes a large fixture file through the
// line-mode round trip (DiffLinesToChars, DiffMain, DiffCharsToLines) against
// an empty text and checks that at least one diff comes back.
func TestMassiveRuneDiffConversion(t *testing.T) {
	sNew, err := os.ReadFile("../testdata/fixture.go")
	if err != nil {
		// Fail only this test; a panic would take down the whole test binary.
		t.Fatalf("reading fixture: %v", err)
	}

	dmp := New()
	t1, t2, tt := dmp.DiffLinesToChars("", string(sNew))
	diffs := dmp.DiffMain(t1, t2, false)
	diffs = dmp.DiffCharsToLines(diffs, tt)
	assertEqual(t, true, len(diffs) > 0)
}

func BenchmarkDiffMain(bench *testing.B) {
var r []Diff

Expand Down Expand Up @@ -1579,22 +1563,3 @@ func BenchmarkDiffMainRunesLargeLines(b *testing.B) {

SinkSliceDiff = r
}

// BenchmarkDiffMainRunesLargeDiffLines measures the line-mode pipeline
// (DiffLinesToRunes, DiffMainRunes, DiffCharsToLines) on a fixture file
// diffed against an empty text.
func BenchmarkDiffMainRunesLargeDiffLines(b *testing.B) {
	var r []Diff

	fp, err := os.Open("../testdata/diff10klinestest.txt")
	if err != nil {
		// Without the fixture the loop below would time an empty input.
		b.Fatalf("opening fixture: %v", err)
	}
	defer fp.Close()

	data, err := io.ReadAll(fp)
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	dmp := New()

	// Exclude fixture loading from the measurement.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		text1, text2, linearray := dmp.DiffLinesToRunes(string(data), "")
		r = dmp.DiffMainRunes(text1, text2, false)
		r = dmp.DiffCharsToLines(r, linearray)
	}

	// Store the result in a package-level sink so the work is not optimized away.
	SinkSliceDiff = r
}
56 changes: 56 additions & 0 deletions diffmatchpatch/patch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,3 +567,59 @@ func TestPatchApply(t *testing.T) {
)
}
}

// TestIssues collects regression tests; each subtest is named after the URL of
// the upstream issue it covers.
func TestIssues(t *testing.T) {
// PatchMake over the two multi-line texts below is expected to produce
// exactly six patches.
t.Run("https://github.com/sergi/go-diff/issues/127", func(t *testing.T) {
text1 := `
1111111111111 000000
------------- ------
xxxxxxxxxxxxx ------
xxxxxxxxxxxxx ------
xxxxxxxxxxxxx xxxxxx
xxxxxxxxxxxxx ......
xxxxxxxxxxxxx 111111
xxxxxxxxxxxxx ??????
xxxxxxxxxxxxx 333333
xxxxxxxxxxxxx 555555
xxxxxxxxxx xxxxx
xxxxxxxxxx xxxxx
xxxxxxxxxx xxxxx
xxxxxxxxxx xxxxx
`
text2 := `
2222222222222 000000
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`

patches := New().PatchMake(text1, text2)
assertEqual(t, 6, len(patches), "Issue https://github.com/sergi/go-diff/issues/127")
})

// Runs the line-mode round trip (DiffLinesToChars, DiffMain,
// DiffCharsToLines) on two texts made of short numbered lines.
t.Run("https://github.com/sergi/go-diff/issues/4", func(t *testing.T) {
// The subtest makes no assertions: it passes as long as the round trip
// below doesn't panic, and the resulting diffs are deliberately discarded.
text1 := "1\n2\n3\n4\n5\n6\n7\n3\n8\n9\n3\n10\n3\n11\n3\n12\n13\n14\n15\n12\n13" +
"\n16\n13\n13\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34" +
"\n35\n12\n36\n37\n38\n39\n40\n41\n42\n13\n43\n44\n13\n45\n46\n47\n13\n13\n48\n49\n50" +
"\n51\n52\n13\n53\n54\n55\n56\n57\n58\n59\n60\n61\n62\n63\n64\n65\n66\n67\n68\n69\n13\n" +
"70\n71\n72\n73\n74\n13\n75\n13\n76\n77\n78\n79\n80\n81\n82\n83\n84\n85\n86\n87\n88\n89\n" +
"90\n67\n91\n92\n93\n81\n68\n13\n94\n71\n95\n96\n97\n98\n99\n100\n101\n102\n63\n103\n67\n104" +
"\n105\n13\n106\n107\n108\n109\n110\n111\n112\n113\n114\n115\n90\n116\n67\n13\n117\n72\n73\n" +
"74\n13\n75\n13\n76\n118\n119\n120\n78\n68\n121\n13\n122\n123\n124\n125\n93\n126\n68\n127\n13" +
"\n128\n129\n130\n131\n132\n133\n134\n135\n13\n136\n137\n138\n13\n78\n68\n13\n139\n140\n141\n142" +
"\n68\n13\n143\n144\n145\n146\n13\n147\n148\n13\n149\n150\n151\n152\n153\n150\n154\n13\n155\n156\n"
text2 := "1\n2\n3\n4\n5\n6\n7\n3\n157\n9\n3\n10\n3\n11\n3\n12\n13\n14\n15\n12\n13\n16\n13\n13" +
"\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34\n35\n12\n36\n37\n38\n39\n40" +
"\n41\n42\n13\n158\n159\n13\n45\n46\n47\n13\n13\n48\n49\n50\n51\n13\n53\n54\n55\n56\n57\n160\n59\n60" +
"\n61\n62\n63\n64\n161\n66\n67\n68\n69\n13\n70\n71\n72\n73\n74\n13\n75\n13\n162\n77\n78\n79\n80\n81\n" +
"82\n83\n84\n85\n86\n88\n89\n90\n67\n91\n92\n93\n81\n68\n13\n94\n71\n95\n96\n97\n98\n99\n100\n101\n102" +
"\n63\n103\n67\n104\n105\n13\n106\n107\n108\n109\n110\n111\n112\n113\n114\n115\n90\n116\n67\n13\n117\n72" +
"\n73\n74\n13\n75\n13\n163\n119\n120\n78\n68\n121\n13\n122\n123\n124\n125\n93\n126\n68\n127\n13\n128\n164" +
"\n130\n131\n132\n133\n134\n135\n13\n136\n137\n138\n13\n78\n68\n13\n139\n140\n165\n68\n13\n143\n144\n145\n" +
"146\n13\n147\n148\n13\n149\n150\n151\n166\n153\n150\n154\n13\n155\n156\n"

dmp := New()
t1, t2, lineArray := dmp.DiffLinesToChars(text1, text2)
diffs := dmp.DiffMain(t1, t2, false)
diffs = dmp.DiffCharsToLines(diffs, lineArray)
_ = diffs
})
}
18 changes: 0 additions & 18 deletions diffmatchpatch/stringutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
package diffmatchpatch

import (
"strconv"
"strings"
"unicode/utf8"
)
Expand Down Expand Up @@ -92,20 +91,3 @@ func runesIndex(r1, r2 []rune) int {
}
return -1
}

// intArrayToString renders ns as base-10 numbers joined by the first byte of
// IndexSeparator. An empty or nil slice yields the empty string.
func intArrayToString(ns []uint32) string {
	if len(ns) == 0 {
		return ""
	}

	sep := IndexSeparator[0]

	// Reserve roughly three digits plus one separator per number so typical
	// inputs are assembled without regrowing the buffer.
	b := make([]byte, 0, 4*len(ns))
	for i, n := range ns {
		// The separator goes between numbers only, so nothing has to be
		// trimmed off the end afterwards.
		if i > 0 {
			b = append(b, sep)
		}
		b = strconv.AppendUint(b, uint64(n), 10)
	}
	return string(b)
}
Loading

0 comments on commit d7cda67

Please sign in to comment.