Skip to content

Commit a839d2c

Browse files
authored
Add new functionality for reversing diffs. (#72)
1 parent 3751ac7 commit a839d2c

File tree

8 files changed

+556
-0
lines changed

8 files changed

+556
-0
lines changed

diff/reverse.go

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
package diff
2+
3+
import (
4+
"bytes"
5+
"errors"
6+
"fmt"
7+
)
8+
9+
// ReverseFileDiff takes a diff.FileDiff, and returns the reverse operation.
10+
// This is a FileDiff that undoes the edit of the original.
11+
func ReverseFileDiff(fd *FileDiff) (*FileDiff, error) {
12+
reverse := FileDiff{
13+
OrigName: fd.NewName,
14+
OrigTime: fd.NewTime,
15+
NewName: fd.OrigName,
16+
NewTime: fd.OrigTime,
17+
Extended: fd.Extended,
18+
}
19+
for _, hunk := range fd.Hunks {
20+
invHunk, err := reverseHunk(hunk)
21+
if err != nil {
22+
return nil, err
23+
}
24+
reverse.Hunks = append(reverse.Hunks, invHunk)
25+
}
26+
return &reverse, nil
27+
}
28+
29+
// ReverseMultiFileDiff reverses a series of FileDiffs.
30+
func ReverseMultiFileDiff(fds []*FileDiff) ([]*FileDiff, error) {
31+
var reverse []*FileDiff
32+
for _, fd := range fds {
33+
r, err := ReverseFileDiff(fd)
34+
if err != nil {
35+
return nil, err
36+
}
37+
reverse = append(reverse, r)
38+
}
39+
return reverse, nil
40+
}
41+
42+
// A contextLine is a single context (unchanged) line taken from a Hunk.Body.
type contextLine struct {
	// body is the line's content, including its trailing newline when one is
	// present. The leading ' ' marker is not included.
	body []byte
	// bare reports that the line appeared in the hunk body without the
	// leading ' ' marker, i.e. as a lone newline. A bare line is written back
	// without a marker when a hunk body is rebuilt.
	bare bool
}

// A subhunk represents a portion of a Hunk.Body, split into three sections.
// It consists of zero or more context lines, followed by zero or more orig
// lines and then zero or more new lines.
//
// Each orig and new line is stored WITHOUT its starting character, but with
// the newline included. The final entry in a section may be missing a trailing
// newline.
//
// A missing newline in orig is represented in a Hunk by OrigNoNewlineAt,
// but is represented here as a missing newline.
type subhunk struct {
	context []contextLine
	orig    [][]byte
	new     [][]byte
}
61+
62+
// reverseHunk converts a Hunk into its reverse operation.
63+
func reverseHunk(forward *Hunk) (*Hunk, error) {
64+
reverse := Hunk{
65+
OrigStartLine: forward.NewStartLine,
66+
OrigLines: forward.NewLines,
67+
OrigNoNewlineAt: 0, // we may change this below
68+
NewStartLine: forward.OrigStartLine,
69+
NewLines: forward.OrigLines,
70+
Section: forward.Section,
71+
StartPosition: forward.StartPosition,
72+
}
73+
subs, err := toSubhunks(forward)
74+
if err != nil {
75+
return nil, err
76+
}
77+
for _, sub := range subs {
78+
invSub := subhunk{
79+
context: sub.context,
80+
orig: sub.new,
81+
new: sub.orig,
82+
}
83+
for _, line := range invSub.context {
84+
if line.bare {
85+
reverse.Body = append(reverse.Body, line.body...)
86+
continue
87+
}
88+
reverse.Body = append(reverse.Body, ' ')
89+
reverse.Body = append(reverse.Body, line.body...)
90+
}
91+
for _, line := range invSub.orig {
92+
reverse.Body = append(reverse.Body, '-')
93+
reverse.Body = append(reverse.Body, line...)
94+
}
95+
if len(invSub.orig) > 0 && reverse.Body[len(reverse.Body)-1] != '\n' {
96+
// There was a missing newline in `orig`, which we encode in a
97+
// hunk with an offset.
98+
reverse.Body = append(reverse.Body, '\n')
99+
reverse.OrigNoNewlineAt = int32(len(reverse.Body))
100+
}
101+
for _, line := range invSub.new {
102+
reverse.Body = append(reverse.Body, '+')
103+
reverse.Body = append(reverse.Body, line...)
104+
}
105+
}
106+
return &reverse, nil
107+
}
108+
109+
func extractContextLines(from *[]byte) []contextLine {
110+
var lines []contextLine
111+
for len(*from) > 0 {
112+
if (*from)[0] == '\n' {
113+
lines = append(lines, contextLine{body: []byte{'\n'}, bare: true})
114+
*from = (*from)[1:]
115+
continue
116+
}
117+
if (*from)[0] != ' ' {
118+
break
119+
}
120+
121+
newline := bytes.IndexByte(*from, '\n')
122+
if newline < 0 {
123+
lines = append(lines, contextLine{body: (*from)[1:]})
124+
*from = nil
125+
continue
126+
}
127+
128+
lines = append(lines, contextLine{body: (*from)[1 : newline+1]})
129+
*from = (*from)[newline+1:]
130+
}
131+
return lines
132+
}
133+
134+
// extractLinesStartingWith consumes the leading run of lines in *from whose
// first byte is startingWith and returns them, advancing *from past what was
// consumed.
//
// Each returned line omits the leading marker byte and keeps its trailing
// newline. If the last matching line has no newline, it takes the rest of the
// buffer and *from is set to nil. The returned slices share storage with the
// input buffer.
func extractLinesStartingWith(from *[]byte, startingWith byte) [][]byte {
	rest := *from
	var out [][]byte
	for len(rest) > 0 && rest[0] == startingWith {
		// Default to "no newline found": the line is everything after the
		// marker and nothing remains afterwards.
		line, tail := rest[1:], []byte(nil)
		if nl := bytes.IndexByte(rest, '\n'); nl >= 0 {
			line, tail = rest[1:nl+1], rest[nl+1:]
		}
		out = append(out, line)
		rest = tail
	}
	*from = rest
	return out
}
153+
154+
// Extracts the subhunks from a diff.Hunk.
155+
//
156+
// This groups a Hunk's buffer into one or more subhunks, matching the conditions
157+
// of `subhunk` above. This function groups, strips prefix characters, and strips
158+
// a newline for `OrigNoNewlineAt` if necessary.
159+
func toSubhunks(hunk *Hunk) ([]subhunk, error) {
160+
var body []byte = hunk.Body
161+
var subhunks []subhunk
162+
if len(body) == 0 {
163+
return nil, nil
164+
}
165+
for len(body) > 0 {
166+
sh := subhunk{
167+
context: extractContextLines(&body),
168+
orig: extractLinesStartingWith(&body, '-'),
169+
new: extractLinesStartingWith(&body, '+'),
170+
}
171+
if len(sh.context) == 0 && len(sh.orig) == 0 && len(sh.new) == 0 {
172+
// The first line didn't start with any expected prefix.
173+
return nil, fmt.Errorf("unexpected character %q at start of line", body[0])
174+
}
175+
subhunks = append(subhunks, sh)
176+
}
177+
if hunk.OrigNoNewlineAt > 0 {
178+
// The Hunk represents a missing newline at the end of an "orig" line with a
179+
// OrigNoNewlineAt index. We represent it here as an actual missing newline.
180+
var lastSubhunk *subhunk = &subhunks[len(subhunks)-1]
181+
s := len(lastSubhunk.orig)
182+
if s == 0 {
183+
return nil, errors.New("inconsistent OrigNoNewlineAt in input")
184+
}
185+
var cut bool
186+
lastSubhunk.orig[s-1], cut = bytes.CutSuffix(lastSubhunk.orig[s-1], []byte("\n"))
187+
if !cut {
188+
return nil, errors.New("missing newline in input")
189+
}
190+
}
191+
return subhunks, nil
192+
}

0 commit comments

Comments
 (0)