Skip to content

Commit ef9bf34

Browse files
committed
Avoid diffing by lines if inefficient
Avoid diffing by lines if it turns out to be significantly less efficient than diffing by bytes. Before this change: ( """ - d5c14bdf6bac81c27afc5429500ed750 - 25483503b557c606dad4f144d27ae10b - 90bdbcdbb6ea7156068e3dcfb7459244 - 978f480a6e3cced51e297fbff9a506b7 + Xd5c14bdf6bac81c27afc5429500ed750 + X25483503b557c606dad4f144d27ae10b + X90bdbcdbb6ea7156068e3dcfb7459244 + X978f480a6e3cced51e297fbff9a506b7 """ ) After this change: strings.Join({ + "X", "d5c14bdf6bac81c27afc5429500ed750\n", + "X", "25483503b557c606dad4f144d27ae10b\n", + "X", "90bdbcdbb6ea7156068e3dcfb7459244\n", + "X", "978f480a6e3cced51e297fbff9a506b7\n", }, "")
1 parent c5c3378 commit ef9bf34

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

cmp/compare_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,6 +1307,11 @@ using the AllowUnexported option.`, "\n"),
13071307
x: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa,#=_value _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb,#=_value _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc,#=_value _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd,#=_value _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c,#=_value _value=4 41\t",
13081308
y: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c _value=4 41\t",
13091309
reason: "leading/trailing equal spans should not appear in diff lines",
1310+
}, {
1311+
label: label + "/AllLinesDiffer",
1312+
x: "d5c14bdf6bac81c27afc5429500ed750\n25483503b557c606dad4f144d27ae10b\n90bdbcdbb6ea7156068e3dcfb7459244\n978f480a6e3cced51e297fbff9a506b7\n",
1313+
y: "Xd5c14bdf6bac81c27afc5429500ed750\nX25483503b557c606dad4f144d27ae10b\nX90bdbcdbb6ea7156068e3dcfb7459244\nX978f480a6e3cced51e297fbff9a506b7\n",
1314+
reason: "all lines are different, so diffing based on lines is pointless",
13101315
}}
13111316
}
13121317

cmp/report_slices.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
9898
// Auto-detect the type of the data.
9999
var isLinedText, isText, isBinary bool
100100
var sx, sy string
101+
var ssx, ssy []string
101102
switch {
102103
case t.Kind() == reflect.String:
103104
sx, sy = vx.String(), vy.String()
@@ -130,6 +131,22 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
130131
}
131132
isText = !isBinary
132133
isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
134+
135+
// Avoid diffing by lines if it produces a significantly more complex
136+
// edit script than diffing by bytes.
137+
if isLinedText {
138+
ssx = strings.Split(sx, "\n")
139+
ssy = strings.Split(sy, "\n")
140+
esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result {
141+
return diff.BoolResult(ssx[ix] == ssy[iy])
142+
})
143+
esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result {
144+
return diff.BoolResult(sx[ix] == sy[iy])
145+
})
146+
efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
147+
efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
148+
isLinedText = efficiencyLines < 4*efficiencyBytes
149+
}
133150
}
134151

135152
// Format the string into printable records.
@@ -139,8 +156,6 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
139156
// If the text appears to be multi-lined text,
140157
// then perform differencing across individual lines.
141158
case isLinedText:
142-
ssx := strings.Split(sx, "\n")
143-
ssy := strings.Split(sy, "\n")
144159
list = opts.formatDiffSlice(
145160
reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
146161
func(v reflect.Value, d diffMode) textRecord {

cmp/testdata/diffs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,18 @@
10651065
` _value=4 41 `,
10661066
}, "")
10671067
>>> TestDiff/Reporter/SurroundingEqualElements
1068+
<<< TestDiff/Reporter/AllLinesDiffer
1069+
strings.Join({
1070+
+ "X",
1071+
"d5c14bdf6bac81c27afc5429500ed750\n",
1072+
+ "X",
1073+
"25483503b557c606dad4f144d27ae10b\n",
1074+
+ "X",
1075+
"90bdbcdbb6ea7156068e3dcfb7459244\n",
1076+
+ "X",
1077+
"978f480a6e3cced51e297fbff9a506b7\n",
1078+
}, "")
1079+
>>> TestDiff/Reporter/AllLinesDiffer
10681080
<<< TestDiff/EmbeddedStruct/ParentStructA/Inequal
10691081
teststructs.ParentStructA{
10701082
privateStruct: teststructs.privateStruct{

0 commit comments

Comments
 (0)