Skip to content

Commit 616cc70

Browse files
committed
Change bsdiff's suffix sort algorithm to sais. Fixes sparkle-project#132
Although sparkle-project#132 is marked as closed, the issue is still real. bsdiff can crash or hang when creating diffs on some files. In particular, bsdiff hangs/crashes in its recursive split function. As you can see from: http://stackoverflow.com/a/20305493/871119 https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=409664 bsdiff 4.3 uses qsufsort for its suffix sorting algorithm, which has a bug causing the hanging/crashing. Our solution is to change the algorithm to use sais for suffix sorting: https://sites.google.com/site/yuta256/sais Notes: *I had to change sais_index_type #define from int to off_t in sais.h/c and in some other places, to eliminate all warnings. Compiles cleanly with clang's -Weverything *sais is claimed to be more efficient than other algorithms including qsufsort. If a more efficient (and stable) algorithm comes out, we may be able to swap it out in the future.
1 parent 09d9893 commit 616cc70

File tree

5 files changed

+604
-133
lines changed

5 files changed

+604
-133
lines changed

LICENSE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ EXTERNAL LICENSES
3030
bspatch.c and bsdiff.c, from bsdiff 4.3 <http://www.daemonology.net/bsdiff/>:
3131
Copyright (c) 2003-2005 Colin Percival.
3232

33+
sais.c and sais.c, from sais-lite (2010/08/07) <https://sites.google.com/site/yuta256/sais>:
34+
Copyright (c) 2008-2010 Yuta Mori.
35+
3336
SUDSAVerifier.m:
3437
Copyright (c) 2011 Mark Hamlin.
3538

Sparkle.xcodeproj/project.pbxproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@
167167
61F83F720DBFE140006FDD30 /* SUBasicUpdateDriver.m in Sources */ = {isa = PBXBuildFile; fileRef = 61F83F700DBFE137006FDD30 /* SUBasicUpdateDriver.m */; };
168168
61F83F740DBFE141006FDD30 /* SUBasicUpdateDriver.h in Headers */ = {isa = PBXBuildFile; fileRef = 61F83F6F0DBFE137006FDD30 /* SUBasicUpdateDriver.h */; settings = {ATTRIBUTES = (); }; };
169169
61FA52880E2D9EA400EF58AD /* Sparkle.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8DC2EF5B0486A6940098B216 /* Sparkle.framework */; settings = {ATTRIBUTES = (Required, ); }; };
170+
7223E7631AD1AEFF008E3161 /* sais.c in Sources */ = {isa = PBXBuildFile; fileRef = 7223E7611AD1AEFF008E3161 /* sais.c */; };
170171
767B61AC1972D488004E0C3C /* SUGuidedPackageInstaller.h in Headers */ = {isa = PBXBuildFile; fileRef = 767B61AA1972D488004E0C3C /* SUGuidedPackageInstaller.h */; };
171172
767B61AD1972D488004E0C3C /* SUGuidedPackageInstaller.m in Sources */ = {isa = PBXBuildFile; fileRef = 767B61AB1972D488004E0C3C /* SUGuidedPackageInstaller.m */; };
172173
767B61AE1972D488004E0C3C /* SUGuidedPackageInstaller.m in Sources */ = {isa = PBXBuildFile; fileRef = 767B61AB1972D488004E0C3C /* SUGuidedPackageInstaller.m */; };
@@ -534,6 +535,8 @@
534535
61F614540E24A12D009F47E7 /* it */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.strings; name = it; path = it.lproj/Sparkle.strings; sourceTree = "<group>"; };
535536
61F83F6F0DBFE137006FDD30 /* SUBasicUpdateDriver.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SUBasicUpdateDriver.h; sourceTree = "<group>"; };
536537
61F83F700DBFE137006FDD30 /* SUBasicUpdateDriver.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = SUBasicUpdateDriver.m; sourceTree = "<group>"; };
538+
7223E7611AD1AEFF008E3161 /* sais.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sais.c; sourceTree = "<group>"; };
539+
7223E7621AD1AEFF008E3161 /* sais.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sais.h; sourceTree = "<group>"; };
537540
767B61AA1972D488004E0C3C /* SUGuidedPackageInstaller.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SUGuidedPackageInstaller.h; sourceTree = "<group>"; };
538541
767B61AB1972D488004E0C3C /* SUGuidedPackageInstaller.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = SUGuidedPackageInstaller.m; sourceTree = "<group>"; };
539542
8DC2EF5A0486A6940098B216 /* Sparkle-Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = "Sparkle-Info.plist"; sourceTree = "<group>"; };
@@ -707,6 +710,8 @@
707710
5D06E8DB0FD68CB9005AE3F6 /* bsdiff.c */,
708711
5D06E8DC0FD68CB9005AE3F6 /* bspatch.c */,
709712
611142E810FB1BE5009810AA /* bspatch.h */,
713+
7223E7611AD1AEFF008E3161 /* sais.c */,
714+
7223E7621AD1AEFF008E3161 /* sais.h */,
710715
);
711716
path = bsdiff;
712717
sourceTree = "<group>";
@@ -1395,6 +1400,7 @@
13951400
5D06E8E90FD68CDB005AE3F6 /* bsdiff.c in Sources */,
13961401
14652F7E19A9728A00959E44 /* bspatch.c in Sources */,
13971402
14652F7D19A9726700959E44 /* SUBinaryDeltaApply.m in Sources */,
1403+
7223E7631AD1AEFF008E3161 /* sais.c in Sources */,
13981404
14652F7C19A9725300959E44 /* SUBinaryDeltaCommon.m in Sources */,
13991405
5D06E8EA0FD68CDB005AE3F6 /* SUBinaryDeltaTool.m in Sources */,
14001406
);

Vendor/bsdiff/bsdiff.c

Lines changed: 2 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c, v 1.1 2005/08/06 01:59:0
2929
#endif
3030

3131
#include <sys/types.h>
32+
#include "sais.h"
3233

3334
#include <err.h>
3435
#include <fcntl.h>
@@ -39,138 +40,6 @@ __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c, v 1.1 2005/08/06 01:59:0
3940

4041
#define MIN(x, y) (((x)<(y)) ? (x) : (y))
4142

42-
static void split(off_t *I, off_t *V, off_t start, off_t len, off_t h)
43-
{
44-
off_t i, j, k, x, tmp, jj, kk;
45-
46-
if (len < 16) {
47-
for (k = start; k < start + len; k += j) {
48-
j = 1; x = V[I[k] + h];
49-
for (i = 1; k + i < start + len; i++) {
50-
if (V[I[k + i] + h] < x) {
51-
x = V[I[k + i] + h];
52-
j = 0;
53-
};
54-
if (V[I[k + i] + h] == x) {
55-
tmp = I[k + j]; I[k + j] = I[k + i]; I[k + i] = tmp;
56-
j++;
57-
};
58-
};
59-
for (i = 0; i < j; i++)
60-
V[I[k + i]] = k + j - 1;
61-
if (j == 1)
62-
I[k] = -1;
63-
};
64-
return;
65-
};
66-
67-
x = V[I[start + len/2] + h];
68-
jj = 0; kk = 0;
69-
for (i = start; i < start + len; i++) {
70-
if (V[I[i] + h] < x)
71-
jj++;
72-
if (V[I[i] + h] == x)
73-
kk++;
74-
};
75-
jj += start; kk += jj;
76-
77-
i = start; j = 0; k = 0;
78-
while (i < jj) {
79-
if (V[I[i] + h] < x) {
80-
i++;
81-
} else if (V[I[i] + h] == x) {
82-
tmp = I[i]; I[i] = I[jj + j]; I[jj + j] = tmp;
83-
j++;
84-
} else {
85-
tmp = I[i]; I[i] = I[kk + k]; I[kk + k] = tmp;
86-
k++;
87-
};
88-
};
89-
90-
while (jj + j < kk) {
91-
if (V[I[jj + j] + h] == x) {
92-
j++;
93-
} else {
94-
tmp = I[jj + j]; I[jj + j] = I[kk + k]; I[kk + k] = tmp;
95-
k++;
96-
};
97-
};
98-
99-
if (jj > start)
100-
split(I, V, start, jj - start, h);
101-
102-
for (i = 0; i < kk - jj; i++)
103-
V[I[jj + i]] = kk - 1;
104-
if (jj == kk - 1)
105-
I[jj] = -1;
106-
107-
if (start + len > kk)
108-
split(I, V, kk, start + len - kk, h);
109-
}
110-
111-
/* qsufsort(I, V, old, oldsize)
112-
*
113-
* Computes the suffix sort of the string at 'old' and stores the resulting
114-
* indices in 'I', using 'V' as a temporary array for the computation. */
115-
static void qsufsort(off_t *I, off_t *V, u_char *old, off_t oldsize)
116-
{
117-
off_t buckets[256];
118-
off_t i, h, len;
119-
120-
/* count number of each byte */
121-
for (i = 0; i < 256; i++)
122-
buckets[i] = 0;
123-
for (i = 0; i < oldsize; i++)
124-
buckets[old[i]]++;
125-
/* make buckets cumulative */
126-
for (i = 1; i < 256; i++)
127-
buckets[i] += buckets[i - 1];
128-
/* shift right by one */
129-
for (i = 255; i > 0; i--)
130-
buckets[i] = buckets[i - 1];
131-
buckets[0] = 0;
132-
/* at this point, buckets[c] is the number of bytes in the old file with
133-
* value less than c. */
134-
135-
/* set up the sort order of the suffixes based solely on the first
136-
* character */
137-
for (i = 0; i < oldsize; i++)
138-
I[++buckets[old[i]]] = i;
139-
I[0] = oldsize;
140-
/* ? */
141-
for (i = 0; i < oldsize; i++)
142-
V[i] = buckets[old[i]];
143-
V[oldsize] = 0;
144-
/* forward any entries in the ordering which have the same initial
145-
* character */
146-
for (i = 1; i < 256; i++) {
147-
if (buckets[i] == buckets[i - 1] + 1)
148-
I[buckets[i]] = -1;
149-
}
150-
I[0] = -1;
151-
152-
for (h = 1; I[0] != -(oldsize + 1); h += h) {
153-
len = 0;
154-
for (i = 0; i < oldsize + 1;) {
155-
if (I[i] < 0) {
156-
len -= I[i];
157-
i -= I[i];
158-
} else {
159-
if (len)
160-
I[i - len] = -len;
161-
len = V[I[i]] + 1 - i;
162-
split(I, V, i, len, h);
163-
i += len;
164-
len = 0;
165-
}
166-
}
167-
if (len)
168-
I[i - len] = -len;
169-
};
170-
171-
for (i = 0; i < oldsize + 1; i++) I[V[i]] = i;
172-
}
173-
17443
/* matchlen(old, oldsize, new, newsize)
17544
*
17645
* Returns the length of the longest common prefix between 'old' and 'new'. */
@@ -288,7 +157,7 @@ int bsdiff(int argc, char *argv[])
288157
err(1, NULL);
289158

290159
/* Do a suffix sort on the old file. */
291-
qsufsort(I, V, old, oldsize);
160+
I[0] = oldsize; sais(old, I+1, oldsize);
292161

293162
free(V);
294163

0 commit comments

Comments
 (0)