1 /* ported from https://github.com/mendsley/bsdiff
2 */
3 module bsdiff;
4 
5 import std.experimental.allocator : theAllocator, makeArray, dispose;
6 import std.exception : enforce;
7 
8 void bsdiff(Allocator)(in ubyte[] old, in ubyte[] new_, scope void delegate(in ubyte[]) write, Allocator allocator = theAllocator)
9 {
10 	auto I = allocator.makeArray!long(old.length + 1);
11 	scope (exit) allocator.dispose(I);
12 	auto buffer = allocator.makeArray!ubyte(new_.length + 1);
13 	scope (exit) allocator.dispose(buffer);
14 
15 	qsufsort(I, old, allocator);
16 
17 	/* Compute the differences, writing ctrl as we go */
18 	long scan = 0;
19 	long len = 0;
20 	long pos = 0;
21 	long lastscan = 0;
22 	long lastpos = 0;
23 	long lastoffset = 0;
24 	while (scan < new_.length) {
25 		long oldscore = 0;
26 
27 		for (long scsc = scan+=len; scan < new_.length; scan++) {
28 			len = search(I, old,new_[scan .. $], 0, old.length, &pos);
29 
30 			for (; scsc < scan+len; scsc++)
31 				if ((scsc+lastoffset < old.length) && (old[scsc+lastoffset] == new_[scsc]))
32 					oldscore++;
33 
34 			if (((len==oldscore) && (len!=0)) || (len>oldscore+8))
35 				break;
36 
37 			if ((scan+lastoffset<old.length) && (old[scan+lastoffset] == new_[scan]))
38 				oldscore--;
39 		}
40 
41 		if ((len != oldscore) || (scan == new_.length)) {
42 			long s = 0;
43 			long Sf = 0;
44 			long lenf = 0;
45 			for (long i = 0; (lastscan+i < scan) && (lastpos+i < old.length);) {
46 				if (old[lastpos+i] == new_[lastscan+i]) s++;
47 				i++;
48 				if (s*2-i > Sf*2-lenf) {
49 					Sf = s;
50 					lenf = i;
51 				}
52 			}
53 
54 			long lenb = 0;
55 			if (scan < new_.length) {
56 				s = 0;
57 				long Sb = 0;
58 				for (long i = 1; (scan>=lastscan+i)&&(pos>=i); i++) {
59 					if (old[pos-i] == new_[scan-i]) s++;
60 					if (s*2-i > Sb*2-lenb) {
61 						Sb=s;
62 						lenb=i;
63 					}
64 				}
65 			}
66 
67 			if (lastscan+lenf > scan-lenb) {
68 				long overlap = (lastscan+lenf)-(scan-lenb);
69 				s = 0;
70 				long Ss = 0;
71 				long lens = 0;
72 				foreach (i; 0 .. overlap) {
73 					if (new_[lastscan+lenf-overlap+i] == old[lastpos+lenf-overlap+i])
74 						s++;
75 					if (new_[scan-lenb+i] == old[pos-lenb+i])
76 						s--;
77 					if (s > Ss) {
78 						Ss = s;
79 						lens = i+1;
80 					}
81 				}
82 
83 				lenf += lens-overlap;
84 				lenb -= lens;
85 			}
86 
87 			ubyte[3 * 8] buf;
88 			offtout(lenf, buf);
89 			offtout((scan-lenb)-(lastscan+lenf), buf[8 .. $]);
90 			offtout((pos-lenb)-(lastpos+lenf), buf[16 .. $]);
91 
92 			/* Write control data */
93 			write(buf);
94 
95 			/* Write diff data */
96 			if (lenf) {
97 				buffer[0 .. lenf] = new_[lastscan .. lastscan+lenf] - old[lastpos .. lastpos+lenf];
98 				write(buffer[0 .. lenf]);
99 			}
100 
101 			/* Write extra data */
102 			if (scan-lenb > lastscan+lenf)
103 				write(new_[lastscan+lenf .. scan-lenb]);
104 
105 			lastscan = scan-lenb;
106 			lastpos = pos-lenb;
107 			lastoffset = pos-scan;
108 		}
109 	}
110 }
111 
112 
113 void bspatch(in ubyte[] old, ubyte[] new_, scope void delegate(ubyte[]) read)
114 {
115 	ubyte[8] buf;
116 	long oldpos = 0, newpos = 0;
117 	long[3] ctrl;
118 
119 	while (newpos < new_.length) {
120 		/* Read control data */
121 		foreach (i; 0 .. 3) {
122 			read(buf);
123 			ctrl[i] = offtin(buf);
124 		}
125 
126 		/* Sanity-check */
127 		enforce(newpos + ctrl[0] < new_.length);
128 
129 		/* Read diff string */
130 		read(new_[newpos .. newpos + ctrl[0]]);
131 
132 		/* Add old data to diff string */
133 		foreach (i; 0 .. ctrl[0])
134 			if ((oldpos+i>=0) && (oldpos+i<old.length))
135 				new_[newpos+i] += old[oldpos+i];
136 
137 		/* Adjust pointers */
138 		newpos += ctrl[0];
139 		oldpos += ctrl[0];
140 
141 		/* Sanity-check */
142 		enforce(newpos+ctrl[1] <= new_.length);
143 
144 		/* Read extra string */
145 		if (ctrl[1] > 0)
146 			read(new_[newpos .. newpos + ctrl[1]]);
147 
148 		/* Adjust pointers */
149 		newpos += ctrl[1];
150 		oldpos += ctrl[2];
151 	}
152 }
153 
154 unittest {
155 	ubyte[] a = [1, 2, 3, 4, 5, 6, 7, 8];
156 	ubyte[] b = [2, 3, 4, 5, 13, 14, 15, 16, 7];
157 	ubyte[] patch;
158 	void writePatch(in ubyte[] bts) {
159 		patch ~= bts;
160 	}
161 	void readPatch(ubyte[] dst) {
162 		dst[] = patch[0 .. dst.length];
163 		patch = patch[dst.length .. $];
164 	}
165 
166 	bsdiff(a, b, &writePatch);
167 	ubyte[] c = new ubyte[](b.length);
168 	bspatch(a, c, &readPatch);
169 	assert(b == c);
170 }
171 
172 
173 private void split(long[] I, long[] V, long start, long len, long h)
174 {
175 	import std.algorithm.mutation : swap;
176 
177 	long x;
178 
179 	if (len < 16) {
180 		long j;
181 		for (long k = start; k < start+len; k += j) {
182 			j = 1;
183 			x = V[I[k]+h];
184 			for (long i = 1; k+i < start+len; i++) {
185 				if (V[I[k+i]+h] < x) {
186 					x = V[I[k+i]+h];
187 					j = 0;
188 				}
189 				if (V[I[k+i]+h] == x) {
190 					swap(I[k+i], I[k+j]);
191 					j++;
192 				}
193 			}
194 			foreach (i; 0 .. j) V[I[k+i]] = k+j-1;
195 			if (j == 1) I[k]=-1;
196 		}
197 		return;
198 	}
199 
200 	x = V[I[start+len/2]+h];
201 	long jj = 0;
202 	long kk = 0;
203 	foreach (i; start .. start+len) {
204 		if (V[I[i]+h] < x) jj++;
205 		if (V[I[i]+h] == x) kk++;
206 	}
207 	jj += start;
208 	kk += jj;
209 
210 	long i = start;
211 	long j = 0;
212 	long k = 0;
213 	while (i < jj) {
214 		if (V[I[i]+h] < x) {
215 			i++;
216 		} else if (V[I[i]+h] == x) {
217 			swap(I[i], I[jj+j]);
218 			j++;
219 		} else {
220 			swap(I[i], I[kk+k]);
221 			k++;
222 		}
223 	}
224 
225 	while (jj+j < kk) {
226 		if (V[I[jj+j]+h] == x) {
227 			j++;
228 		} else {
229 			swap(I[jj+j], I[kk+k]);
230 			k++;
231 		}
232 	}
233 
234 	if (jj > start) split(I, V, start, jj-start, h);
235 
236 	foreach (i2; 0 .. kk-jj) V[I[jj+i2]] = kk-1;
237 	if (jj == kk-1) I[jj] = -1;
238 
239 	if (start+len > kk) split(I, V, kk, start+len-kk, h);
240 }
241 
242 private void qsufsort(Allocator)(long[] I, const(ubyte)[] old, Allocator allocator)
243 {
244 	long[] V = allocator.makeArray!long(old.length+1);
245 	allocator.dispose(V);
246 
247 	long[256] buckets = 0;
248 
249 	foreach (i; 0 .. old.length) buckets[old[i]]++;
250 	foreach (i; 1 .. buckets.length) buckets[i] += buckets[i-1];
251 	foreach_reverse (i; 1 .. buckets.length) buckets[i] = buckets[i-1];
252 	buckets[0] =0 ;
253 
254 	foreach (i; 0 .. old.length) I[++buckets[old[i]]] = i;
255 	I[0] = old.length;
256 	foreach (i; 0 .. old.length) V[i] = buckets[old[i]];
257 	V[old.length] = 0;
258 	foreach (i; 1 .. buckets.length)
259 		if (buckets[i] == buckets[i-1]+1)
260 			I[buckets[i]] = -1;
261 	I[0] = -1;
262 
263 	for (long h = 1; I[0] != -(old.length+1); h += h) {
264 		long len = 0;
265 		long i = 0;
266 		while (i < old.length+1) {
267 			if (I[i] < 0) {
268 				len -= I[i];
269 				i -= I[i];
270 			} else {
271 				if (len) I[i-len] = -len;
272 				len = V[I[i]] + 1 - i;
273 				split(I, V, i, len, h);
274 				i += len;
275 				len = 0;
276 			}
277 		}
278 		if (len) I[i-len] = -len;
279 	}
280 
281 	foreach (i; 0 .. old.length+1) I[V[i]] = i;
282 }
283 
284 private long matchlen(const(ubyte)[] old, const(ubyte)[] new_)
285 {
286 	import std.algorithm.comparison : min;
287 	auto len = min(old.length, new_.length);
288 
289 	foreach (i; 0 .. len)
290 		if (old[i] != new_[i])
291 			return i;
292 
293 	return len;
294 }
295 
296 private long search(in long[] I, in ubyte[] old,
297 		in ubyte[] new_, long st, long en, long *pos)
298 {
299 	import core.stdc.string : memcmp;
300 	import std.algorithm.comparison : min;
301 
302 	if (en-st < 2) {
303 		auto x = matchlen(old[I[st] .. $], new_);
304 		auto y = matchlen(old[I[en] .. $], new_);
305 
306 		if (x > y) {
307 			*pos = I[st];
308 			return x;
309 		} else {
310 			*pos = I[en];
311 			return y;
312 		}
313 	}
314 
315 	long x = st + (en-st)/2;
316 	if (memcmp(&old[I[x]], &new_[0], min(old.length-I[x], new_.length)) < 0) {
317 		return search(I, old, new_, x, en, pos);
318 	} else {
319 		return search(I, old, new_, st, x, pos);
320 	}
321 }
322 
323 struct bsdiff_request
324 {
325 	const(ubyte)[] old;
326 	const(ubyte)[] new_;
327 	void delegate(in ubyte[] data) write;
328 	long[] I;
329 	ubyte[] buffer;
330 };
331 
332 
333 private long offtin(in ubyte[] buf)
334 {
335 	long y = buf[7]&0x7F;
336 	y = y*256; y+= buf[6];
337 	y = y*256; y+= buf[5];
338 	y = y*256; y+= buf[4];
339 	y = y*256; y+= buf[3];
340 	y = y*256; y+= buf[2];
341 	y = y*256; y+= buf[1];
342 	y = y*256; y+= buf[0];
343 	return buf[7] & 0x80 ? -y : y;
344 }
345 
346 
347 private void offtout(long x, ubyte[] buf)
348 {
349 	ulong y = x < 0 ? -x : x;
350 	buf[0] = y%256; y -= buf[0];
351 	y = y/256; buf[1] = y%256; y -= buf[1];
352 	y = y/256; buf[2] = y%256; y -= buf[2];
353 	y = y/256; buf[3] = y%256; y -= buf[3];
354 	y = y/256; buf[4] = y%256; y -= buf[4];
355 	y = y/256; buf[5] = y%256; y -= buf[5];
356 	y = y/256; buf[6] = y%256; y -= buf[6];
357 	y = y/256; buf[7] = y%256;
358 	if (x < 0) buf[7] |= 0x80;
359 }