bsdiff.c revision 148771
1148771Scperciva/*- 2148771Scperciva * Copyright 2003-2005 Colin Percival 3148771Scperciva * All rights reserved 4148771Scperciva * 5148771Scperciva * Redistribution and use in source and binary forms, with or without 6148771Scperciva * modification, are permitted providing that the following conditions 7148771Scperciva * are met: 8148771Scperciva * 1. Redistributions of source code must retain the above copyright 9148771Scperciva * notice, this list of conditions and the following disclaimer. 10148771Scperciva * 2. Redistributions in binary form must reproduce the above copyright 11148771Scperciva * notice, this list of conditions and the following disclaimer in the 12148771Scperciva * documentation and/or other materials provided with the distribution. 13148771Scperciva * 14148771Scperciva * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15148771Scperciva * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16148771Scperciva * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17148771Scperciva * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 18148771Scperciva * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19148771Scperciva * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20148771Scperciva * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21148771Scperciva * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22148771Scperciva * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23148771Scperciva * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24148771Scperciva * POSSIBILITY OF SUCH DAMAGE. 25148771Scperciva */ 26148771Scperciva 27148771Scperciva#include <sys/cdefs.h> 28148771Scperciva__FBSDID("$FreeBSD: head/usr.bin/bsdiff/bsdiff/bsdiff.c 148771 2005-08-06 01:59:06Z cperciva $"); 29148771Scperciva 30148771Scperciva#include <sys/types.h> 31148771Scperciva 32148771Scperciva#include <bzlib.h> 33148771Scperciva#include <err.h> 34148771Scperciva#include <fcntl.h> 35148771Scperciva#include <stdio.h> 36148771Scperciva#include <stdlib.h> 37148771Scperciva#include <string.h> 38148771Scperciva#include <unistd.h> 39148771Scperciva 40148771Scperciva#define MIN(x,y) (((x)<(y)) ? (x) : (y)) 41148771Scperciva 42148771Scpercivastatic void split(off_t *I,off_t *V,off_t start,off_t len,off_t h) 43148771Scperciva{ 44148771Scperciva off_t i,j,k,x,tmp,jj,kk; 45148771Scperciva 46148771Scperciva if(len<16) { 47148771Scperciva for(k=start;k<start+len;k+=j) { 48148771Scperciva j=1;x=V[I[k]+h]; 49148771Scperciva for(i=1;k+i<start+len;i++) { 50148771Scperciva if(V[I[k+i]+h]<x) { 51148771Scperciva x=V[I[k+i]+h]; 52148771Scperciva j=0; 53148771Scperciva }; 54148771Scperciva if(V[I[k+i]+h]==x) { 55148771Scperciva tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; 56148771Scperciva j++; 57148771Scperciva }; 58148771Scperciva }; 59148771Scperciva for(i=0;i<j;i++) V[I[k+i]]=k+j-1; 60148771Scperciva if(j==1) I[k]=-1; 61148771Scperciva }; 62148771Scperciva return; 63148771Scperciva }; 64148771Scperciva 65148771Scperciva x=V[I[start+len/2]+h]; 66148771Scperciva jj=0;kk=0; 67148771Scperciva for(i=start;i<start+len;i++) { 68148771Scperciva if(V[I[i]+h]<x) jj++; 69148771Scperciva if(V[I[i]+h]==x) kk++; 70148771Scperciva }; 71148771Scperciva jj+=start;kk+=jj; 72148771Scperciva 73148771Scperciva i=start;j=0;k=0; 74148771Scperciva while(i<jj) { 75148771Scperciva if(V[I[i]+h]<x) { 76148771Scperciva i++; 77148771Scperciva } else if(V[I[i]+h]==x) { 78148771Scperciva tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; 79148771Scperciva j++; 80148771Scperciva } else { 81148771Scperciva tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; 82148771Scperciva k++; 83148771Scperciva }; 84148771Scperciva }; 85148771Scperciva 86148771Scperciva while(jj+j<kk) { 87148771Scperciva if(V[I[jj+j]+h]==x) { 88148771Scperciva j++; 89148771Scperciva } else { 90148771Scperciva tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; 91148771Scperciva k++; 92148771Scperciva }; 93148771Scperciva }; 94148771Scperciva 95148771Scperciva if(jj>start) split(I,V,start,jj-start,h); 96148771Scperciva 97148771Scperciva for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; 98148771Scperciva if(jj==kk-1) I[jj]=-1; 99148771Scperciva 100148771Scperciva if(start+len>kk) split(I,V,kk,start+len-kk,h); 101148771Scperciva} 102148771Scperciva 103148771Scpercivastatic void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize) 104148771Scperciva{ 105148771Scperciva off_t buckets[256]; 106148771Scperciva off_t i,h,len; 107148771Scperciva 108148771Scperciva for(i=0;i<256;i++) buckets[i]=0; 109148771Scperciva for(i=0;i<oldsize;i++) buckets[old[i]]++; 110148771Scperciva for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; 111148771Scperciva for(i=255;i>0;i--) buckets[i]=buckets[i-1]; 112148771Scperciva buckets[0]=0; 113148771Scperciva 114148771Scperciva for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; 115148771Scperciva I[0]=oldsize; 116148771Scperciva for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; 117148771Scperciva V[oldsize]=0; 118148771Scperciva for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; 119148771Scperciva I[0]=-1; 120148771Scperciva 121148771Scperciva for(h=1;I[0]!=-(oldsize+1);h+=h) { 122148771Scperciva len=0; 123148771Scperciva for(i=0;i<oldsize+1;) { 124148771Scperciva if(I[i]<0) { 125148771Scperciva len-=I[i]; 126148771Scperciva i-=I[i]; 127148771Scperciva } else { 128148771Scperciva if(len) I[i-len]=-len; 129148771Scperciva len=V[I[i]]+1-i; 130148771Scperciva split(I,V,i,len,h); 131148771Scperciva i+=len; 132148771Scperciva len=0; 133148771Scperciva }; 134148771Scperciva }; 135148771Scperciva if(len) I[i-len]=-len; 136148771Scperciva }; 137148771Scperciva 138148771Scperciva for(i=0;i<oldsize+1;i++) I[V[i]]=i; 139148771Scperciva} 140148771Scperciva 141148771Scpercivastatic off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) 142148771Scperciva{ 143148771Scperciva off_t i; 144148771Scperciva 145148771Scperciva for(i=0;(i<oldsize)&&(i<newsize);i++) 146148771Scperciva if(old[i]!=new[i]) break; 147148771Scperciva 148148771Scperciva return i; 149148771Scperciva} 150148771Scperciva 151148771Scpercivastatic off_t search(off_t *I,u_char *old,off_t oldsize, 152148771Scperciva u_char *new,off_t newsize,off_t st,off_t en,off_t *pos) 153148771Scperciva{ 154148771Scperciva off_t x,y; 155148771Scperciva 156148771Scperciva if(en-st<2) { 157148771Scperciva x=matchlen(old+I[st],oldsize-I[st],new,newsize); 158148771Scperciva y=matchlen(old+I[en],oldsize-I[en],new,newsize); 159148771Scperciva 160148771Scperciva if(x>y) { 161148771Scperciva *pos=I[st]; 162148771Scperciva return x; 163148771Scperciva } else { 164148771Scperciva *pos=I[en]; 165148771Scperciva return y; 166148771Scperciva } 167148771Scperciva }; 168148771Scperciva 169148771Scperciva x=st+(en-st)/2; 170148771Scperciva if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) { 171148771Scperciva return search(I,old,oldsize,new,newsize,x,en,pos); 172148771Scperciva } else { 173148771Scperciva return search(I,old,oldsize,new,newsize,st,x,pos); 174148771Scperciva }; 175148771Scperciva} 176148771Scperciva 177148771Scpercivastatic void offtout(off_t x,u_char *buf) 178148771Scperciva{ 179148771Scperciva off_t y; 180148771Scperciva 181148771Scperciva if(x<0) y=-x; else y=x; 182148771Scperciva 183148771Scperciva buf[0]=y%256;y-=buf[0]; 184148771Scperciva y=y/256;buf[1]=y%256;y-=buf[1]; 185148771Scperciva y=y/256;buf[2]=y%256;y-=buf[2]; 186148771Scperciva y=y/256;buf[3]=y%256;y-=buf[3]; 187148771Scperciva y=y/256;buf[4]=y%256;y-=buf[4]; 188148771Scperciva y=y/256;buf[5]=y%256;y-=buf[5]; 189148771Scperciva y=y/256;buf[6]=y%256;y-=buf[6]; 190148771Scperciva y=y/256;buf[7]=y%256; 191148771Scperciva 192148771Scperciva if(x<0) buf[7]|=0x80; 193148771Scperciva} 194148771Scperciva 195148771Scpercivaint main(int argc,char *argv[]) 196148771Scperciva{ 197148771Scperciva int fd; 198148771Scperciva u_char *old,*new; 199148771Scperciva off_t oldsize,newsize; 200148771Scperciva off_t *I,*V; 201148771Scperciva off_t scan,pos,len; 202148771Scperciva off_t lastscan,lastpos,lastoffset; 203148771Scperciva off_t oldscore,scsc; 204148771Scperciva off_t s,Sf,lenf,Sb,lenb; 205148771Scperciva off_t overlap,Ss,lens; 206148771Scperciva off_t i; 207148771Scperciva off_t dblen,eblen; 208148771Scperciva u_char *db,*eb; 209148771Scperciva u_char buf[8]; 210148771Scperciva u_char header[32]; 211148771Scperciva FILE * pf; 212148771Scperciva BZFILE * pfbz2; 213148771Scperciva int bz2err; 214148771Scperciva 215148771Scperciva if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile\n",argv[0]); 216148771Scperciva 217148771Scperciva /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure 218148771Scperciva that we never try to malloc(0) and get a NULL pointer */ 219148771Scperciva if(((fd=open(argv[1],O_RDONLY,0))<0) || 220148771Scperciva ((oldsize=lseek(fd,0,SEEK_END))==-1) || 221148771Scperciva ((old=malloc(oldsize+1))==NULL) || 222148771Scperciva (lseek(fd,0,SEEK_SET)!=0) || 223148771Scperciva (read(fd,old,oldsize)!=oldsize) || 224148771Scperciva (close(fd)==-1)) err(1,"%s",argv[1]); 225148771Scperciva 226148771Scperciva if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) || 227148771Scperciva ((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL); 228148771Scperciva 229148771Scperciva qsufsort(I,V,old,oldsize); 230148771Scperciva 231148771Scperciva free(V); 232148771Scperciva 233148771Scperciva /* Allocate newsize+1 bytes instead of newsize bytes to ensure 234148771Scperciva that we never try to malloc(0) and get a NULL pointer */ 235148771Scperciva if(((fd=open(argv[2],O_RDONLY,0))<0) || 236148771Scperciva ((newsize=lseek(fd,0,SEEK_END))==-1) || 237148771Scperciva ((new=malloc(newsize+1))==NULL) || 238148771Scperciva (lseek(fd,0,SEEK_SET)!=0) || 239148771Scperciva (read(fd,new,newsize)!=newsize) || 240148771Scperciva (close(fd)==-1)) err(1,"%s",argv[2]); 241148771Scperciva 242148771Scperciva if(((db=malloc(newsize+1))==NULL) || 243148771Scperciva ((eb=malloc(newsize+1))==NULL)) err(1,NULL); 244148771Scperciva dblen=0; 245148771Scperciva eblen=0; 246148771Scperciva 247148771Scperciva /* Create the patch file */ 248148771Scperciva if ((pf = fopen(argv[3], "w")) == NULL) 249148771Scperciva err(1, "%s", argv[3]); 250148771Scperciva 251148771Scperciva /* Header is 252148771Scperciva 0 8 "BSDIFF40" 253148771Scperciva 8 8 length of bzip2ed ctrl block 254148771Scperciva 16 8 length of bzip2ed diff block 255148771Scperciva 24 8 length of new file */ 256148771Scperciva /* File is 257148771Scperciva 0 32 Header 258148771Scperciva 32 ?? Bzip2ed ctrl block 259148771Scperciva ?? ?? Bzip2ed diff block 260148771Scperciva ?? ?? Bzip2ed extra block */ 261148771Scperciva memcpy(header,"BSDIFF40",8); 262148771Scperciva offtout(0, header + 8); 263148771Scperciva offtout(0, header + 16); 264148771Scperciva offtout(newsize, header + 24); 265148771Scperciva if (fwrite(header, 32, 1, pf) != 1) 266148771Scperciva err(1, "fwrite(%s)", argv[3]); 267148771Scperciva 268148771Scperciva /* Compute the differences, writing ctrl as we go */ 269148771Scperciva if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL) 270148771Scperciva errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err); 271148771Scperciva scan=0;len=0; 272148771Scperciva lastscan=0;lastpos=0;lastoffset=0; 273148771Scperciva while(scan<newsize) { 274148771Scperciva oldscore=0; 275148771Scperciva 276148771Scperciva for(scsc=scan+=len;scan<newsize;scan++) { 277148771Scperciva len=search(I,old,oldsize,new+scan,newsize-scan, 278148771Scperciva 0,oldsize,&pos); 279148771Scperciva 280148771Scperciva for(;scsc<scan+len;scsc++) 281148771Scperciva if((scsc+lastoffset<oldsize) && 282148771Scperciva (old[scsc+lastoffset] == new[scsc])) 283148771Scperciva oldscore++; 284148771Scperciva 285148771Scperciva if(((len==oldscore) && (len!=0)) || 286148771Scperciva (len>oldscore+8)) break; 287148771Scperciva 288148771Scperciva if((scan+lastoffset<oldsize) && 289148771Scperciva (old[scan+lastoffset] == new[scan])) 290148771Scperciva oldscore--; 291148771Scperciva }; 292148771Scperciva 293148771Scperciva if((len!=oldscore) || (scan==newsize)) { 294148771Scperciva s=0;Sf=0;lenf=0; 295148771Scperciva for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) { 296148771Scperciva if(old[lastpos+i]==new[lastscan+i]) s++; 297148771Scperciva i++; 298148771Scperciva if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; }; 299148771Scperciva }; 300148771Scperciva 301148771Scperciva lenb=0; 302148771Scperciva if(scan<newsize) { 303148771Scperciva s=0;Sb=0; 304148771Scperciva for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) { 305148771Scperciva if(old[pos-i]==new[scan-i]) s++; 306148771Scperciva if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }; 307148771Scperciva }; 308148771Scperciva }; 309148771Scperciva 310148771Scperciva if(lastscan+lenf>scan-lenb) { 311148771Scperciva overlap=(lastscan+lenf)-(scan-lenb); 312148771Scperciva s=0;Ss=0;lens=0; 313148771Scperciva for(i=0;i<overlap;i++) { 314148771Scperciva if(new[lastscan+lenf-overlap+i]== 315148771Scperciva old[lastpos+lenf-overlap+i]) s++; 316148771Scperciva if(new[scan-lenb+i]== 317148771Scperciva old[pos-lenb+i]) s--; 318148771Scperciva if(s>Ss) { Ss=s; lens=i+1; }; 319148771Scperciva }; 320148771Scperciva 321148771Scperciva lenf+=lens-overlap; 322148771Scperciva lenb-=lens; 323148771Scperciva }; 324148771Scperciva 325148771Scperciva for(i=0;i<lenf;i++) 326148771Scperciva db[dblen+i]=new[lastscan+i]-old[lastpos+i]; 327148771Scperciva for(i=0;i<(scan-lenb)-(lastscan+lenf);i++) 328148771Scperciva eb[eblen+i]=new[lastscan+lenf+i]; 329148771Scperciva 330148771Scperciva dblen+=lenf; 331148771Scperciva eblen+=(scan-lenb)-(lastscan+lenf); 332148771Scperciva 333148771Scperciva offtout(lenf,buf); 334148771Scperciva BZ2_bzWrite(&bz2err, pfbz2, buf, 8); 335148771Scperciva if (bz2err != BZ_OK) 336148771Scperciva errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 337148771Scperciva 338148771Scperciva offtout((scan-lenb)-(lastscan+lenf),buf); 339148771Scperciva BZ2_bzWrite(&bz2err, pfbz2, buf, 8); 340148771Scperciva if (bz2err != BZ_OK) 341148771Scperciva errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 342148771Scperciva 343148771Scperciva offtout((pos-lenb)-(lastpos+lenf),buf); 344148771Scperciva BZ2_bzWrite(&bz2err, pfbz2, buf, 8); 345148771Scperciva if (bz2err != BZ_OK) 346148771Scperciva errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 347148771Scperciva 348148771Scperciva lastscan=scan-lenb; 349148771Scperciva lastpos=pos-lenb; 350148771Scperciva lastoffset=pos-scan; 351148771Scperciva }; 352148771Scperciva }; 353148771Scperciva BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL); 354148771Scperciva if (bz2err != BZ_OK) 355148771Scperciva errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err); 356148771Scperciva 357148771Scperciva /* Compute size of compressed ctrl data */ 358148771Scperciva if ((len = ftello(pf)) == -1) 359148771Scperciva err(1, "ftello"); 360148771Scperciva offtout(len-32, header + 8); 361148771Scperciva 362148771Scperciva /* Write compressed diff data */ 363148771Scperciva if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL) 364148771Scperciva errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err); 365148771Scperciva BZ2_bzWrite(&bz2err, pfbz2, db, dblen); 366148771Scperciva if (bz2err != BZ_OK) 367148771Scperciva errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 368148771Scperciva BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL); 369148771Scperciva if (bz2err != BZ_OK) 370148771Scperciva errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err); 371148771Scperciva 372148771Scperciva /* Compute size of compressed diff data */ 373148771Scperciva if ((newsize = ftello(pf)) == -1) 374148771Scperciva err(1, "ftello"); 375148771Scperciva offtout(newsize - len, header + 16); 376148771Scperciva 377148771Scperciva /* Write compressed extra data */ 378148771Scperciva if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL) 379148771Scperciva errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err); 380148771Scperciva BZ2_bzWrite(&bz2err, pfbz2, eb, eblen); 381148771Scperciva if (bz2err != BZ_OK) 382148771Scperciva errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 383148771Scperciva BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL); 384148771Scperciva if (bz2err != BZ_OK) 385148771Scperciva errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err); 386148771Scperciva 387148771Scperciva /* Seek to the beginning, write the header, and close the file */ 388148771Scperciva if (fseeko(pf, 0, SEEK_SET)) 389148771Scperciva err(1, "fseeko"); 390148771Scperciva if (fwrite(header, 32, 1, pf) != 1) 391148771Scperciva err(1, "fwrite(%s)", argv[3]); 392148771Scperciva if (fclose(pf)) 393148771Scperciva err(1, "fclose"); 394148771Scperciva 395148771Scperciva /* Free the memory we used */ 396148771Scperciva free(db); 397148771Scperciva free(eb); 398148771Scperciva free(I); 399148771Scperciva free(old); 400148771Scperciva free(new); 401148771Scperciva 402148771Scperciva return 0; 403148771Scperciva} 404