1251881Speter/* 2251881Speter * xdelta.c: xdelta generator. 3251881Speter * 4251881Speter * ==================================================================== 5251881Speter * Licensed to the Apache Software Foundation (ASF) under one 6251881Speter * or more contributor license agreements. See the NOTICE file 7251881Speter * distributed with this work for additional information 8251881Speter * regarding copyright ownership. The ASF licenses this file 9251881Speter * to you under the Apache License, Version 2.0 (the 10251881Speter * "License"); you may not use this file except in compliance 11251881Speter * with the License. You may obtain a copy of the License at 12251881Speter * 13251881Speter * http://www.apache.org/licenses/LICENSE-2.0 14251881Speter * 15251881Speter * Unless required by applicable law or agreed to in writing, 16251881Speter * software distributed under the License is distributed on an 17251881Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18251881Speter * KIND, either express or implied. See the License for the 19251881Speter * specific language governing permissions and limitations 20251881Speter * under the License. 21251881Speter * ==================================================================== 22251881Speter */ 23251881Speter 24251881Speter 25251881Speter#include <assert.h> 26251881Speter 27251881Speter#include <apr_general.h> /* for APR_INLINE */ 28251881Speter#include <apr_hash.h> 29251881Speter 30251881Speter#include "svn_hash.h" 31251881Speter#include "svn_delta.h" 32251881Speter#include "delta.h" 33251881Speter 34251881Speter/* This is pseudo-adler32. It is adler32 without the prime modulus. 35251881Speter The idea is borrowed from monotone, and is a translation of the C++ 36251881Speter code. Graydon Hoare, the author of the original code, gave his 37251881Speter explicit permission to use it under these terms at 8:02pm on 38251881Speter Friday, February 11th, 2005. */ 39251881Speter 40251881Speter/* Size of the blocks we compute checksums for. This was chosen out of 41251881Speter thin air. Monotone used 64, xdelta1 used 64, rsync uses 128. 42251881Speter However, later optimizations assume it to be 256 or less. 43251881Speter */ 44251881Speter#define MATCH_BLOCKSIZE 64 45251881Speter 46251881Speter/* "no" / "invalid" / "unused" value for positions within the delta windows 47251881Speter */ 48251881Speter#define NO_POSITION ((apr_uint32_t)-1) 49251881Speter 50251881Speter/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time. 51251881Speter * This function may (and will) only be called for characters that are 52251881Speter * MATCH_BLOCKSIZE positions apart. 53251881Speter * 54251881Speter * Please note that the lower 16 bits cannot overflow in neither direction. 55251881Speter * Therefore, we don't need to split the value into separate values for 56251881Speter * sum(char) and sum(sum(char)). 57251881Speter */ 58251881Speterstatic APR_INLINE apr_uint32_t 59251881Speteradler32_replace(apr_uint32_t adler32, const char c_out, const char c_in) 60251881Speter{ 61251881Speter adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out)); 62251881Speter 63251881Speter adler32 -= (unsigned char)c_out; 64251881Speter adler32 += (unsigned char)c_in; 65251881Speter 66251881Speter return adler32 + adler32 * 0x10000; 67251881Speter} 68251881Speter 69251881Speter/* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting 70251881Speter at DATA. Return the checksum value. */ 71251881Speter 72251881Speterstatic APR_INLINE apr_uint32_t 73251881Speterinit_adler32(const char *data) 74251881Speter{ 75251881Speter const unsigned char *input = (const unsigned char *)data; 76251881Speter const unsigned char *last = input + MATCH_BLOCKSIZE; 77251881Speter 78251881Speter apr_uint32_t s1 = 0; 79251881Speter apr_uint32_t s2 = 0; 80251881Speter 81251881Speter for (; input < last; input += 8) 82251881Speter { 83251881Speter s1 += input[0]; s2 += s1; 84251881Speter s1 += input[1]; s2 += s1; 85251881Speter s1 += input[2]; s2 += s1; 86251881Speter s1 += input[3]; s2 += s1; 87251881Speter s1 += input[4]; s2 += s1; 88251881Speter s1 += input[5]; s2 += s1; 89251881Speter s1 += input[6]; s2 += s1; 90251881Speter s1 += input[7]; s2 += s1; 91251881Speter } 92251881Speter 93251881Speter return s2 * 0x10000 + s1; 94251881Speter} 95251881Speter 96251881Speter/* Information for a block of the delta source. The length of the 97251881Speter block is the smaller of MATCH_BLOCKSIZE and the difference between 98251881Speter the size of the source data and the position of this block. */ 99251881Speterstruct block 100251881Speter{ 101251881Speter apr_uint32_t adlersum; 102251881Speter 103251881Speter/* Even in 64 bit systems, store only 32 bit offsets in our hash table 104251881Speter (our delta window size much much smaller then 4GB). 105251881Speter That reduces the hash table size by 50% from 32to 16KB 106251881Speter and makes it easier to fit into the CPU's L1 cache. */ 107251881Speter apr_uint32_t pos; /* NO_POSITION -> block is not used */ 108251881Speter}; 109251881Speter 110251881Speter/* A hash table, using open addressing, of the blocks of the source. */ 111251881Speterstruct blocks 112251881Speter{ 113251881Speter /* The largest valid index of slots. 114251881Speter This value has an upper bound proportionate to the text delta 115251881Speter window size, so unless we dramatically increase the window size, 116251881Speter it's safe to make this a 32-bit value. In any case, it has to be 117251881Speter hte same width as the block position index, (struct 118251881Speter block).pos. */ 119251881Speter apr_uint32_t max; 120251881Speter /* Source buffer that the positions in SLOTS refer to. */ 121251881Speter const char* data; 122251881Speter /* The vector of blocks. A pos value of NO_POSITION represents an unused 123251881Speter slot. */ 124251881Speter struct block *slots; 125251881Speter}; 126251881Speter 127251881Speter 128251881Speter/* Return a hash value calculated from the adler32 SUM, suitable for use with 129251881Speter our hash table. */ 130251881Speterstatic apr_uint32_t hash_func(apr_uint32_t sum) 131251881Speter{ 132251881Speter /* Since the adl32 checksum have a bad distribution for the 11th to 16th 133251881Speter bits when used for our small block size, we add some bits from the 134251881Speter other half of the checksum. */ 135251881Speter return sum ^ (sum >> 12); 136251881Speter} 137251881Speter 138251881Speter/* Insert a block with the checksum ADLERSUM at position POS in the source 139251881Speter data into the table BLOCKS. Ignore true duplicates, i.e. blocks with 140251881Speter actually the same content. */ 141251881Speterstatic void 142251881Speteradd_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos) 143251881Speter{ 144251881Speter apr_uint32_t h = hash_func(adlersum) & blocks->max; 145251881Speter 146251881Speter /* This will terminate, since we know that we will not fill the table. */ 147251881Speter for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) 148251881Speter if (blocks->slots[h].adlersum == adlersum) 149251881Speter if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos, 150251881Speter MATCH_BLOCKSIZE) == 0) 151251881Speter return; 152251881Speter 153251881Speter blocks->slots[h].adlersum = adlersum; 154251881Speter blocks->slots[h].pos = pos; 155251881Speter} 156251881Speter 157251881Speter/* Find a block in BLOCKS with the checksum ADLERSUM and matching the content 158251881Speter at DATA, returning its position in the source data. If there is no such 159251881Speter block, return NO_POSITION. */ 160251881Speterstatic apr_uint32_t 161251881Speterfind_block(const struct blocks *blocks, 162251881Speter apr_uint32_t adlersum, 163251881Speter const char* data) 164251881Speter{ 165251881Speter apr_uint32_t h = hash_func(adlersum) & blocks->max; 166251881Speter 167251881Speter for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max) 168251881Speter if (blocks->slots[h].adlersum == adlersum) 169251881Speter if (memcmp(blocks->data + blocks->slots[h].pos, data, 170251881Speter MATCH_BLOCKSIZE) == 0) 171251881Speter return blocks->slots[h].pos; 172251881Speter 173251881Speter return NO_POSITION; 174251881Speter} 175251881Speter 176251881Speter/* Initialize the matches table from DATA of size DATALEN. This goes 177251881Speter through every block of MATCH_BLOCKSIZE bytes in the source and 178251881Speter checksums it, inserting the result into the BLOCKS table. */ 179251881Speterstatic void 180251881Speterinit_blocks_table(const char *data, 181251881Speter apr_size_t datalen, 182251881Speter struct blocks *blocks, 183251881Speter apr_pool_t *pool) 184251881Speter{ 185251881Speter apr_size_t nblocks; 186251881Speter apr_size_t wnslots = 1; 187251881Speter apr_uint32_t nslots; 188251881Speter apr_uint32_t i; 189251881Speter 190251881Speter /* Be pessimistic about the block count. */ 191251881Speter nblocks = datalen / MATCH_BLOCKSIZE + 1; 192251881Speter /* Find nearest larger power of two. */ 193251881Speter while (wnslots <= nblocks) 194251881Speter wnslots *= 2; 195251881Speter /* Double the number of slots to avoid a too high load. */ 196251881Speter wnslots *= 2; 197251881Speter /* Narrow the number of slots to 32 bits, which is the size of the 198251881Speter block position index in the hash table. 199251881Speter Sanity check: On 64-bit platforms, apr_size_t is likely to be 200251881Speter larger than apr_uint32_t. Make sure that the number of slots 201251881Speter actually fits into blocks->max. It's safe to use a hard assert 202251881Speter here, because the largest possible value for nslots is 203251881Speter proportional to the text delta window size and is therefore much 204251881Speter smaller than the range of an apr_uint32_t. If we ever happen to 205251881Speter increase the window size too much, this assertion will get 206251881Speter triggered by the test suite. */ 207251881Speter nslots = (apr_uint32_t) wnslots; 208251881Speter SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots); 209251881Speter blocks->max = nslots - 1; 210251881Speter blocks->data = data; 211251881Speter blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots))); 212251881Speter for (i = 0; i < nslots; ++i) 213251881Speter { 214251881Speter /* Avoid using an indeterminate value in the lookup. */ 215251881Speter blocks->slots[i].adlersum = 0; 216251881Speter blocks->slots[i].pos = NO_POSITION; 217251881Speter } 218251881Speter 219251881Speter /* If there is an odd block at the end of the buffer, we will 220251881Speter not use that shorter block for deltification (only indirectly 221251881Speter as an extension of some previous block). */ 222251881Speter for (i = 0; i + MATCH_BLOCKSIZE <= datalen; i += MATCH_BLOCKSIZE) 223251881Speter add_block(blocks, init_adler32(data + i), i); 224251881Speter} 225251881Speter 226251881Speter/* Return the lowest position at which A and B differ. If no difference 227251881Speter * can be found in the first MAX_LEN characters, MAX_LEN will be returned. 228251881Speter */ 229251881Speterstatic apr_size_t 230251881Spetermatch_length(const char *a, const char *b, apr_size_t max_len) 231251881Speter{ 232251881Speter apr_size_t pos = 0; 233251881Speter 234251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK 235251881Speter 236251881Speter /* Chunky processing is so much faster ... 237251881Speter * 238251881Speter * We can't make this work on architectures that require aligned access 239251881Speter * because A and B will probably have different alignment. So, skipping 240251881Speter * the first few chars until alignment is reached is not an option. 241251881Speter */ 242251881Speter for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t)) 243251881Speter if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos)) 244251881Speter break; 245251881Speter 246251881Speter#endif 247251881Speter 248251881Speter for (; pos < max_len; ++pos) 249251881Speter if (a[pos] != b[pos]) 250251881Speter break; 251251881Speter 252251881Speter return pos; 253251881Speter} 254251881Speter 255251881Speter/* Return the number of bytes before A and B that don't differ. If no 256251881Speter * difference can be found in the first MAX_LEN characters, MAX_LEN will 257251881Speter * be returned. Please note that A-MAX_LEN and B-MAX_LEN must both be 258251881Speter * valid addresses. 259251881Speter */ 260251881Speterstatic apr_size_t 261251881Speterreverse_match_length(const char *a, const char *b, apr_size_t max_len) 262251881Speter{ 263251881Speter apr_size_t pos = 0; 264251881Speter 265251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK 266251881Speter 267251881Speter /* Chunky processing is so much faster ... 268251881Speter * 269251881Speter * We can't make this work on architectures that require aligned access 270251881Speter * because A and B will probably have different alignment. So, skipping 271251881Speter * the first few chars until alignment is reached is not an option. 272251881Speter */ 273251881Speter for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t)) 274251881Speter if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos)) 275251881Speter break; 276251881Speter 277251881Speter pos -= sizeof(apr_size_t); 278251881Speter 279251881Speter#endif 280251881Speter 281251881Speter /* If we find a mismatch at -pos, pos-1 characters matched. 282251881Speter */ 283251881Speter while (++pos <= max_len) 284251881Speter if (a[0-pos] != b[0-pos]) 285251881Speter return pos - 1; 286251881Speter 287251881Speter /* No mismatch found -> at least MAX_LEN matching chars. 288251881Speter */ 289251881Speter return max_len; 290251881Speter} 291251881Speter 292251881Speter 293251881Speter/* Try to find a match for the target data B in BLOCKS, and then 294251881Speter extend the match as long as data in A and B at the match position 295251881Speter continues to match. We set the position in A we ended up in (in 296251881Speter case we extended it backwards) in APOSP and update the corresponding 297251881Speter position within B given in BPOSP. PENDING_INSERT_START sets the 298251881Speter lower limit to BPOSP. 299251881Speter Return number of matching bytes starting at ASOP. Return 0 if 300251881Speter no match has been found. 301251881Speter */ 302251881Speterstatic apr_size_t 303251881Speterfind_match(const struct blocks *blocks, 304251881Speter const apr_uint32_t rolling, 305251881Speter const char *a, 306251881Speter apr_size_t asize, 307251881Speter const char *b, 308251881Speter apr_size_t bsize, 309251881Speter apr_size_t *bposp, 310251881Speter apr_size_t *aposp, 311251881Speter apr_size_t pending_insert_start) 312251881Speter{ 313251881Speter apr_size_t apos, bpos = *bposp; 314251881Speter apr_size_t delta, max_delta; 315251881Speter 316251881Speter apos = find_block(blocks, rolling, b + bpos); 317251881Speter 318251881Speter /* See if we have a match. */ 319251881Speter if (apos == NO_POSITION) 320251881Speter return 0; 321251881Speter 322251881Speter /* Extend the match forward as far as possible */ 323251881Speter max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE 324251881Speter ? asize - apos - MATCH_BLOCKSIZE 325251881Speter : bsize - bpos - MATCH_BLOCKSIZE; 326251881Speter delta = match_length(a + apos + MATCH_BLOCKSIZE, 327251881Speter b + bpos + MATCH_BLOCKSIZE, 328251881Speter max_delta); 329251881Speter 330251881Speter /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's 331251881Speter content has been sampled only every MATCH_BLOCKSIZE positions). */ 332251881Speter while (apos > 0 && bpos > pending_insert_start && a[apos-1] == b[bpos-1]) 333251881Speter { 334251881Speter --apos; 335251881Speter --bpos; 336251881Speter ++delta; 337251881Speter } 338251881Speter 339251881Speter *aposp = apos; 340251881Speter *bposp = bpos; 341251881Speter 342251881Speter return MATCH_BLOCKSIZE + delta; 343251881Speter} 344251881Speter 345251881Speter/* Utility for compute_delta() that compares the range B[START,BSIZE) with 346251881Speter * the range of similar size before A[ASIZE]. Create corresponding copy and 347251881Speter * insert operations. 348251881Speter * 349251881Speter * BUILD_BATON and POOL will be passed through from compute_delta(). 350251881Speter */ 351251881Speterstatic void 352251881Speterstore_delta_trailer(svn_txdelta__ops_baton_t *build_baton, 353251881Speter const char *a, 354251881Speter apr_size_t asize, 355251881Speter const char *b, 356251881Speter apr_size_t bsize, 357251881Speter apr_size_t start, 358251881Speter apr_pool_t *pool) 359251881Speter{ 360251881Speter apr_size_t end_match; 361251881Speter apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize; 362251881Speter if (max_len == 0) 363251881Speter return; 364251881Speter 365251881Speter end_match = reverse_match_length(a + asize, b + bsize, max_len); 366251881Speter if (end_match <= 4) 367251881Speter end_match = 0; 368251881Speter 369251881Speter if (bsize - start > end_match) 370251881Speter svn_txdelta__insert_op(build_baton, svn_txdelta_new, 371251881Speter start, bsize - start - end_match, b + start, pool); 372251881Speter if (end_match) 373251881Speter svn_txdelta__insert_op(build_baton, svn_txdelta_source, 374251881Speter asize - end_match, end_match, NULL, pool); 375251881Speter} 376251881Speter 377251881Speter 378251881Speter/* Compute a delta from A to B using xdelta. 379251881Speter 380251881Speter The basic xdelta algorithm is as follows: 381251881Speter 382251881Speter 1. Go through the source data, checksumming every MATCH_BLOCKSIZE 383251881Speter block of bytes using adler32, and inserting the checksum into a 384251881Speter match table with the position of the match. 385251881Speter 2. Go through the target byte by byte, seeing if that byte starts a 386251881Speter match that we have in the match table. 387251881Speter 2a. If so, try to extend the match as far as possible both 388251881Speter forwards and backwards, and then insert a source copy 389251881Speter operation into the delta ops builder for the match. 390251881Speter 2b. If not, insert the byte as new data using an insert delta op. 391251881Speter 392251881Speter Our implementation doesn't immediately insert "insert" operations, 393251881Speter it waits until we have another copy, or we are done. The reasoning 394251881Speter is twofold: 395251881Speter 396251881Speter 1. Otherwise, we would just be building a ton of 1 byte insert 397251881Speter operations 398251881Speter 2. So that we can extend a source match backwards into a pending 399251881Speter insert operation, and possibly remove the need for the insert 400251881Speter entirely. This can happen due to stream alignment. 401251881Speter*/ 402251881Speterstatic void 403251881Spetercompute_delta(svn_txdelta__ops_baton_t *build_baton, 404251881Speter const char *a, 405251881Speter apr_size_t asize, 406251881Speter const char *b, 407251881Speter apr_size_t bsize, 408251881Speter apr_pool_t *pool) 409251881Speter{ 410251881Speter struct blocks blocks; 411251881Speter apr_uint32_t rolling; 412251881Speter apr_size_t lo = 0, pending_insert_start = 0; 413251881Speter 414251881Speter /* Optimization: directly compare window starts. If more than 4 415251881Speter * bytes match, we can immediately create a matching windows. 416251881Speter * Shorter sequences result in a net data increase. */ 417251881Speter lo = match_length(a, b, asize > bsize ? bsize : asize); 418251881Speter if ((lo > 4) || (lo == bsize)) 419251881Speter { 420251881Speter svn_txdelta__insert_op(build_baton, svn_txdelta_source, 421251881Speter 0, lo, NULL, pool); 422251881Speter pending_insert_start = lo; 423251881Speter } 424251881Speter else 425251881Speter lo = 0; 426251881Speter 427251881Speter /* If the size of the target is smaller than the match blocksize, just 428251881Speter insert the entire target. */ 429251881Speter if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE)) 430251881Speter { 431251881Speter store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool); 432251881Speter return; 433251881Speter } 434251881Speter 435251881Speter /* Initialize the matches table. */ 436251881Speter init_blocks_table(a, asize, &blocks, pool); 437251881Speter 438251881Speter /* Initialize our rolling checksum. */ 439251881Speter rolling = init_adler32(b + lo); 440251881Speter while (lo < bsize) 441251881Speter { 442251881Speter apr_size_t matchlen = 0; 443251881Speter apr_size_t apos; 444251881Speter 445251881Speter if (lo + MATCH_BLOCKSIZE <= bsize) 446251881Speter matchlen = find_match(&blocks, rolling, a, asize, b, bsize, 447251881Speter &lo, &apos, pending_insert_start); 448251881Speter 449251881Speter /* If we didn't find a real match, insert the byte at the target 450251881Speter position into the pending insert. */ 451251881Speter if (matchlen == 0) 452251881Speter { 453251881Speter /* move block one position forward. Short blocks at the end of 454251881Speter the buffer cannot be used as the beginning of a new match */ 455251881Speter if (lo + MATCH_BLOCKSIZE < bsize) 456251881Speter rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]); 457251881Speter 458251881Speter lo++; 459251881Speter } 460251881Speter else 461251881Speter { 462251881Speter /* store the sequence of B that is between the matches */ 463251881Speter if (lo - pending_insert_start > 0) 464251881Speter svn_txdelta__insert_op(build_baton, svn_txdelta_new, 465251881Speter 0, lo - pending_insert_start, 466251881Speter b + pending_insert_start, pool); 467251881Speter else 468251881Speter { 469251881Speter /* the match borders on the previous op. Maybe, we found a 470251881Speter * match that is better than / overlapping the previous one. */ 471251881Speter apr_size_t len = reverse_match_length(a + apos, b + lo, apos < lo ? apos : lo); 472251881Speter if (len > 0) 473251881Speter { 474251881Speter len = svn_txdelta__remove_copy(build_baton, len); 475251881Speter apos -= len; 476251881Speter matchlen += len; 477251881Speter lo -= len; 478251881Speter } 479251881Speter } 480251881Speter 481251881Speter /* Reset the pending insert start to immediately after the 482251881Speter match. */ 483251881Speter lo += matchlen; 484251881Speter pending_insert_start = lo; 485251881Speter svn_txdelta__insert_op(build_baton, svn_txdelta_source, 486251881Speter apos, matchlen, NULL, pool); 487251881Speter 488251881Speter /* Calculate the Adler32 sum for the first block behind the match. 489251881Speter * Ignore short buffers at the end of B. 490251881Speter */ 491251881Speter if (lo + MATCH_BLOCKSIZE <= bsize) 492251881Speter rolling = init_adler32(b + lo); 493251881Speter } 494251881Speter } 495251881Speter 496251881Speter /* If we still have an insert pending at the end, throw it in. */ 497251881Speter store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool); 498251881Speter} 499251881Speter 500251881Spetervoid 501251881Spetersvn_txdelta__xdelta(svn_txdelta__ops_baton_t *build_baton, 502251881Speter const char *data, 503251881Speter apr_size_t source_len, 504251881Speter apr_size_t target_len, 505251881Speter apr_pool_t *pool) 506251881Speter{ 507251881Speter /* We should never be asked to compute something when the source_len is 0; 508251881Speter we just use a single insert op there (and rely on zlib for 509251881Speter compression). */ 510251881Speter assert(source_len != 0); 511251881Speter compute_delta(build_baton, data, source_len, 512251881Speter data + source_len, target_len, 513251881Speter pool); 514251881Speter} 515