subversion/libsvn_delta/xdelta.c

251881Speter/*
251881Speter * xdelta.c:  xdelta generator.
251881Speter *
251881Speter * ====================================================================
251881Speter *    Licensed to the Apache Software Foundation (ASF) under one
251881Speter *    or more contributor license agreements.  See the NOTICE file
251881Speter *    distributed with this work for additional information
251881Speter *    regarding copyright ownership.  The ASF licenses this file
251881Speter *    to you under the Apache License, Version 2.0 (the
251881Speter *    "License"); you may not use this file except in compliance
251881Speter *    with the License.  You may obtain a copy of the License at
251881Speter *
251881Speter *      http://www.apache.org/licenses/LICENSE-2.0
251881Speter *
251881Speter *    Unless required by applicable law or agreed to in writing,
251881Speter *    software distributed under the License is distributed on an
251881Speter *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
251881Speter *    KIND, either express or implied.  See the License for the
251881Speter *    specific language governing permissions and limitations
251881Speter *    under the License.
251881Speter * ====================================================================
251881Speter */
251881Speter
251881Speter
251881Speter#include <assert.h>
251881Speter
251881Speter#include <apr_general.h>        /* for APR_INLINE */
251881Speter#include <apr_hash.h>
251881Speter
251881Speter#include "svn_hash.h"
251881Speter#include "svn_delta.h"
251881Speter#include "delta.h"
251881Speter
251881Speter/* This is pseudo-adler32. It is adler32 without the prime modulus.
251881Speter   The idea is borrowed from monotone, and is a translation of the C++
251881Speter   code.  Graydon Hoare, the author of the original code, gave his
251881Speter   explicit permission to use it under these terms at 8:02pm on
251881Speter   Friday, February 11th, 2005.  */
251881Speter
/* Size of the blocks we compute checksums for.  This was chosen out of
   thin air.  Monotone used 64, xdelta1 used 64, rsync uses 128.

   The code in this file relies on two properties of this value:
   - it is 256 or less, so that the plain byte sum kept in the lower 16
     bits of the rolling checksum can never spill into the upper 16 bits;
   - it is a multiple of 8, because init_adler32() consumes a block in
     unrolled steps of 8 bytes.
   Both are enforced at compile time below.
 */
#define MATCH_BLOCKSIZE 64

#if (MATCH_BLOCKSIZE <= 0) || (MATCH_BLOCKSIZE > 256) \
    || ((MATCH_BLOCKSIZE % 8) != 0)
#error "MATCH_BLOCKSIZE must be a positive multiple of 8 and at most 256"
#endif
251881Speter
251881Speter/* "no" / "invalid" / "unused" value for positions within the delta windows
251881Speter */
251881Speter#define NO_POSITION ((apr_uint32_t)-1)
251881Speter
251881Speter/* Feed C_IN into the adler32 checksum and remove C_OUT at the same time.
251881Speter * This function may (and will) only be called for characters that are
251881Speter * MATCH_BLOCKSIZE positions apart.
251881Speter *
251881Speter * Please note that the lower 16 bits cannot overflow in neither direction.
251881Speter * Therefore, we don't need to split the value into separate values for
251881Speter * sum(char) and sum(sum(char)).
251881Speter */
251881Speterstatic APR_INLINE apr_uint32_t
251881Speteradler32_replace(apr_uint32_t adler32, const char c_out, const char c_in)
251881Speter{
251881Speter  adler32 -= (MATCH_BLOCKSIZE * 0x10000u * ((unsigned char) c_out));
251881Speter
251881Speter  adler32 -= (unsigned char)c_out;
251881Speter  adler32 += (unsigned char)c_in;
251881Speter
251881Speter  return adler32 + adler32 * 0x10000;
251881Speter}
251881Speter
251881Speter/* Calculate an pseudo-adler32 checksum for MATCH_BLOCKSIZE bytes starting
251881Speter   at DATA.  Return the checksum value.  */
251881Speter
251881Speterstatic APR_INLINE apr_uint32_t
251881Speterinit_adler32(const char *data)
251881Speter{
251881Speter  const unsigned char *input = (const unsigned char *)data;
251881Speter  const unsigned char *last = input + MATCH_BLOCKSIZE;
251881Speter
251881Speter  apr_uint32_t s1 = 0;
251881Speter  apr_uint32_t s2 = 0;
251881Speter
251881Speter  for (; input < last; input += 8)
251881Speter    {
251881Speter      s1 += input[0]; s2 += s1;
251881Speter      s1 += input[1]; s2 += s1;
251881Speter      s1 += input[2]; s2 += s1;
251881Speter      s1 += input[3]; s2 += s1;
251881Speter      s1 += input[4]; s2 += s1;
251881Speter      s1 += input[5]; s2 += s1;
251881Speter      s1 += input[6]; s2 += s1;
251881Speter      s1 += input[7]; s2 += s1;
251881Speter    }
251881Speter
251881Speter  return s2 * 0x10000 + s1;
251881Speter}
251881Speter
251881Speter/* Information for a block of the delta source.  The length of the
251881Speter   block is the smaller of MATCH_BLOCKSIZE and the difference between
251881Speter   the size of the source data and the position of this block. */
251881Speterstruct block
251881Speter{
251881Speter  apr_uint32_t adlersum;
251881Speter
251881Speter/* Even in 64 bit systems, store only 32 bit offsets in our hash table
251881Speter   (our delta window size much much smaller then 4GB).
251881Speter   That reduces the hash table size by 50% from 32to 16KB
251881Speter   and makes it easier to fit into the CPU's L1 cache. */
251881Speter  apr_uint32_t pos;			/* NO_POSITION -> block is not used */
251881Speter};
251881Speter
251881Speter/* A hash table, using open addressing, of the blocks of the source. */
251881Speterstruct blocks
251881Speter{
251881Speter  /* The largest valid index of slots.
251881Speter     This value has an upper bound proportionate to the text delta
251881Speter     window size, so unless we dramatically increase the window size,
251881Speter     it's safe to make this a 32-bit value.  In any case, it has to be
251881Speter     hte same width as the block position index, (struct
251881Speter     block).pos. */
251881Speter  apr_uint32_t max;
251881Speter  /* Source buffer that the positions in SLOTS refer to. */
251881Speter  const char* data;
251881Speter  /* The vector of blocks.  A pos value of NO_POSITION represents an unused
251881Speter     slot. */
251881Speter  struct block *slots;
251881Speter};
251881Speter
251881Speter
251881Speter/* Return a hash value calculated from the adler32 SUM, suitable for use with
251881Speter   our hash table. */
251881Speterstatic apr_uint32_t hash_func(apr_uint32_t sum)
251881Speter{
251881Speter  /* Since the adl32 checksum have a bad distribution for the 11th to 16th
251881Speter     bits when used for our small block size, we add some bits from the
251881Speter     other half of the checksum. */
251881Speter  return sum ^ (sum >> 12);
251881Speter}
251881Speter
251881Speter/* Insert a block with the checksum ADLERSUM at position POS in the source
251881Speter   data into the table BLOCKS.  Ignore true duplicates, i.e. blocks with
251881Speter   actually the same content. */
251881Speterstatic void
251881Speteradd_block(struct blocks *blocks, apr_uint32_t adlersum, apr_uint32_t pos)
251881Speter{
251881Speter  apr_uint32_t h = hash_func(adlersum) & blocks->max;
251881Speter
251881Speter  /* This will terminate, since we know that we will not fill the table. */
251881Speter  for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
251881Speter    if (blocks->slots[h].adlersum == adlersum)
251881Speter      if (memcmp(blocks->data + blocks->slots[h].pos, blocks->data + pos,
251881Speter                 MATCH_BLOCKSIZE) == 0)
251881Speter        return;
251881Speter
251881Speter  blocks->slots[h].adlersum = adlersum;
251881Speter  blocks->slots[h].pos = pos;
251881Speter}
251881Speter
251881Speter/* Find a block in BLOCKS with the checksum ADLERSUM and matching the content
251881Speter   at DATA, returning its position in the source data.  If there is no such
251881Speter   block, return NO_POSITION. */
251881Speterstatic apr_uint32_t
251881Speterfind_block(const struct blocks *blocks,
251881Speter           apr_uint32_t adlersum,
251881Speter           const char* data)
251881Speter{
251881Speter  apr_uint32_t h = hash_func(adlersum) & blocks->max;
251881Speter
251881Speter  for (; blocks->slots[h].pos != NO_POSITION; h = (h + 1) & blocks->max)
251881Speter    if (blocks->slots[h].adlersum == adlersum)
251881Speter      if (memcmp(blocks->data + blocks->slots[h].pos, data,
251881Speter                 MATCH_BLOCKSIZE) == 0)
251881Speter        return blocks->slots[h].pos;
251881Speter
251881Speter  return NO_POSITION;
251881Speter}
251881Speter
251881Speter/* Initialize the matches table from DATA of size DATALEN.  This goes
251881Speter   through every block of MATCH_BLOCKSIZE bytes in the source and
251881Speter   checksums it, inserting the result into the BLOCKS table.  */
251881Speterstatic void
251881Speterinit_blocks_table(const char *data,
251881Speter                  apr_size_t datalen,
251881Speter                  struct blocks *blocks,
251881Speter                  apr_pool_t *pool)
251881Speter{
251881Speter  apr_size_t nblocks;
251881Speter  apr_size_t wnslots = 1;
251881Speter  apr_uint32_t nslots;
251881Speter  apr_uint32_t i;
251881Speter
251881Speter  /* Be pessimistic about the block count. */
251881Speter  nblocks = datalen / MATCH_BLOCKSIZE + 1;
251881Speter  /* Find nearest larger power of two. */
251881Speter  while (wnslots <= nblocks)
251881Speter    wnslots *= 2;
251881Speter  /* Double the number of slots to avoid a too high load. */
251881Speter  wnslots *= 2;
251881Speter  /* Narrow the number of slots to 32 bits, which is the size of the
251881Speter     block position index in the hash table.
251881Speter     Sanity check: On 64-bit platforms, apr_size_t is likely to be
251881Speter     larger than apr_uint32_t. Make sure that the number of slots
251881Speter     actually fits into blocks->max.  It's safe to use a hard assert
251881Speter     here, because the largest possible value for nslots is
251881Speter     proportional to the text delta window size and is therefore much
251881Speter     smaller than the range of an apr_uint32_t.  If we ever happen to
251881Speter     increase the window size too much, this assertion will get
251881Speter     triggered by the test suite. */
251881Speter  nslots = (apr_uint32_t) wnslots;
251881Speter  SVN_ERR_ASSERT_NO_RETURN(wnslots == nslots);
251881Speter  blocks->max = nslots - 1;
251881Speter  blocks->data = data;
251881Speter  blocks->slots = apr_palloc(pool, nslots * sizeof(*(blocks->slots)));
251881Speter  for (i = 0; i < nslots; ++i)
251881Speter    {
251881Speter      /* Avoid using an indeterminate value in the lookup. */
251881Speter      blocks->slots[i].adlersum = 0;
251881Speter      blocks->slots[i].pos = NO_POSITION;
251881Speter    }
251881Speter
251881Speter  /* If there is an odd block at the end of the buffer, we will
251881Speter     not use that shorter block for deltification (only indirectly
251881Speter     as an extension of some previous block). */
251881Speter  for (i = 0; i + MATCH_BLOCKSIZE <= datalen; i += MATCH_BLOCKSIZE)
251881Speter    add_block(blocks, init_adler32(data + i), i);
251881Speter}
251881Speter
251881Speter/* Return the lowest position at which A and B differ. If no difference
251881Speter * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
251881Speter */
251881Speterstatic apr_size_t
251881Spetermatch_length(const char *a, const char *b, apr_size_t max_len)
251881Speter{
251881Speter  apr_size_t pos = 0;
251881Speter
251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK
251881Speter
251881Speter  /* Chunky processing is so much faster ...
251881Speter   *
251881Speter   * We can't make this work on architectures that require aligned access
251881Speter   * because A and B will probably have different alignment. So, skipping
251881Speter   * the first few chars until alignment is reached is not an option.
251881Speter   */
251881Speter  for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
251881Speter    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
251881Speter      break;
251881Speter
251881Speter#endif
251881Speter
251881Speter  for (; pos < max_len; ++pos)
251881Speter    if (a[pos] != b[pos])
251881Speter      break;
251881Speter
251881Speter  return pos;
251881Speter}
251881Speter
251881Speter/* Return the number of bytes before A and B that don't differ.  If no
251881Speter * difference can be found in the first MAX_LEN characters,  MAX_LEN will
251881Speter * be returned.  Please note that A-MAX_LEN and B-MAX_LEN must both be
251881Speter * valid addresses.
251881Speter */
251881Speterstatic apr_size_t
251881Speterreverse_match_length(const char *a, const char *b, apr_size_t max_len)
251881Speter{
251881Speter  apr_size_t pos = 0;
251881Speter
251881Speter#if SVN_UNALIGNED_ACCESS_IS_OK
251881Speter
251881Speter  /* Chunky processing is so much faster ...
251881Speter   *
251881Speter   * We can't make this work on architectures that require aligned access
251881Speter   * because A and B will probably have different alignment. So, skipping
251881Speter   * the first few chars until alignment is reached is not an option.
251881Speter   */
251881Speter  for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
251881Speter    if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
251881Speter      break;
251881Speter
251881Speter  pos -= sizeof(apr_size_t);
251881Speter
251881Speter#endif
251881Speter
251881Speter  /* If we find a mismatch at -pos, pos-1 characters matched.
251881Speter   */
251881Speter  while (++pos <= max_len)
251881Speter    if (a[0-pos] != b[0-pos])
251881Speter      return pos - 1;
251881Speter
251881Speter  /* No mismatch found -> at least MAX_LEN matching chars.
251881Speter   */
251881Speter  return max_len;
251881Speter}
251881Speter
251881Speter
251881Speter/* Try to find a match for the target data B in BLOCKS, and then
251881Speter   extend the match as long as data in A and B at the match position
251881Speter   continues to match.  We set the position in A we ended up in (in
251881Speter   case we extended it backwards) in APOSP and update the corresponding
251881Speter   position within B given in BPOSP. PENDING_INSERT_START sets the
251881Speter   lower limit to BPOSP.
251881Speter   Return number of matching bytes starting at ASOP.  Return 0 if
251881Speter   no match has been found.
251881Speter */
251881Speterstatic apr_size_t
251881Speterfind_match(const struct blocks *blocks,
251881Speter           const apr_uint32_t rolling,
251881Speter           const char *a,
251881Speter           apr_size_t asize,
251881Speter           const char *b,
251881Speter           apr_size_t bsize,
251881Speter           apr_size_t *bposp,
251881Speter           apr_size_t *aposp,
251881Speter           apr_size_t pending_insert_start)
251881Speter{
251881Speter  apr_size_t apos, bpos = *bposp;
251881Speter  apr_size_t delta, max_delta;
251881Speter
251881Speter  apos = find_block(blocks, rolling, b + bpos);
251881Speter
251881Speter  /* See if we have a match.  */
251881Speter  if (apos == NO_POSITION)
251881Speter    return 0;
251881Speter
251881Speter  /* Extend the match forward as far as possible */
251881Speter  max_delta = asize - apos - MATCH_BLOCKSIZE < bsize - bpos - MATCH_BLOCKSIZE
251881Speter            ? asize - apos - MATCH_BLOCKSIZE
251881Speter            : bsize - bpos - MATCH_BLOCKSIZE;
251881Speter  delta = match_length(a + apos + MATCH_BLOCKSIZE,
251881Speter                       b + bpos + MATCH_BLOCKSIZE,
251881Speter                       max_delta);
251881Speter
251881Speter  /* See if we can extend backwards (max MATCH_BLOCKSIZE-1 steps because A's
251881Speter     content has been sampled only every MATCH_BLOCKSIZE positions).  */
251881Speter  while (apos > 0 && bpos > pending_insert_start && a[apos-1] == b[bpos-1])
251881Speter    {
251881Speter      --apos;
251881Speter      --bpos;
251881Speter      ++delta;
251881Speter    }
251881Speter
251881Speter  *aposp = apos;
251881Speter  *bposp = bpos;
251881Speter
251881Speter  return MATCH_BLOCKSIZE + delta;
251881Speter}
251881Speter
251881Speter/* Utility for compute_delta() that compares the range B[START,BSIZE) with
251881Speter * the range of similar size before A[ASIZE]. Create corresponding copy and
251881Speter * insert operations.
251881Speter *
251881Speter * BUILD_BATON and POOL will be passed through from compute_delta().
251881Speter */
251881Speterstatic void
251881Speterstore_delta_trailer(svn_txdelta__ops_baton_t *build_baton,
251881Speter                    const char *a,
251881Speter                    apr_size_t asize,
251881Speter                    const char *b,
251881Speter                    apr_size_t bsize,
251881Speter                    apr_size_t start,
251881Speter                    apr_pool_t *pool)
251881Speter{
251881Speter  apr_size_t end_match;
251881Speter  apr_size_t max_len = asize > (bsize - start) ? bsize - start : asize;
251881Speter  if (max_len == 0)
251881Speter    return;
251881Speter
251881Speter  end_match = reverse_match_length(a + asize, b + bsize, max_len);
251881Speter  if (end_match <= 4)
251881Speter    end_match = 0;
251881Speter
251881Speter  if (bsize - start > end_match)
251881Speter    svn_txdelta__insert_op(build_baton, svn_txdelta_new,
251881Speter                           start, bsize - start - end_match, b + start, pool);
251881Speter  if (end_match)
251881Speter    svn_txdelta__insert_op(build_baton, svn_txdelta_source,
251881Speter                           asize - end_match, end_match, NULL, pool);
251881Speter}
251881Speter
251881Speter
251881Speter/* Compute a delta from A to B using xdelta.
251881Speter
251881Speter   The basic xdelta algorithm is as follows:
251881Speter
251881Speter   1. Go through the source data, checksumming every MATCH_BLOCKSIZE
251881Speter      block of bytes using adler32, and inserting the checksum into a
251881Speter      match table with the position of the match.
251881Speter   2. Go through the target byte by byte, seeing if that byte starts a
251881Speter      match that we have in the match table.
251881Speter      2a. If so, try to extend the match as far as possible both
251881Speter          forwards and backwards, and then insert a source copy
251881Speter          operation into the delta ops builder for the match.
251881Speter      2b. If not, insert the byte as new data using an insert delta op.
251881Speter
251881Speter   Our implementation doesn't immediately insert "insert" operations,
251881Speter   it waits until we have another copy, or we are done.  The reasoning
251881Speter   is twofold:
251881Speter
251881Speter   1. Otherwise, we would just be building a ton of 1 byte insert
251881Speter      operations
251881Speter   2. So that we can extend a source match backwards into a pending
251881Speter     insert operation, and possibly remove the need for the insert
251881Speter     entirely.  This can happen due to stream alignment.
251881Speter*/
251881Speterstatic void
251881Spetercompute_delta(svn_txdelta__ops_baton_t *build_baton,
251881Speter              const char *a,
251881Speter              apr_size_t asize,
251881Speter              const char *b,
251881Speter              apr_size_t bsize,
251881Speter              apr_pool_t *pool)
251881Speter{
251881Speter  struct blocks blocks;
251881Speter  apr_uint32_t rolling;
251881Speter  apr_size_t lo = 0, pending_insert_start = 0;
251881Speter
251881Speter  /* Optimization: directly compare window starts. If more than 4
251881Speter   * bytes match, we can immediately create a matching windows.
251881Speter   * Shorter sequences result in a net data increase. */
251881Speter  lo = match_length(a, b, asize > bsize ? bsize : asize);
251881Speter  if ((lo > 4) || (lo == bsize))
251881Speter    {
251881Speter      svn_txdelta__insert_op(build_baton, svn_txdelta_source,
251881Speter                             0, lo, NULL, pool);
251881Speter      pending_insert_start = lo;
251881Speter    }
251881Speter  else
251881Speter    lo = 0;
251881Speter
251881Speter  /* If the size of the target is smaller than the match blocksize, just
251881Speter     insert the entire target.  */
251881Speter  if ((bsize - lo < MATCH_BLOCKSIZE) || (asize < MATCH_BLOCKSIZE))
251881Speter    {
251881Speter      store_delta_trailer(build_baton, a, asize, b, bsize, lo, pool);
251881Speter      return;
251881Speter    }
251881Speter
251881Speter  /* Initialize the matches table.  */
251881Speter  init_blocks_table(a, asize, &blocks, pool);
251881Speter
251881Speter  /* Initialize our rolling checksum.  */
251881Speter  rolling = init_adler32(b + lo);
251881Speter  while (lo < bsize)
251881Speter    {
251881Speter      apr_size_t matchlen = 0;
251881Speter      apr_size_t apos;
251881Speter
251881Speter      if (lo + MATCH_BLOCKSIZE <= bsize)
251881Speter        matchlen = find_match(&blocks, rolling, a, asize, b, bsize,
251881Speter                              &lo, &apos, pending_insert_start);
251881Speter
251881Speter      /* If we didn't find a real match, insert the byte at the target
251881Speter         position into the pending insert.  */
251881Speter      if (matchlen == 0)
251881Speter        {
251881Speter          /* move block one position forward. Short blocks at the end of
251881Speter             the buffer cannot be used as the beginning of a new match */
251881Speter          if (lo + MATCH_BLOCKSIZE < bsize)
251881Speter            rolling = adler32_replace(rolling, b[lo], b[lo+MATCH_BLOCKSIZE]);
251881Speter
251881Speter          lo++;
251881Speter        }
251881Speter      else
251881Speter        {
251881Speter          /* store the sequence of B that is between the matches */
251881Speter          if (lo - pending_insert_start > 0)
251881Speter            svn_txdelta__insert_op(build_baton, svn_txdelta_new,
251881Speter                                   0, lo - pending_insert_start,
251881Speter                                   b + pending_insert_start, pool);
251881Speter          else
251881Speter            {
251881Speter              /* the match borders on the previous op. Maybe, we found a
251881Speter               * match that is better than / overlapping the previous one. */
251881Speter              apr_size_t len = reverse_match_length(a + apos, b + lo, apos < lo ? apos : lo);
251881Speter              if (len > 0)
251881Speter                {
251881Speter                  len = svn_txdelta__remove_copy(build_baton, len);
251881Speter                  apos -= len;
251881Speter                  matchlen += len;
251881Speter                  lo -= len;
251881Speter                }
251881Speter            }
251881Speter
251881Speter          /* Reset the pending insert start to immediately after the
251881Speter             match. */
251881Speter          lo += matchlen;
251881Speter          pending_insert_start = lo;
251881Speter          svn_txdelta__insert_op(build_baton, svn_txdelta_source,
251881Speter                                 apos, matchlen, NULL, pool);
251881Speter
251881Speter          /* Calculate the Adler32 sum for the first block behind the match.
251881Speter           * Ignore short buffers at the end of B.
251881Speter           */
251881Speter          if (lo + MATCH_BLOCKSIZE <= bsize)
251881Speter            rolling = init_adler32(b + lo);
251881Speter        }
251881Speter    }
251881Speter
251881Speter  /* If we still have an insert pending at the end, throw it in.  */
251881Speter  store_delta_trailer(build_baton, a, asize, b, bsize, pending_insert_start, pool);
251881Speter}
251881Speter
251881Spetervoid
251881Spetersvn_txdelta__xdelta(svn_txdelta__ops_baton_t *build_baton,
251881Speter                    const char *data,
251881Speter                    apr_size_t source_len,
251881Speter                    apr_size_t target_len,
251881Speter                    apr_pool_t *pool)
251881Speter{
251881Speter  /*  We should never be asked to compute something when the source_len is 0;
251881Speter      we just use a single insert op there (and rely on zlib for
251881Speter      compression). */
251881Speter  assert(source_len != 0);
251881Speter  compute_delta(build_baton, data, source_len,
251881Speter                data + source_len, target_len,
251881Speter                pool);
251881Speter}