compress.c revision 299742
1/*
2 * compress.c:  various data compression routines
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25#include <string.h>
26#include <assert.h>
27#include <zlib.h>
28
29#include "private/svn_subr_private.h"
30#include "private/svn_error_private.h"
31
32#include "svn_private_config.h"
33
34const char *
35svn_zlib__compiled_version(void)
36{
37  static const char zlib_version_str[] = ZLIB_VERSION;
38
39  return zlib_version_str;
40}
41
/* Return the version string reported by zlibVersion(), i.e. by the zlib
   library that is actually in use at run time. */
const char *
svn_zlib__runtime_version(void)
{
  const char *linked_version = zlibVersion();

  return linked_version;
}
47
48
/* svnCompressBound(LEN): upper bound on the size of the zlib-compressed
   representation of LEN input bytes.  zlib did not export compressBound()
   until 1.2.0, so older versions get a hand-written estimate instead. */
#if ZLIB_VERNUM < 0x1200
#define svnCompressBound(LEN) ((LEN) + ((LEN) >> 12) + ((LEN) >> 14) + 11)
#else
#define svnCompressBound(LEN) compressBound(LEN)
#endif

/* For svndiff1, address/instruction/new data shorter than this many bytes
   is stored as-is rather than being passed through zlib as a secondary
   compressor.  */
#define MIN_COMPRESS_SIZE 512
59
60unsigned char *
61svn__encode_uint(unsigned char *p, apr_uint64_t val)
62{
63  int n;
64  apr_uint64_t v;
65
66  /* Figure out how many bytes we'll need.  */
67  v = val >> 7;
68  n = 1;
69  while (v > 0)
70    {
71      v = v >> 7;
72      n++;
73    }
74
75  /* Encode the remaining bytes; n is always the number of bytes
76     coming after the one we're encoding.  */
77  while (--n >= 1)
78    *p++ = (unsigned char)(((val >> (n * 7)) | 0x80) & 0xff);
79
80  *p++ = (unsigned char)(val & 0x7f);
81
82  return p;
83}
84
85const unsigned char *
86svn__decode_uint(apr_uint64_t *val,
87                 const unsigned char *p,
88                 const unsigned char *end)
89{
90  apr_uint64_t temp = 0;
91
92  if (p + SVN__MAX_ENCODED_UINT_LEN < end)
93    end = p + SVN__MAX_ENCODED_UINT_LEN;
94
95  /* Decode bytes until we're done. */
96  while (SVN__PREDICT_TRUE(p < end))
97    {
98      unsigned int c = *p++;
99
100      if (c < 0x80)
101        {
102          *val = (temp << 7) | c;
103          return p;
104        }
105      else
106        {
107          temp = (temp << 7) | (c & 0x7f);
108        }
109    }
110
111  return NULL;
112}
113
114/* If IN is a string that is >= MIN_COMPRESS_SIZE and the COMPRESSION_LEVEL
115   is not SVN_DELTA_COMPRESSION_LEVEL_NONE, zlib compress it and places the
116   result in OUT, with an integer prepended specifying the original size.
117   If IN is < MIN_COMPRESS_SIZE, or if the compressed version of IN was no
118   smaller than the original IN, OUT will be a copy of IN with the size
119   prepended as an integer. */
120static svn_error_t *
121zlib_encode(const char *data,
122            apr_size_t len,
123            svn_stringbuf_t *out,
124            int compression_level)
125{
126  unsigned long endlen;
127  apr_size_t intlen;
128  unsigned char buf[SVN__MAX_ENCODED_UINT_LEN], *p;
129
130  svn_stringbuf_setempty(out);
131  p = svn__encode_uint(buf, (apr_uint64_t)len);
132  svn_stringbuf_appendbytes(out, (const char *)buf, p - buf);
133
134  intlen = out->len;
135
136  /* Compression initialization overhead is considered to large for
137     short buffers.  Also, if we don't actually want to compress data,
138     ZLIB will produce an output no shorter than the input.  Hence,
139     the DATA would directly appended to OUT, so we can do that directly
140     without calling ZLIB before. */
141  if (len < MIN_COMPRESS_SIZE || compression_level == SVN__COMPRESSION_NONE)
142    {
143      svn_stringbuf_appendbytes(out, data, len);
144    }
145  else
146    {
147      int zerr;
148
149      svn_stringbuf_ensure(out, svnCompressBound(len) + intlen);
150      endlen = out->blocksize;
151
152      zerr = compress2((unsigned char *)out->data + intlen, &endlen,
153                       (const unsigned char *)data, len,
154                       compression_level);
155      if (zerr != Z_OK)
156        return svn_error_trace(svn_error__wrap_zlib(
157                                 zerr, "compress2",
158                                 _("Compression of svndiff data failed")));
159
160      /* Compression didn't help :(, just append the original text */
161      if (endlen >= len)
162        {
163          svn_stringbuf_appendbytes(out, data, len);
164          return SVN_NO_ERROR;
165        }
166      out->len = endlen + intlen;
167      out->data[out->len] = 0;
168    }
169  return SVN_NO_ERROR;
170}
171
172/* Decode the possibly-zlib compressed string of length INLEN that is in
173   IN, into OUT.  We expect an integer is prepended to IN that specifies
174   the original size, and that if encoded size == original size, that the
175   remaining data is not compressed.
176   In that case, we will simply return pointer into IN as data pointer for
177   OUT, COPYLESS_ALLOWED has been set.  The, the caller is expected not to
178   modify the contents of OUT.
179   An error is returned if the decoded length exceeds the given LIMIT.
180 */
181static svn_error_t *
182zlib_decode(const unsigned char *in, apr_size_t inLen, svn_stringbuf_t *out,
183            apr_size_t limit)
184{
185  apr_size_t len;
186  apr_uint64_t size;
187  const unsigned char *oldplace = in;
188
189  /* First thing in the string is the original length.  */
190  in = svn__decode_uint(&size, in, in + inLen);
191  len = (apr_size_t)size;
192  if (in == NULL || len != size)
193    return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, NULL,
194                            _("Decompression of zlib compressed data failed: no size"));
195  if (len > limit)
196    return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA, NULL,
197                            _("Decompression of zlib compressed data failed: "
198                              "size too large"));
199
200  /* We need to subtract the size of the encoded original length off the
201   *      still remaining input length.  */
202  inLen -= (in - oldplace);
203  if (inLen == len)
204    {
205      svn_stringbuf_ensure(out, len);
206      memcpy(out->data, in, len);
207      out->data[len] = 0;
208      out->len = len;
209
210      return SVN_NO_ERROR;
211    }
212  else
213    {
214      unsigned long zlen = len;
215      int zerr;
216
217      svn_stringbuf_ensure(out, len);
218      zerr = uncompress((unsigned char *)out->data, &zlen, in, inLen);
219      if (zerr != Z_OK)
220        return svn_error_trace(svn_error__wrap_zlib(
221                                 zerr, "uncompress",
222                                 _("Decompression of svndiff data failed")));
223
224      /* Zlib should not produce something that has a different size than the
225         original length we stored. */
226      if (zlen != len)
227        return svn_error_create(SVN_ERR_SVNDIFF_INVALID_COMPRESSED_DATA,
228                                NULL,
229                                _("Size of uncompressed data "
230                                  "does not match stored original length"));
231      out->data[zlen] = 0;
232      out->len = zlen;
233    }
234  return SVN_NO_ERROR;
235}
236
237svn_error_t *
238svn__compress(svn_stringbuf_t *in,
239              svn_stringbuf_t *out,
240              int compression_method)
241{
242  if (   compression_method < SVN__COMPRESSION_NONE
243      || compression_method > SVN__COMPRESSION_ZLIB_MAX)
244    return svn_error_createf(SVN_ERR_BAD_COMPRESSION_METHOD, NULL,
245                             _("Unsupported compression method %d"),
246                             compression_method);
247
248  return zlib_encode(in->data, in->len, out, compression_method);
249}
250
251svn_error_t *
252svn__decompress(svn_stringbuf_t *in,
253                svn_stringbuf_t *out,
254                apr_size_t limit)
255{
256  return zlib_decode((const unsigned char*)in->data, in->len, out, limit);
257}
258