1/*
2 * base64.c:  base64 encoding and decoding functions
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <string.h>
27
28#include <apr.h>
29#include <apr_pools.h>
30#include <apr_general.h>        /* for APR_INLINE */
31
32#include "svn_pools.h"
33#include "svn_io.h"
34#include "svn_error.h"
35#include "svn_base64.h"
36#include "private/svn_string_private.h"
37#include "private/svn_subr_private.h"
38
/* When asked to format the base64-encoded output as multiple lines,
   we put this many chars in each line (plus one newline char) unless
   we run out of data.
   It is vital for some of the optimizations below that this value is
   a multiple of 4: encode_line() and decode_line() process whole
   4-char groups only and never check for a partial group. */
#define BASE64_LINELEN 76

/* Number of input bytes that are encoded into one full line of
   BASE64_LINELEN base64 chars (every 3 bytes become 4 chars). */
#define BYTES_PER_LINE (BASE64_LINELEN / 4 * 3)
48
/* Value -> base64 char mapping table: entry N is the char that encodes
   the 6-bit value N (2^6 entries, followed by the literal's NUL). */
static const char base64tab[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789"
  "+/";
52
53
54/* Binary input --> base64-encoded output */
55
/* State carried from call to call by the encoding stream's write and
   close handlers. */
struct encode_baton {
  svn_stream_t *output;         /* Stream that receives the base64 text */
  unsigned char buf[3];         /* Bytes waiting to be encoded */
  size_t buflen;                /* Number of bytes waiting */
  size_t linelen;               /* Chars output so far on this line */
  apr_pool_t *scratch_pool;     /* Cleared after each write, destroyed
                                   on close */
};
63
64
65/* Base64-encode a group.  IN needs to have three bytes and OUT needs
66   to have room for four bytes.  The input group is treated as four
67   six-bit units which are treated as lookups into base64tab for the
68   bytes of the output group.  */
69static APR_INLINE void
70encode_group(const unsigned char *in, char *out)
71{
72  /* Expand input bytes to machine word length (with zero extra cost
73     on x86/x64) ... */
74  apr_size_t part0 = in[0];
75  apr_size_t part1 = in[1];
76  apr_size_t part2 = in[2];
77
78  /* ... to prevent these arithmetic operations from being limited to
79     byte size.  This saves non-zero cost conversions of the result when
80     calculating the addresses within base64tab. */
81  out[0] = base64tab[part0 >> 2];
82  out[1] = base64tab[((part0 & 3) << 4) | (part1 >> 4)];
83  out[2] = base64tab[((part1 & 0xf) << 2) | (part2 >> 6)];
84  out[3] = base64tab[part2 & 0x3f];
85}
86
87/* Base64-encode a line, i.e. BYTES_PER_LINE bytes from DATA into
88   BASE64_LINELEN chars and append it to STR.  It does not assume that
89   a new line char will be appended, though.
90   The code in this function will simply transform the data without
91   performing any boundary checks.  Therefore, DATA must have at least
92   BYTES_PER_LINE left and space for at least another BASE64_LINELEN
93   chars must have been pre-allocated in STR before calling this
94   function. */
95static void
96encode_line(svn_stringbuf_t *str, const char *data)
97{
98  /* Translate directly from DATA to STR->DATA. */
99  const unsigned char *in = (const unsigned char *)data;
100  char *out = str->data + str->len;
101  char *end = out + BASE64_LINELEN;
102
103  /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
104     a multiple of 4. */
105  for ( ; out != end; in += 3, out += 4)
106    encode_group(in, out);
107
108  /* Expand and terminate the string. */
109  *out = '\0';
110  str->len += BASE64_LINELEN;
111}
112
113/* (Continue to) Base64-encode the byte string DATA (of length LEN)
114   into STR. Include newlines every so often if BREAK_LINES is true.
115   INBUF, INBUFLEN, and LINELEN are used internally; the caller shall
116   make INBUF have room for three characters and initialize *INBUFLEN
117   and *LINELEN to 0.
118
119   INBUF and *INBUFLEN carry the leftover data from call to call, and
120   *LINELEN carries the length of the current output line. */
121static void
122encode_bytes(svn_stringbuf_t *str, const void *data, apr_size_t len,
123             unsigned char *inbuf, size_t *inbuflen, size_t *linelen,
124             svn_boolean_t break_lines)
125{
126  char group[4];
127  const char *p = data, *end = p + len;
128  apr_size_t buflen;
129
130  /* Resize the stringbuf to make room for the (approximate) size of
131     output, to avoid repeated resizes later.
132     Please note that our optimized code relies on the fact that STR
133     never needs to be resized until we leave this function. */
134  buflen = len * 4 / 3 + 4;
135  if (break_lines)
136    {
137      /* Add an extra space for line breaks. */
138      buflen += buflen / BASE64_LINELEN;
139    }
140  svn_stringbuf_ensure(str, str->len + buflen);
141
142  /* Keep encoding three-byte groups until we run out.  */
143  while (*inbuflen + (end - p) >= 3)
144    {
145      /* May we encode BYTES_PER_LINE bytes without caring about
146         line breaks, data in the temporary INBUF or running out
147         of data? */
148      if (   *inbuflen == 0
149          && (*linelen == 0 || !break_lines)
150          && (end - p >= BYTES_PER_LINE))
151        {
152          /* Yes, we can encode a whole chunk of data at once. */
153          encode_line(str, p);
154          p += BYTES_PER_LINE;
155          *linelen += BASE64_LINELEN;
156        }
157      else
158        {
159          /* No, this is one of a number of special cases.
160             Encode the data byte by byte. */
161          memcpy(inbuf + *inbuflen, p, 3 - *inbuflen);
162          p += (3 - *inbuflen);
163          encode_group(inbuf, group);
164          svn_stringbuf_appendbytes(str, group, 4);
165          *inbuflen = 0;
166          *linelen += 4;
167        }
168
169      /* Add line breaks as necessary. */
170      if (break_lines && *linelen == BASE64_LINELEN)
171        {
172          svn_stringbuf_appendbyte(str, '\n');
173          *linelen = 0;
174        }
175    }
176
177  /* Tack any extra input onto *INBUF.  */
178  memcpy(inbuf + *inbuflen, p, end - p);
179  *inbuflen += (end - p);
180}
181
182
183/* Encode leftover data, if any, and possibly a final newline (if
184   there has been any data and BREAK_LINES is set), appending to STR.
185   LEN must be in the range 0..2.  */
186static void
187encode_partial_group(svn_stringbuf_t *str, const unsigned char *extra,
188                     size_t len, size_t linelen, svn_boolean_t break_lines)
189{
190  unsigned char ingroup[3];
191  char outgroup[4];
192
193  if (len > 0)
194    {
195      memcpy(ingroup, extra, len);
196      memset(ingroup + len, 0, 3 - len);
197      encode_group(ingroup, outgroup);
198      memset(outgroup + (len + 1), '=', 4 - (len + 1));
199      svn_stringbuf_appendbytes(str, outgroup, 4);
200      linelen += 4;
201    }
202  if (break_lines && linelen > 0)
203    svn_stringbuf_appendbyte(str, '\n');
204}
205
206
207/* Write handler for svn_base64_encode.  */
208static svn_error_t *
209encode_data(void *baton, const char *data, apr_size_t *len)
210{
211  struct encode_baton *eb = baton;
212  svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
213  apr_size_t enclen;
214  svn_error_t *err = SVN_NO_ERROR;
215
216  /* Encode this block of data and write it out.  */
217  encode_bytes(encoded, data, *len, eb->buf, &eb->buflen, &eb->linelen, TRUE);
218  enclen = encoded->len;
219  if (enclen != 0)
220    err = svn_stream_write(eb->output, encoded->data, &enclen);
221  svn_pool_clear(eb->scratch_pool);
222  return err;
223}
224
225
226/* Close handler for svn_base64_encode().  */
227static svn_error_t *
228finish_encoding_data(void *baton)
229{
230  struct encode_baton *eb = baton;
231  svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
232  apr_size_t enclen;
233  svn_error_t *err = SVN_NO_ERROR;
234
235  /* Encode a partial group at the end if necessary, and write it out.  */
236  encode_partial_group(encoded, eb->buf, eb->buflen, eb->linelen, TRUE);
237  enclen = encoded->len;
238  if (enclen != 0)
239    err = svn_stream_write(eb->output, encoded->data, &enclen);
240
241  /* Pass on the close request and clean up the baton.  */
242  if (err == SVN_NO_ERROR)
243    err = svn_stream_close(eb->output);
244  svn_pool_destroy(eb->scratch_pool);
245  return err;
246}
247
248
249svn_stream_t *
250svn_base64_encode(svn_stream_t *output, apr_pool_t *pool)
251{
252  struct encode_baton *eb = apr_palloc(pool, sizeof(*eb));
253  svn_stream_t *stream;
254
255  eb->output = output;
256  eb->buflen = 0;
257  eb->linelen = 0;
258  eb->scratch_pool = svn_pool_create(pool);
259  stream = svn_stream_create(eb, pool);
260  svn_stream_set_write(stream, encode_data);
261  svn_stream_set_close(stream, finish_encoding_data);
262  return stream;
263}
264
265
266const svn_string_t *
267svn_base64_encode_string2(const svn_string_t *str,
268                          svn_boolean_t break_lines,
269                          apr_pool_t *pool)
270{
271  svn_stringbuf_t *encoded = svn_stringbuf_create_empty(pool);
272  unsigned char ingroup[3];
273  size_t ingrouplen = 0;
274  size_t linelen = 0;
275
276  encode_bytes(encoded, str->data, str->len, ingroup, &ingrouplen, &linelen,
277               break_lines);
278  encode_partial_group(encoded, ingroup, ingrouplen, linelen,
279                       break_lines);
280  return svn_stringbuf__morph_into_string(encoded);
281}
282
283const svn_string_t *
284svn_base64_encode_string(const svn_string_t *str, apr_pool_t *pool)
285{
286  return svn_base64_encode_string2(str, TRUE, pool);
287}
288
289
290
291/* Base64-encoded input --> binary output */
292
/* State carried from call to call by the decoding stream's write and
   close handlers. */
struct decode_baton {
  svn_stream_t *output;         /* Stream that receives the decoded bytes */
  unsigned char buf[4];         /* 6-bit values waiting to be decoded
                                   (already translated from base64 chars) */
  int buflen;                   /* Number of values waiting */
  svn_boolean_t done;           /* True if we already saw an '=' */
  apr_pool_t *scratch_pool;     /* Cleared after each write, destroyed
                                   on close */
};
300
301
302/* Base64-decode a group.  IN needs to have four bytes and OUT needs
303   to have room for three bytes.  The input bytes must already have
304   been decoded from base64tab into the range 0..63.  The four
305   six-bit values are pasted together to form three eight-bit bytes.  */
306static APR_INLINE void
307decode_group(const unsigned char *in, char *out)
308{
309  out[0] = (char)((in[0] << 2) | (in[1] >> 4));
310  out[1] = (char)(((in[1] & 0xf) << 4) | (in[2] >> 2));
311  out[2] = (char)(((in[2] & 0x3) << 6) | in[3]);
312}
313
/* Reverse lookup table for base64 characters, indexed by the char code
   (as unsigned char): reverse_base64[ch] is the 6-bit value that CH
   stands for ('A' => 0 etc.), or -1 if CH is not a base64 character.
   The row comments name the ASCII characters of each code range. */
static const signed char reverse_base64[256] = {
  /* 0x00 - 0x0f */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 - 0x1f */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 - 0x2f: '+' => 62, '/' => 63 */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, 62, -1, -1, -1, 63,
  /* 0x30 - 0x3f: '0'..'9' => 52..61 */
  52, 53, 54, 55, 56, 57, 58, 59,   60, 61, -1, -1, -1, -1, -1, -1,
  /* 0x40 - 0x4f: 'A'..'O' => 0..14 */
  -1,  0,  1,  2,  3,  4,  5,  6,    7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x50 - 0x5f: 'P'..'Z' => 15..25 */
  15, 16, 17, 18, 19, 20, 21, 22,   23, 24, 25, -1, -1, -1, -1, -1,
  /* 0x60 - 0x6f: 'a'..'o' => 26..40 */
  -1, 26, 27, 28, 29, 30, 31, 32,   33, 34, 35, 36, 37, 38, 39, 40,
  /* 0x70 - 0x7f: 'p'..'z' => 41..51 */
  41, 42, 43, 44, 45, 46, 47, 48,   49, 50, 51, -1, -1, -1, -1, -1,
  /* 0x80 - 0x8f */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 - 0x9f */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa0 - 0xaf */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb0 - 0xbf */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc0 - 0xcf */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd0 - 0xdf */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe0 - 0xef */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf0 - 0xff */
  -1, -1, -1, -1, -1, -1, -1, -1,   -1, -1, -1, -1, -1, -1, -1, -1
};
335
336/* Similar to decode_group but this function also translates the
337   6-bit values from the IN buffer before translating them.
338   Return FALSE if a non-base64 char (e.g. '=' or new line)
339   has been encountered. */
340static APR_INLINE svn_boolean_t
341decode_group_directly(const unsigned char *in, char *out)
342{
343  /* Translate the base64 chars in values [0..63, 0xff] */
344  apr_size_t part0 = (unsigned char)reverse_base64[(unsigned char)in[0]];
345  apr_size_t part1 = (unsigned char)reverse_base64[(unsigned char)in[1]];
346  apr_size_t part2 = (unsigned char)reverse_base64[(unsigned char)in[2]];
347  apr_size_t part3 = (unsigned char)reverse_base64[(unsigned char)in[3]];
348
349  /* Pack 4x6 bits into 3x8.*/
350  out[0] = (char)((part0 << 2) | (part1 >> 4));
351  out[1] = (char)(((part1 & 0xf) << 4) | (part2 >> 2));
352  out[2] = (char)(((part2 & 0x3) << 6) | part3);
353
354  /* FALSE, iff any part is 0xff. */
355  return (part0 | part1 | part2 | part3) != (unsigned char)(-1);
356}
357
358/* Base64-encode up to BASE64_LINELEN chars from *DATA and append it to
359   STR.  After the function returns, *DATA will point to the first char
360   that has not been translated, yet.  Returns TRUE if all BASE64_LINELEN
361   chars could be translated, i.e. no special char has been encountered
362   in between.
363   The code in this function will simply transform the data without
364   performing any boundary checks.  Therefore, DATA must have at least
365   BASE64_LINELEN left and space for at least another BYTES_PER_LINE
366   chars must have been pre-allocated in STR before calling this
367   function. */
368static svn_boolean_t
369decode_line(svn_stringbuf_t *str, const char **data)
370{
371  /* Decode up to BYTES_PER_LINE bytes directly from *DATA into STR->DATA. */
372  const unsigned char *p = *(const unsigned char **)data;
373  char *out = str->data + str->len;
374  char *end = out + BYTES_PER_LINE;
375
376  /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
377     a multiple of 4.  Stop translation as soon as we encounter a special
378     char.  Leave the entire group untouched in that case. */
379  for (; out < end; p += 4, out += 3)
380    if (!decode_group_directly(p, out))
381      break;
382
383  /* Update string sizes and positions. */
384  str->len = out - str->data;
385  *out = '\0';
386  *data = (const char *)p;
387
388  /* Return FALSE, if the caller should continue the decoding process
389     using the slow standard method. */
390  return out == end;
391}
392
393
394/* (Continue to) Base64-decode the byte string DATA (of length LEN)
395   into STR. INBUF, INBUFLEN, and DONE are used internally; the
396   caller shall have room for four bytes in INBUF and initialize
397   *INBUFLEN to 0 and *DONE to FALSE.
398
399   INBUF and *INBUFLEN carry the leftover bytes from call to call, and
400   *DONE keeps track of whether we've seen an '=' which terminates the
401   encoded data. */
402static void
403decode_bytes(svn_stringbuf_t *str, const char *data, apr_size_t len,
404             unsigned char *inbuf, int *inbuflen, svn_boolean_t *done)
405{
406  const char *p = data;
407  char group[3];
408  signed char find;
409  const char *end = data + len;
410
411  /* Resize the stringbuf to make room for the maximum size of output,
412     to avoid repeated resizes later.  The optimizations in
413     decode_line rely on no resizes being necessary!
414
415     (*inbuflen+len) is encoded data length
416     (*inbuflen+len)/4 is the number of complete 4-bytes sets
417     (*inbuflen+len)/4*3 is the number of decoded bytes
418     svn_stringbuf_ensure will add an additional byte for the terminating 0.
419  */
420  svn_stringbuf_ensure(str, str->len + ((*inbuflen + len) / 4) * 3);
421
422  while ( !*done && p < end )
423    {
424      /* If no data is left in temporary INBUF and there is at least
425         one line-sized chunk left to decode, we may use the optimized
426         code path. */
427      if ((*inbuflen == 0) && (p + BASE64_LINELEN <= end))
428        if (decode_line(str, &p))
429          continue;
430
431      /* A special case or decode_line encountered a special char. */
432      if (*p == '=')
433        {
434          /* We are at the end and have to decode a partial group.  */
435          if (*inbuflen >= 2)
436            {
437              memset(inbuf + *inbuflen, 0, 4 - *inbuflen);
438              decode_group(inbuf, group);
439              svn_stringbuf_appendbytes(str, group, *inbuflen - 1);
440            }
441          *done = TRUE;
442        }
443      else
444        {
445          find = reverse_base64[(unsigned char)*p];
446          ++p;
447
448          if (find >= 0)
449            inbuf[(*inbuflen)++] = find;
450          if (*inbuflen == 4)
451            {
452              decode_group(inbuf, group);
453              svn_stringbuf_appendbytes(str, group, 3);
454              *inbuflen = 0;
455            }
456        }
457    }
458}
459
460
461/* Write handler for svn_base64_decode.  */
462static svn_error_t *
463decode_data(void *baton, const char *data, apr_size_t *len)
464{
465  struct decode_baton *db = baton;
466  svn_stringbuf_t *decoded;
467  apr_size_t declen;
468  svn_error_t *err = SVN_NO_ERROR;
469
470  /* Decode this block of data.  */
471  decoded = svn_stringbuf_create_empty(db->scratch_pool);
472  decode_bytes(decoded, data, *len, db->buf, &db->buflen, &db->done);
473
474  /* Write the output, clean up, go home.  */
475  declen = decoded->len;
476  if (declen != 0)
477    err = svn_stream_write(db->output, decoded->data, &declen);
478  svn_pool_clear(db->scratch_pool);
479  return err;
480}
481
482
483/* Close handler for svn_base64_decode().  */
484static svn_error_t *
485finish_decoding_data(void *baton)
486{
487  struct decode_baton *db = baton;
488  svn_error_t *err;
489
490  /* Pass on the close request and clean up the baton.  */
491  err = svn_stream_close(db->output);
492  svn_pool_destroy(db->scratch_pool);
493  return err;
494}
495
496
497svn_stream_t *
498svn_base64_decode(svn_stream_t *output, apr_pool_t *pool)
499{
500  struct decode_baton *db = apr_palloc(pool, sizeof(*db));
501  svn_stream_t *stream;
502
503  db->output = output;
504  db->buflen = 0;
505  db->done = FALSE;
506  db->scratch_pool = svn_pool_create(pool);
507  stream = svn_stream_create(db, pool);
508  svn_stream_set_write(stream, decode_data);
509  svn_stream_set_close(stream, finish_decoding_data);
510  return stream;
511}
512
513
514const svn_string_t *
515svn_base64_decode_string(const svn_string_t *str, apr_pool_t *pool)
516{
517  svn_stringbuf_t *decoded = svn_stringbuf_create_empty(pool);
518  unsigned char ingroup[4];
519  int ingrouplen = 0;
520  svn_boolean_t done = FALSE;
521
522  decode_bytes(decoded, str->data, str->len, ingroup, &ingrouplen, &done);
523  return svn_stringbuf__morph_into_string(decoded);
524}
525
526
527/* Return a base64-encoded representation of CHECKSUM, allocated in POOL.
528   If CHECKSUM->kind is not recognized, return NULL.
529   ### That 'NULL' claim was in the header file when this was public, but
530   doesn't look true in the implementation.
531
532   ### This is now only used as a new implementation of svn_base64_from_md5();
533   it would probably be safer to revert that to its old implementation. */
534static svn_stringbuf_t *
535base64_from_checksum(const svn_checksum_t *checksum, apr_pool_t *pool)
536{
537  svn_stringbuf_t *checksum_str;
538  unsigned char ingroup[3];
539  size_t ingrouplen = 0;
540  size_t linelen = 0;
541  checksum_str = svn_stringbuf_create_empty(pool);
542
543  encode_bytes(checksum_str, checksum->digest,
544               svn_checksum_size(checksum), ingroup, &ingrouplen,
545               &linelen, TRUE);
546  encode_partial_group(checksum_str, ingroup, ingrouplen, linelen, TRUE);
547
548  /* Our base64-encoding routines append a final newline if any data
549     was created at all, so let's hack that off. */
550  if (checksum_str->len)
551    {
552      checksum_str->len--;
553      checksum_str->data[checksum_str->len] = 0;
554    }
555
556  return checksum_str;
557}
558
559
560svn_stringbuf_t *
561svn_base64_from_md5(unsigned char digest[], apr_pool_t *pool)
562{
563  svn_checksum_t *checksum
564    = svn_checksum__from_digest_md5(digest, pool);
565
566  return base64_from_checksum(checksum, pool);
567}
568