/*
 * base64.c:  base64 encoding and decoding functions
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */



#include <string.h>

#include <apr.h>
#include <apr_pools.h>
#include <apr_general.h>        /* for APR_INLINE */

#include "svn_pools.h"
#include "svn_io.h"
#include "svn_error.h"
#include "svn_base64.h"
#include "private/svn_string_private.h"
#include "private/svn_subr_private.h"

/* When asked to format the base64-encoded output as multiple lines,
   we put this many chars in each line (plus one new line char) unless
   we run out of data.
   It is vital for some of the optimizations below that this value is
   a multiple of 4: the line-at-a-time code paths process whole groups
   of four base64 chars and never look at a partial group. */
#define BASE64_LINELEN 76

/* This number of input bytes is encoded in one full line of
   BASE64_LINELEN base64 chars (every 3 bytes become 4 chars).
*/ 47#define BYTES_PER_LINE (BASE64_LINELEN / 4 * 3) 48 49/* Value -> base64 char mapping table (2^6 entries) */ 50static const char base64tab[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ 51 "abcdefghijklmnopqrstuvwxyz0123456789+/"; 52 53 54/* Binary input --> base64-encoded output */ 55 56struct encode_baton { 57 svn_stream_t *output; 58 unsigned char buf[3]; /* Bytes waiting to be encoded */ 59 size_t buflen; /* Number of bytes waiting */ 60 size_t linelen; /* Bytes output so far on this line */ 61 apr_pool_t *scratch_pool; 62}; 63 64 65/* Base64-encode a group. IN needs to have three bytes and OUT needs 66 to have room for four bytes. The input group is treated as four 67 six-bit units which are treated as lookups into base64tab for the 68 bytes of the output group. */ 69static APR_INLINE void 70encode_group(const unsigned char *in, char *out) 71{ 72 /* Expand input bytes to machine word length (with zero extra cost 73 on x86/x64) ... */ 74 apr_size_t part0 = in[0]; 75 apr_size_t part1 = in[1]; 76 apr_size_t part2 = in[2]; 77 78 /* ... to prevent these arithmetic operations from being limited to 79 byte size. This saves non-zero cost conversions of the result when 80 calculating the addresses within base64tab. */ 81 out[0] = base64tab[part0 >> 2]; 82 out[1] = base64tab[((part0 & 3) << 4) | (part1 >> 4)]; 83 out[2] = base64tab[((part1 & 0xf) << 2) | (part2 >> 6)]; 84 out[3] = base64tab[part2 & 0x3f]; 85} 86 87/* Base64-encode a line, i.e. BYTES_PER_LINE bytes from DATA into 88 BASE64_LINELEN chars and append it to STR. It does not assume that 89 a new line char will be appended, though. 90 The code in this function will simply transform the data without 91 performing any boundary checks. Therefore, DATA must have at least 92 BYTES_PER_LINE left and space for at least another BASE64_LINELEN 93 chars must have been pre-allocated in STR before calling this 94 function. 
*/ 95static void 96encode_line(svn_stringbuf_t *str, const char *data) 97{ 98 /* Translate directly from DATA to STR->DATA. */ 99 const unsigned char *in = (const unsigned char *)data; 100 char *out = str->data + str->len; 101 char *end = out + BASE64_LINELEN; 102 103 /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN 104 a multiple of 4. */ 105 for ( ; out != end; in += 3, out += 4) 106 encode_group(in, out); 107 108 /* Expand and terminate the string. */ 109 *out = '\0'; 110 str->len += BASE64_LINELEN; 111} 112 113/* (Continue to) Base64-encode the byte string DATA (of length LEN) 114 into STR. Include newlines every so often if BREAK_LINES is true. 115 INBUF, INBUFLEN, and LINELEN are used internally; the caller shall 116 make INBUF have room for three characters and initialize *INBUFLEN 117 and *LINELEN to 0. 118 119 INBUF and *INBUFLEN carry the leftover data from call to call, and 120 *LINELEN carries the length of the current output line. */ 121static void 122encode_bytes(svn_stringbuf_t *str, const void *data, apr_size_t len, 123 unsigned char *inbuf, size_t *inbuflen, size_t *linelen, 124 svn_boolean_t break_lines) 125{ 126 char group[4]; 127 const char *p = data, *end = p + len; 128 apr_size_t buflen; 129 130 /* Resize the stringbuf to make room for the (approximate) size of 131 output, to avoid repeated resizes later. 132 Please note that our optimized code relies on the fact that STR 133 never needs to be resized until we leave this function. */ 134 buflen = len * 4 / 3 + 4; 135 if (break_lines) 136 { 137 /* Add an extra space for line breaks. */ 138 buflen += buflen / BASE64_LINELEN; 139 } 140 svn_stringbuf_ensure(str, str->len + buflen); 141 142 /* Keep encoding three-byte groups until we run out. */ 143 while (*inbuflen + (end - p) >= 3) 144 { 145 /* May we encode BYTES_PER_LINE bytes without caring about 146 line breaks, data in the temporary INBUF or running out 147 of data? 
*/ 148 if ( *inbuflen == 0 149 && (*linelen == 0 || !break_lines) 150 && (end - p >= BYTES_PER_LINE)) 151 { 152 /* Yes, we can encode a whole chunk of data at once. */ 153 encode_line(str, p); 154 p += BYTES_PER_LINE; 155 *linelen += BASE64_LINELEN; 156 } 157 else 158 { 159 /* No, this is one of a number of special cases. 160 Encode the data byte by byte. */ 161 memcpy(inbuf + *inbuflen, p, 3 - *inbuflen); 162 p += (3 - *inbuflen); 163 encode_group(inbuf, group); 164 svn_stringbuf_appendbytes(str, group, 4); 165 *inbuflen = 0; 166 *linelen += 4; 167 } 168 169 /* Add line breaks as necessary. */ 170 if (break_lines && *linelen == BASE64_LINELEN) 171 { 172 svn_stringbuf_appendbyte(str, '\n'); 173 *linelen = 0; 174 } 175 } 176 177 /* Tack any extra input onto *INBUF. */ 178 memcpy(inbuf + *inbuflen, p, end - p); 179 *inbuflen += (end - p); 180} 181 182 183/* Encode leftover data, if any, and possibly a final newline (if 184 there has been any data and BREAK_LINES is set), appending to STR. 185 LEN must be in the range 0..2. */ 186static void 187encode_partial_group(svn_stringbuf_t *str, const unsigned char *extra, 188 size_t len, size_t linelen, svn_boolean_t break_lines) 189{ 190 unsigned char ingroup[3]; 191 char outgroup[4]; 192 193 if (len > 0) 194 { 195 memcpy(ingroup, extra, len); 196 memset(ingroup + len, 0, 3 - len); 197 encode_group(ingroup, outgroup); 198 memset(outgroup + (len + 1), '=', 4 - (len + 1)); 199 svn_stringbuf_appendbytes(str, outgroup, 4); 200 linelen += 4; 201 } 202 if (break_lines && linelen > 0) 203 svn_stringbuf_appendbyte(str, '\n'); 204} 205 206 207/* Write handler for svn_base64_encode. */ 208static svn_error_t * 209encode_data(void *baton, const char *data, apr_size_t *len) 210{ 211 struct encode_baton *eb = baton; 212 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool); 213 apr_size_t enclen; 214 svn_error_t *err = SVN_NO_ERROR; 215 216 /* Encode this block of data and write it out. 
*/ 217 encode_bytes(encoded, data, *len, eb->buf, &eb->buflen, &eb->linelen, TRUE); 218 enclen = encoded->len; 219 if (enclen != 0) 220 err = svn_stream_write(eb->output, encoded->data, &enclen); 221 svn_pool_clear(eb->scratch_pool); 222 return err; 223} 224 225 226/* Close handler for svn_base64_encode(). */ 227static svn_error_t * 228finish_encoding_data(void *baton) 229{ 230 struct encode_baton *eb = baton; 231 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool); 232 apr_size_t enclen; 233 svn_error_t *err = SVN_NO_ERROR; 234 235 /* Encode a partial group at the end if necessary, and write it out. */ 236 encode_partial_group(encoded, eb->buf, eb->buflen, eb->linelen, TRUE); 237 enclen = encoded->len; 238 if (enclen != 0) 239 err = svn_stream_write(eb->output, encoded->data, &enclen); 240 241 /* Pass on the close request and clean up the baton. */ 242 if (err == SVN_NO_ERROR) 243 err = svn_stream_close(eb->output); 244 svn_pool_destroy(eb->scratch_pool); 245 return err; 246} 247 248 249svn_stream_t * 250svn_base64_encode(svn_stream_t *output, apr_pool_t *pool) 251{ 252 struct encode_baton *eb = apr_palloc(pool, sizeof(*eb)); 253 svn_stream_t *stream; 254 255 eb->output = output; 256 eb->buflen = 0; 257 eb->linelen = 0; 258 eb->scratch_pool = svn_pool_create(pool); 259 stream = svn_stream_create(eb, pool); 260 svn_stream_set_write(stream, encode_data); 261 svn_stream_set_close(stream, finish_encoding_data); 262 return stream; 263} 264 265 266const svn_string_t * 267svn_base64_encode_string2(const svn_string_t *str, 268 svn_boolean_t break_lines, 269 apr_pool_t *pool) 270{ 271 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(pool); 272 unsigned char ingroup[3]; 273 size_t ingrouplen = 0; 274 size_t linelen = 0; 275 276 encode_bytes(encoded, str->data, str->len, ingroup, &ingrouplen, &linelen, 277 break_lines); 278 encode_partial_group(encoded, ingroup, ingrouplen, linelen, 279 break_lines); 280 return 
svn_stringbuf__morph_into_string(encoded); 281} 282 283const svn_string_t * 284svn_base64_encode_string(const svn_string_t *str, apr_pool_t *pool) 285{ 286 return svn_base64_encode_string2(str, TRUE, pool); 287} 288 289 290 291/* Base64-encoded input --> binary output */ 292 293struct decode_baton { 294 svn_stream_t *output; 295 unsigned char buf[4]; /* Bytes waiting to be decoded */ 296 int buflen; /* Number of bytes waiting */ 297 svn_boolean_t done; /* True if we already saw an '=' */ 298 apr_pool_t *scratch_pool; 299}; 300 301 302/* Base64-decode a group. IN needs to have four bytes and OUT needs 303 to have room for three bytes. The input bytes must already have 304 been decoded from base64tab into the range 0..63. The four 305 six-bit values are pasted together to form three eight-bit bytes. */ 306static APR_INLINE void 307decode_group(const unsigned char *in, char *out) 308{ 309 out[0] = (char)((in[0] << 2) | (in[1] >> 4)); 310 out[1] = (char)(((in[1] & 0xf) << 4) | (in[2] >> 2)); 311 out[2] = (char)(((in[2] & 0x3) << 6) | in[3]); 312} 313 314/* Lookup table for base64 characters; reverse_base64[ch] gives a 315 negative value if ch is not a valid base64 character, or otherwise 316 the value of the byte represented; 'A' => 0 etc. 
*/
static const signed char reverse_base64[256] = {
/* 0x00 - 0x1f: no base64 chars */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x20 - 0x2f: '+' => 62 and '/' => 63 */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
/* 0x30 - 0x3f: '0'..'9' => 52..61 */
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
/* 0x40 - 0x5f: 'A'..'Z' => 0..25 */
-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
/* 0x60 - 0x7f: 'a'..'z' => 26..51 */
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
/* 0x80 - 0xff: no base64 chars */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/* Similar to decode_group, but this function first translates the
   four base64 chars in the IN buffer into their 6-bit values before
   packing them into three bytes at OUT.
   Return FALSE if a non-base64 char (e.g. '=' or new line)
   has been encountered.
*/ 340static APR_INLINE svn_boolean_t 341decode_group_directly(const unsigned char *in, char *out) 342{ 343 /* Translate the base64 chars in values [0..63, 0xff] */ 344 apr_size_t part0 = (unsigned char)reverse_base64[(unsigned char)in[0]]; 345 apr_size_t part1 = (unsigned char)reverse_base64[(unsigned char)in[1]]; 346 apr_size_t part2 = (unsigned char)reverse_base64[(unsigned char)in[2]]; 347 apr_size_t part3 = (unsigned char)reverse_base64[(unsigned char)in[3]]; 348 349 /* Pack 4x6 bits into 3x8.*/ 350 out[0] = (char)((part0 << 2) | (part1 >> 4)); 351 out[1] = (char)(((part1 & 0xf) << 4) | (part2 >> 2)); 352 out[2] = (char)(((part2 & 0x3) << 6) | part3); 353 354 /* FALSE, iff any part is 0xff. */ 355 return (part0 | part1 | part2 | part3) != (unsigned char)(-1); 356} 357 358/* Base64-encode up to BASE64_LINELEN chars from *DATA and append it to 359 STR. After the function returns, *DATA will point to the first char 360 that has not been translated, yet. Returns TRUE if all BASE64_LINELEN 361 chars could be translated, i.e. no special char has been encountered 362 in between. 363 The code in this function will simply transform the data without 364 performing any boundary checks. Therefore, DATA must have at least 365 BASE64_LINELEN left and space for at least another BYTES_PER_LINE 366 chars must have been pre-allocated in STR before calling this 367 function. */ 368static svn_boolean_t 369decode_line(svn_stringbuf_t *str, const char **data) 370{ 371 /* Decode up to BYTES_PER_LINE bytes directly from *DATA into STR->DATA. */ 372 const unsigned char *p = *(const unsigned char **)data; 373 char *out = str->data + str->len; 374 char *end = out + BYTES_PER_LINE; 375 376 /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN 377 a multiple of 4. Stop translation as soon as we encounter a special 378 char. Leave the entire group untouched in that case. 
*/ 379 for (; out < end; p += 4, out += 3) 380 if (!decode_group_directly(p, out)) 381 break; 382 383 /* Update string sizes and positions. */ 384 str->len = out - str->data; 385 *out = '\0'; 386 *data = (const char *)p; 387 388 /* Return FALSE, if the caller should continue the decoding process 389 using the slow standard method. */ 390 return out == end; 391} 392 393 394/* (Continue to) Base64-decode the byte string DATA (of length LEN) 395 into STR. INBUF, INBUFLEN, and DONE are used internally; the 396 caller shall have room for four bytes in INBUF and initialize 397 *INBUFLEN to 0 and *DONE to FALSE. 398 399 INBUF and *INBUFLEN carry the leftover bytes from call to call, and 400 *DONE keeps track of whether we've seen an '=' which terminates the 401 encoded data. */ 402static void 403decode_bytes(svn_stringbuf_t *str, const char *data, apr_size_t len, 404 unsigned char *inbuf, int *inbuflen, svn_boolean_t *done) 405{ 406 const char *p = data; 407 char group[3]; 408 signed char find; 409 const char *end = data + len; 410 411 /* Resize the stringbuf to make room for the maximum size of output, 412 to avoid repeated resizes later. The optimizations in 413 decode_line rely on no resizes being necessary! 414 415 (*inbuflen+len) is encoded data length 416 (*inbuflen+len)/4 is the number of complete 4-bytes sets 417 (*inbuflen+len)/4*3 is the number of decoded bytes 418 svn_stringbuf_ensure will add an additional byte for the terminating 0. 419 */ 420 svn_stringbuf_ensure(str, str->len + ((*inbuflen + len) / 4) * 3); 421 422 while ( !*done && p < end ) 423 { 424 /* If no data is left in temporary INBUF and there is at least 425 one line-sized chunk left to decode, we may use the optimized 426 code path. */ 427 if ((*inbuflen == 0) && (p + BASE64_LINELEN <= end)) 428 if (decode_line(str, &p)) 429 continue; 430 431 /* A special case or decode_line encountered a special char. */ 432 if (*p == '=') 433 { 434 /* We are at the end and have to decode a partial group. 
*/ 435 if (*inbuflen >= 2) 436 { 437 memset(inbuf + *inbuflen, 0, 4 - *inbuflen); 438 decode_group(inbuf, group); 439 svn_stringbuf_appendbytes(str, group, *inbuflen - 1); 440 } 441 *done = TRUE; 442 } 443 else 444 { 445 find = reverse_base64[(unsigned char)*p]; 446 ++p; 447 448 if (find >= 0) 449 inbuf[(*inbuflen)++] = find; 450 if (*inbuflen == 4) 451 { 452 decode_group(inbuf, group); 453 svn_stringbuf_appendbytes(str, group, 3); 454 *inbuflen = 0; 455 } 456 } 457 } 458} 459 460 461/* Write handler for svn_base64_decode. */ 462static svn_error_t * 463decode_data(void *baton, const char *data, apr_size_t *len) 464{ 465 struct decode_baton *db = baton; 466 svn_stringbuf_t *decoded; 467 apr_size_t declen; 468 svn_error_t *err = SVN_NO_ERROR; 469 470 /* Decode this block of data. */ 471 decoded = svn_stringbuf_create_empty(db->scratch_pool); 472 decode_bytes(decoded, data, *len, db->buf, &db->buflen, &db->done); 473 474 /* Write the output, clean up, go home. */ 475 declen = decoded->len; 476 if (declen != 0) 477 err = svn_stream_write(db->output, decoded->data, &declen); 478 svn_pool_clear(db->scratch_pool); 479 return err; 480} 481 482 483/* Close handler for svn_base64_decode(). */ 484static svn_error_t * 485finish_decoding_data(void *baton) 486{ 487 struct decode_baton *db = baton; 488 svn_error_t *err; 489 490 /* Pass on the close request and clean up the baton. 
*/ 491 err = svn_stream_close(db->output); 492 svn_pool_destroy(db->scratch_pool); 493 return err; 494} 495 496 497svn_stream_t * 498svn_base64_decode(svn_stream_t *output, apr_pool_t *pool) 499{ 500 struct decode_baton *db = apr_palloc(pool, sizeof(*db)); 501 svn_stream_t *stream; 502 503 db->output = output; 504 db->buflen = 0; 505 db->done = FALSE; 506 db->scratch_pool = svn_pool_create(pool); 507 stream = svn_stream_create(db, pool); 508 svn_stream_set_write(stream, decode_data); 509 svn_stream_set_close(stream, finish_decoding_data); 510 return stream; 511} 512 513 514const svn_string_t * 515svn_base64_decode_string(const svn_string_t *str, apr_pool_t *pool) 516{ 517 svn_stringbuf_t *decoded = svn_stringbuf_create_empty(pool); 518 unsigned char ingroup[4]; 519 int ingrouplen = 0; 520 svn_boolean_t done = FALSE; 521 522 decode_bytes(decoded, str->data, str->len, ingroup, &ingrouplen, &done); 523 return svn_stringbuf__morph_into_string(decoded); 524} 525 526 527/* Return a base64-encoded representation of CHECKSUM, allocated in POOL. 528 If CHECKSUM->kind is not recognized, return NULL. 529 ### That 'NULL' claim was in the header file when this was public, but 530 doesn't look true in the implementation. 531 532 ### This is now only used as a new implementation of svn_base64_from_md5(); 533 it would probably be safer to revert that to its old implementation. */ 534static svn_stringbuf_t * 535base64_from_checksum(const svn_checksum_t *checksum, apr_pool_t *pool) 536{ 537 svn_stringbuf_t *checksum_str; 538 unsigned char ingroup[3]; 539 size_t ingrouplen = 0; 540 size_t linelen = 0; 541 checksum_str = svn_stringbuf_create_empty(pool); 542 543 encode_bytes(checksum_str, checksum->digest, 544 svn_checksum_size(checksum), ingroup, &ingrouplen, 545 &linelen, TRUE); 546 encode_partial_group(checksum_str, ingroup, ingrouplen, linelen, TRUE); 547 548 /* Our base64-encoding routines append a final newline if any data 549 was created at all, so let's hack that off. 
*/ 550 if (checksum_str->len) 551 { 552 checksum_str->len--; 553 checksum_str->data[checksum_str->len] = 0; 554 } 555 556 return checksum_str; 557} 558 559 560svn_stringbuf_t * 561svn_base64_from_md5(unsigned char digest[], apr_pool_t *pool) 562{ 563 svn_checksum_t *checksum 564 = svn_checksum__from_digest_md5(digest, pool); 565 566 return base64_from_checksum(checksum, pool); 567} 568