1/* $NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2013 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Taylor R. Campbell. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__RCSID("$NetBSD: vndcompress.c,v 1.29 2017/07/29 21:04:07 riastradh Exp $"); 34 35#include <sys/endian.h> 36#include <sys/stat.h> 37 38#include <assert.h> 39#include <err.h> 40#include <errno.h> 41#include <fcntl.h> 42#include <inttypes.h> 43#include <limits.h> 44#include <signal.h> 45#include <stdbool.h> 46#include <stdint.h> 47#include <stdio.h> 48#include <stdlib.h> 49#include <string.h> 50#include <unistd.h> 51#include <zlib.h> 52 53#include "common.h" 54#include "offtab.h" 55#include "utils.h" 56 57/* 58 * XXX Switch to control bug-for-bug byte-for-byte compatibility with 59 * NetBSD's vndcompress. 60 */ 61#define VNDCOMPRESS_COMPAT 0 62 63__CTASSERT(sizeof(struct cloop2_header) == CLOOP2_OFFSET_TABLE_OFFSET); 64 65struct compress_state { 66 uint64_t size; /* uncompressed size */ 67 uint64_t offset; /* output byte offset */ 68 uint32_t blocksize; /* bytes per block */ 69 uint32_t blkno; /* input block number */ 70 uint32_t n_full_blocks; /* floor(size/blocksize) */ 71 uint32_t n_blocks; /* ceiling(size/blocksize) */ 72 uint32_t n_offsets; /* n_blocks + 1 */ 73 uint32_t end_block; /* last block to transfer */ 74 uint32_t checkpoint_blocks; /* blocks before checkpoint */ 75 int image_fd; 76 int cloop2_fd; 77 struct offtab offtab; 78 uint32_t n_checkpointed_blocks; 79 volatile sig_atomic_t 80 initialized; /* everything above initialized? */ 81}; 82 83/* Global compression state for SIGINFO handler. */ 84static struct compress_state global_state; 85 86struct sigdesc { 87 int sd_signo; 88 const char *sd_name; 89}; 90 91static const struct sigdesc info_signals[] = { 92 { SIGINFO, "SIGINFO" }, 93 { SIGUSR1, "SIGUSR1" }, 94}; 95 96static const struct sigdesc checkpoint_signals[] = { 97 { SIGUSR2, "SIGUSR2" }, 98}; 99 100static void init_signals(void); 101static void init_signal_handler(int, const struct sigdesc *, size_t, 102 void (*)(int)); 103static void info_signal_handler(int); 104static void checkpoint_signal_handler(int); 105static void compress_progress(struct compress_state *); 106static void compress_init(int, char **, const struct options *, 107 struct compress_state *); 108static bool compress_restart(struct compress_state *); 109static uint32_t compress_block(int, int, uint32_t, uint32_t, uint32_t, void *, 110 void *); 111static void compress_maybe_checkpoint(struct compress_state *); 112static void compress_checkpoint(struct compress_state *); 113static void compress_exit(struct compress_state *); 114 115/* 116 * Compression entry point. 117 */ 118int 119vndcompress(int argc, char **argv, const struct options *O) 120{ 121 struct compress_state *const S = &global_state; 122 123 /* Paranoia. The other fields either have no sentinel or use zero. */ 124 S->image_fd = -1; 125 S->cloop2_fd = -1; 126 127 /* Set up signal handlers so we can handle SIGINFO ASAP. */ 128 init_signals(); 129 130 /* 131 * Parse the arguments to initialize our state. 132 */ 133 compress_init(argc, argv, O, S); 134 assert(MIN_BLOCKSIZE <= S->blocksize); 135 assert(S->blocksize <= MAX_BLOCKSIZE); 136 137 /* 138 * Allocate compression buffers. 139 * 140 * Compression may actually expand. From an overabundance of 141 * caution, assume it can expand by at most double. 142 * 143 * XXX Check and consider tightening this assumption. 144 */ 145 __CTASSERT(MAX_BLOCKSIZE <= SIZE_MAX); 146 void *const uncompbuf = malloc(S->blocksize); 147 if (uncompbuf == NULL) 148 err(1, "malloc uncompressed buffer"); 149 150 /* XXX compression ratio bound */ 151 __CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE)); 152 void *const compbuf = malloc(2 * (size_t)S->blocksize); 153 if (compbuf == NULL) 154 err(1, "malloc compressed buffer"); 155 156 /* 157 * Compress the blocks. S->blkno specifies the input block 158 * we're about to transfer. S->offset is the current output 159 * offset. 160 */ 161 while (S->blkno < S->n_blocks) { 162 /* Report any progress. */ 163 compress_progress(S); 164 165 /* Stop if we've done the requested partial transfer. */ 166 if ((0 < S->end_block) && (S->end_block <= S->blkno)) 167 goto out; 168 169 /* Checkpoint if appropriate. */ 170 compress_maybe_checkpoint(S); 171 offtab_prepare_put(&S->offtab, (S->blkno + 1)); 172 173 /* Choose read size: partial if last block, full if not. */ 174 const uint32_t readsize = (S->blkno == S->n_full_blocks? 175 (S->size % S->blocksize) : S->blocksize); 176 assert(readsize > 0); 177 assert(readsize <= S->blocksize); 178 179 /* Fail noisily if we might be about to overflow. */ 180 /* XXX compression ratio bound */ 181 __CTASSERT(MUL_OK(uint64_t, 2, MAX_BLOCKSIZE)); 182 __CTASSERT(MUL_OK(off_t, 2, MAX_BLOCKSIZE)); 183 assert(S->offset <= MIN(UINT64_MAX, OFF_MAX)); 184 if (!ADD_OK(uint64_t, S->offset, 2*(uintmax_t)readsize) || 185 !ADD_OK(off_t, S->offset, 2*(uintmax_t)readsize)) 186 errx(1, "blkno %"PRIu32" may overflow: %ju + 2*%ju", 187 S->blkno, (uintmax_t)S->offset, 188 (uintmax_t)readsize); 189 190 /* Process the block. */ 191 const uint32_t complen = 192 compress_block(S->image_fd, S->cloop2_fd, S->blkno, 193 S->blocksize, readsize, uncompbuf, compbuf); 194 195 /* 196 * Signal-atomically update the state to reflect 197 * (a) what block number we are now at, 198 * (b) how far we are now in the output file, and 199 * (c) where the last block ended. 200 */ 201 assert(ADD_OK(uint32_t, S->blkno, 1)); 202 assert(ADD_OK(uint64_t, S->offset, complen)); 203 assert(ADD_OK(off_t, (off_t)S->offset, (off_t)complen)); 204 assert((S->blkno + 1) < S->n_offsets); 205 { 206 sigset_t old_sigmask; 207 block_signals(&old_sigmask); 208 S->blkno += 1; /* (a) */ 209 S->offset += complen; /* (b) */ 210 offtab_put(&S->offtab, S->blkno, S->offset); /* (c) */ 211 restore_sigmask(&old_sigmask); 212 } 213 } 214 215 /* Make sure we're all done. */ 216 assert(S->blkno == S->n_blocks); 217 assert((S->blkno + 1) == S->n_offsets); 218 219 /* Pad to the disk block size. */ 220 const uint32_t n_extra = (S->offset % DEV_BSIZE); 221 if (n_extra != 0) { 222 const uint32_t n_padding = (DEV_BSIZE - n_extra); 223 /* Reuse compbuf -- guaranteed to be large enough. */ 224 (void)memset(compbuf, 0, n_padding); 225 const ssize_t n_written = write(S->cloop2_fd, compbuf, 226 n_padding); 227 if (n_written == -1) 228 err(1, "write final padding failed"); 229 assert(n_written >= 0); 230 if ((size_t)n_written != n_padding) 231 errx(1, "partial write of final padding bytes" 232 ": %zu != %"PRIu32, 233 (size_t)n_written, n_padding); 234 235 /* Account for the extra bytes in the output file. */ 236 assert(ADD_OK(uint64_t, S->offset, n_padding)); 237 assert(ADD_OK(off_t, (off_t)S->offset, (off_t)n_padding)); 238 { 239 sigset_t old_sigmask; 240 block_signals(&old_sigmask); 241 S->offset += n_padding; 242 restore_sigmask(&old_sigmask); 243 } 244 } 245 246out: 247 /* One last checkpoint to commit the offset table. */ 248 assert(S->offset <= OFF_MAX); 249 assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR)); 250 compress_checkpoint(S); 251 252 /* 253 * Free the compression buffers and finalize the compression. 254 */ 255 free(compbuf); 256 free(uncompbuf); 257 compress_exit(S); 258 259 return 0; 260} 261 262/* 263 * Signal cruft. 264 */ 265 266static void 267init_signals(void) 268{ 269 270 init_signal_handler(SA_RESTART, info_signals, 271 __arraycount(info_signals), &info_signal_handler); 272 init_signal_handler(SA_RESTART, checkpoint_signals, 273 __arraycount(checkpoint_signals), &checkpoint_signal_handler); 274} 275 276static void 277init_signal_handler(int flags, const struct sigdesc *signals, size_t n, 278 void (*handler)(int)) 279{ 280 static const struct sigaction zero_sa; 281 struct sigaction sa = zero_sa; 282 size_t i; 283 284 (void)sigemptyset(&sa.sa_mask); 285 for (i = 0; i < n; i++) 286 (void)sigaddset(&sa.sa_mask, signals[i].sd_signo); 287 sa.sa_flags = flags; 288 sa.sa_handler = handler; 289 for (i = 0; i < n; i++) 290 if (sigaction(signals[i].sd_signo, &sa, NULL) == -1) 291 err(1, "sigaction(%s)", signals[i].sd_name); 292} 293 294static void 295info_signal_handler(int signo __unused) 296{ 297 /* Save errno. */ 298 const int error = errno; 299 struct compress_state *const S = &global_state; 300 char buf[128]; 301 302 /* Bail if the state is not yet initialized. */ 303 if (!S->initialized) { 304 warnx_ss("initializing"); 305 goto out; 306 } 307 308 /* Carefully calculate our I/O position. */ 309 assert(S->blocksize > 0); 310 __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE)); 311 const uint64_t nread = ((uint64_t)S->blkno * (uint64_t)S->blocksize); 312 313 assert(S->n_blocks > 0); 314 __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, sizeof(uint64_t))); 315 __CTASSERT(ADD_OK(uint64_t, CLOOP2_OFFSET_TABLE_OFFSET, 316 MAX_N_BLOCKS*sizeof(uint64_t))); 317 const uint64_t nwritten = (S->offset <= (CLOOP2_OFFSET_TABLE_OFFSET + 318 ((uint64_t)S->n_blocks * sizeof(uint64_t)))? 319 0 : S->offset); 320 321 /* snprintf_ss can't do floating-point, so do fixed-point instead. */ 322 const uint64_t ratio_percent = 323 (nread > 0? 324 ((nwritten >= (UINT64_MAX / 100)) ? 325 ((nwritten / nread) * 100) : ((nwritten * 100) / nread)) 326 : 0); 327 328 /* Format the status. */ 329 assert(S->n_checkpointed_blocks <= MAX_N_BLOCKS); 330 assert(S->blocksize <= MAX_BLOCKSIZE); 331 __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE)); 332 const int n = snprintf_ss(buf, sizeof(buf), 333 "vndcompress: read %"PRIu64" bytes, wrote %"PRIu64" bytes, " 334 "compression ratio %"PRIu64"%% (checkpointed %"PRIu64" bytes)\n", 335 nread, nwritten, ratio_percent, 336 ((uint64_t)S->n_checkpointed_blocks * (uint64_t)S->blocksize)); 337 if (n < 0) { 338 const char msg[] = "vndcompress: can't format info\n"; 339 (void)write(STDERR_FILENO, msg, __arraycount(msg)); 340 } else { 341 __CTASSERT(INT_MAX <= SIZE_MAX); 342 (void)write(STDERR_FILENO, buf, (size_t)n); 343 } 344 345out: 346 /* Restore errno. */ 347 errno = error; 348} 349 350static void 351checkpoint_signal_handler(int signo __unused) 352{ 353 /* Save errno. */ 354 const int error = errno; 355 struct compress_state *const S = &global_state; 356 357 /* Bail if the state is not yet initialized. */ 358 if (!S->initialized) { 359 warnx_ss("nothing to checkpoint yet"); 360 goto out; 361 } 362 363 assert(S->image_fd >= 0); 364 assert(S->cloop2_fd >= 0); 365 366 /* Take a checkpoint. */ 367 assert(S->blkno <= MAX_N_BLOCKS); 368 assert(S->blocksize <= MAX_BLOCKSIZE); 369 __CTASSERT(MUL_OK(uint64_t, MAX_N_BLOCKS, MAX_BLOCKSIZE)); 370 warnx_ss("checkpointing %"PRIu64" bytes", 371 ((uint64_t)S->blkno * (uint64_t)S->blocksize)); 372 compress_checkpoint(S); 373 374out: 375 /* Restore errno. */ 376 errno = error; 377} 378 379/* 380 * Report progress. 381 * 382 * XXX Should do a progress bar here. 383 */ 384static void 385compress_progress(struct compress_state *S __unused) 386{ 387} 388 389/* 390 * Parse arguments, open the files, and initialize the state. 391 */ 392static void 393compress_init(int argc, char **argv, const struct options *O, 394 struct compress_state *S) 395{ 396 397 if (!((argc == 2) || (argc == 3))) 398 usage(); 399 400 const char *const image_pathname = argv[0]; 401 const char *const cloop2_pathname = argv[1]; 402 403 /* Grab the block size either from `-b' or from the last argument. */ 404 __CTASSERT(0 < DEV_BSIZE); 405 __CTASSERT((MIN_BLOCKSIZE % DEV_BSIZE) == 0); 406 __CTASSERT(MIN_BLOCKSIZE <= DEF_BLOCKSIZE); 407 __CTASSERT((DEF_BLOCKSIZE % DEV_BSIZE) == 0); 408 __CTASSERT(DEF_BLOCKSIZE <= MAX_BLOCKSIZE); 409 __CTASSERT((MAX_BLOCKSIZE % DEV_BSIZE) == 0); 410 if (ISSET(O->flags, FLAG_b)) { 411 if (argc == 3) { 412 warnx("use -b or the extra argument, not both"); 413 usage(); 414 } 415 S->blocksize = O->blocksize; 416 } else { 417 S->blocksize = (argc == 2? DEF_BLOCKSIZE : 418 strsuftoll("block size", argv[2], MIN_BLOCKSIZE, 419 MAX_BLOCKSIZE)); 420 } 421 422 /* Sanity-check the blocksize. (strsuftoll guarantees bounds.) */ 423 __CTASSERT(DEV_BSIZE <= UINT32_MAX); 424 if ((S->blocksize % DEV_BSIZE) != 0) 425 errx(1, "bad blocksize: %"PRIu32 426 " (not a multiple of %"PRIu32")", 427 S->blocksize, (uint32_t)DEV_BSIZE); 428 assert(MIN_BLOCKSIZE <= S->blocksize); 429 assert((S->blocksize % DEV_BSIZE) == 0); 430 assert(S->blocksize <= MAX_BLOCKSIZE); 431 432 /* Grab the end block number if we have one. */ 433 S->end_block = (ISSET(O->flags, FLAG_p)? O->end_block : 0); 434 435 /* Grab the checkpoint block count, if we have one. */ 436 S->checkpoint_blocks = 437 (ISSET(O->flags, FLAG_k)? O->checkpoint_blocks : 0); 438 439 /* Open the input image file and the output cloop2 file. */ 440 S->image_fd = open(image_pathname, O_RDONLY); 441 if (S->image_fd == -1) 442 err(1, "open(%s)", image_pathname); 443 444 int oflags; 445 if (!ISSET(O->flags, FLAG_r)) 446 oflags = (O_WRONLY | O_TRUNC | O_CREAT); 447 else if (!ISSET(O->flags, FLAG_R)) 448 oflags = (O_RDWR | O_CREAT); 449 else 450 oflags = O_RDWR; 451 S->cloop2_fd = open(cloop2_pathname, oflags, 0777); 452 if (S->cloop2_fd == -1) 453 err(1, "open(%s)", cloop2_pathname); 454 455 /* Find the size of the input image. */ 456 if (ISSET(O->flags, FLAG_l)) { 457 S->size = O->length; 458 } else { 459 static const struct stat zero_st; 460 struct stat st = zero_st; 461 if (fstat(S->image_fd, &st) == -1) 462 err(1, "stat(%s)", image_pathname); 463 if (st.st_size <= 0) 464 errx(1, "unknown image size"); 465 assert(st.st_size >= 0); 466 __CTASSERT(OFF_MAX <= UINT64_MAX); 467 assert(__type_fit(uint64_t, st.st_size)); 468 S->size = st.st_size; 469 } 470 assert(S->size <= OFF_MAX); 471 472 /* Find number of full blocks and whether there's a partial block. */ 473 __CTASSERT(0 < MIN_BLOCKSIZE); 474 assert(0 < S->blocksize); 475 if (TOOMANY(off_t, (off_t)S->size, (off_t)S->blocksize, 476 (off_t)MAX_N_BLOCKS)) 477 errx(1, "image too large for block size %"PRIu32": %"PRIu64, 478 S->blocksize, S->size); 479 __CTASSERT(MAX_N_BLOCKS <= UINT32_MAX); 480 S->n_full_blocks = S->size/S->blocksize; 481 S->n_blocks = HOWMANY(S->size, S->blocksize); 482 assert(S->n_full_blocks <= S->n_blocks); 483 assert(S->n_blocks <= MAX_N_BLOCKS); 484 485 /* Choose a window size. */ 486 const uint32_t window_size = (ISSET(O->flags, FLAG_w)? O->window_size : 487 DEF_WINDOW_SIZE); 488 489 /* Create an offset table for the blocks; one extra for the end. */ 490 __CTASSERT(ADD_OK(uint32_t, MAX_N_BLOCKS, 1)); 491 S->n_offsets = (S->n_blocks + 1); 492 __CTASSERT(MAX_N_OFFSETS == (MAX_N_BLOCKS + 1)); 493 __CTASSERT(MUL_OK(size_t, MAX_N_OFFSETS, sizeof(uint64_t))); 494 __CTASSERT(CLOOP2_OFFSET_TABLE_OFFSET <= OFFTAB_MAX_FDPOS); 495 offtab_init(&S->offtab, S->n_offsets, window_size, S->cloop2_fd, 496 CLOOP2_OFFSET_TABLE_OFFSET); 497 498 /* Attempt to restart a partial transfer if requested. */ 499 if (ISSET(O->flags, FLAG_r)) { 500 if (compress_restart(S)) { 501 /* 502 * Restart succeeded. Truncate the output 503 * here, in case any garbage got appended. We 504 * are committed to making progress at this 505 * point. If the ftruncate fails, we don't 506 * lose anything valuable -- this is the last 507 * point at which we can restart anyway. 508 */ 509 if (ftruncate(S->cloop2_fd, S->offset) == -1) 510 err(1, "ftruncate failed"); 511 512 /* All set! No more initialization to do. */ 513 return; 514 } else { 515 /* Restart failed. Barf now if requested. */ 516 if (ISSET(O->flags, FLAG_R)) 517 errx(1, "restart failed, aborting"); 518 519 /* Otherwise, truncate and start at the top. */ 520 if (ftruncate(S->cloop2_fd, 0) == -1) 521 err(1, "truncate failed"); 522 if (lseek(S->cloop2_fd, 0, SEEK_SET) == -1) 523 err(1, "lseek to cloop2 beginning failed"); 524 525 /* If we seeked in the input, rewind. */ 526 if (S->blkno != 0) { 527 if (lseek(S->image_fd, 0, SEEK_SET) == -1) 528 err(1, 529 "lseek to image beginning failed"); 530 } 531 } 532 } 533 534 /* Write a bogus (zero) header for now, until we checkpoint. */ 535 static const struct cloop2_header zero_header; 536 const ssize_t h_written = write(S->cloop2_fd, &zero_header, 537 sizeof(zero_header)); 538 if (h_written == -1) 539 err(1, "write header"); 540 assert(h_written >= 0); 541 if ((size_t)h_written != sizeof(zero_header)) 542 errx(1, "partial write of header: %zu != %zu", 543 (size_t)h_written, sizeof(zero_header)); 544 545 /* Reset the offset table to be empty and write it. */ 546 offtab_reset_write(&S->offtab); 547 548 /* Start at the beginning of the image. */ 549 S->blkno = 0; 550 S->offset = (sizeof(struct cloop2_header) + 551 ((uint64_t)S->n_offsets * sizeof(uint64_t))); 552 S->n_checkpointed_blocks = 0; 553 554 /* Good to go and ready for interruption by a signal. */ 555 S->initialized = 1; 556} 557 558/* 559 * Try to recover state from an existing output file. 560 * 561 * On success, fill the offset table with what's in the file, set 562 * S->blkno and S->offset to reflect our position, and seek to the 563 * respective positions in the input and output files. 564 * 565 * On failure, return false. May clobber the offset table, S->blkno, 566 * S->offset, and the file pointers. 567 */ 568static bool 569compress_restart(struct compress_state *S) 570{ 571 572 /* Read in the header. */ 573 static const struct cloop2_header zero_header; 574 struct cloop2_header header = zero_header; 575 576 const ssize_t h_read = read_block(S->cloop2_fd, &header, 577 sizeof(header)); 578 if (h_read == -1) { 579 warn("failed to read header"); 580 return false; 581 } 582 assert(h_read >= 0); 583 if ((size_t)h_read != sizeof(header)) { 584 warnx("partial read of header"); 585 return false; 586 } 587 588 /* Check that the header looks like a header. */ 589 __CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic)); 590 if (memcmp(header.cl2h_magic, cloop2_magic, sizeof(cloop2_magic)) 591 != 0) { 592 warnx("bad cloop2 shell script magic"); 593 return false; 594 } 595 596 /* Check the header parameters. */ 597 if (be32toh(header.cl2h_blocksize) != S->blocksize) { 598 warnx("mismatched block size: %"PRIu32 599 " (expected %"PRIu32")", 600 be32toh(header.cl2h_blocksize), S->blocksize); 601 return false; 602 } 603 if (be32toh(header.cl2h_n_blocks) != S->n_blocks) { 604 warnx("mismatched number of blocks: %"PRIu32 605 " (expected %"PRIu32")", 606 be32toh(header.cl2h_n_blocks), S->n_blocks); 607 return false; 608 } 609 610 /* Read in the partial offset table. */ 611 if (!offtab_reset_read(&S->offtab, &warn, &warnx)) 612 return false; 613 if (!offtab_prepare_get(&S->offtab, 0)) 614 return false; 615 const uint64_t first_offset = offtab_get(&S->offtab, 0); 616 __CTASSERT(MUL_OK(uint64_t, MAX_N_OFFSETS, sizeof(uint64_t))); 617 __CTASSERT(ADD_OK(uint64_t, sizeof(struct cloop2_header), 618 MAX_N_OFFSETS*sizeof(uint64_t))); 619 const uint64_t expected = sizeof(struct cloop2_header) + 620 ((uint64_t)S->n_offsets * sizeof(uint64_t)); 621 if (first_offset != expected) { 622 warnx("first offset is not 0x%"PRIx64": 0x%"PRIx64, 623 expected, first_offset); 624 return false; 625 } 626 627 /* Find where we left off. */ 628 __CTASSERT(MAX_N_OFFSETS <= UINT32_MAX); 629 uint32_t blkno = 0; 630 uint64_t last_offset = first_offset; 631 for (blkno = 0; blkno < S->n_blocks; blkno++) { 632 if (!offtab_prepare_get(&S->offtab, blkno)) 633 return false; 634 const uint64_t offset = offtab_get(&S->offtab, blkno); 635 if (offset == ~(uint64_t)0) 636 break; 637 638 if (0 < blkno) { 639 const uint64_t start = last_offset; 640 const uint64_t end = offset; 641 if (end <= start) { 642 warnx("bad offset table: 0x%"PRIx64 643 ", 0x%"PRIx64, start, end); 644 return false; 645 } 646 /* XXX compression ratio bound */ 647 __CTASSERT(MUL_OK(size_t, 2, MAX_BLOCKSIZE)); 648 if ((2 * (size_t)S->blocksize) <= (end - start)) { 649 warnx("block %"PRIu32" too large:" 650 " %"PRIu64" bytes" 651 " from 0x%"PRIx64" to 0x%"PRIx64, 652 blkno, (end - start), start, end); 653 return false; 654 } 655 } 656 657 last_offset = offset; 658 } 659 660 if (blkno == 0) { 661 warnx("no blocks were written; nothing to restart"); 662 return false; 663 } 664 665 /* Make sure the rest of the offset table is all ones. */ 666 if (blkno < S->n_blocks) { 667 uint32_t nblkno; 668 669 for (nblkno = blkno; nblkno < S->n_blocks; nblkno++) { 670 if (!offtab_prepare_get(&S->offtab, nblkno)) 671 return false; 672 const uint64_t offset = offtab_get(&S->offtab, nblkno); 673 if (offset != ~(uint64_t)0) { 674 warnx("bad partial offset table entry" 675 " at %"PRIu32": 0x%"PRIx64, 676 nblkno, offset); 677 return false; 678 } 679 } 680 } 681 682 /* 683 * XXX Consider decompressing some number of blocks to make 684 * sure they match. 685 */ 686 687 /* Back up by one. */ 688 assert(1 <= blkno); 689 blkno -= 1; 690 691 /* Seek to the output position. */ 692 assert(last_offset <= OFF_MAX); 693 if (lseek(S->cloop2_fd, last_offset, SEEK_SET) == -1) { 694 warn("lseek output cloop2 to %"PRIx64" failed", last_offset); 695 return false; 696 } 697 698 /* Switch from reading to writing the offset table. */ 699 if (!offtab_transmogrify_read_to_write(&S->offtab, blkno)) 700 return false; 701 702 /* 703 * Seek to the input position last, after all other possible 704 * failures, because if the input is a pipe, we can't change 705 * our mind, rewind, and start at the beginning instead of 706 * restarting. 707 */ 708 assert(S->size <= OFF_MAX); 709 assert(blkno <= (S->size / S->blocksize)); 710 const off_t restart_position = ((off_t)blkno * (off_t)S->blocksize); 711 assert(0 <= restart_position); 712 assert(restart_position <= (off_t)S->size); 713 if (lseek(S->image_fd, restart_position, SEEK_SET) == -1) { 714 if (errno != ESPIPE) { 715 warn("lseek input image failed"); 716 return false; 717 } 718 719 /* Try read instead of lseek for a pipe/socket/fifo. */ 720 void *const buffer = malloc(0x10000); 721 if (buffer == NULL) 722 err(1, "malloc temporary buffer"); 723 off_t left = restart_position; 724 while (left > 0) { 725 const size_t size = MIN(0x10000, left); 726 const ssize_t n_read = read_block(S->image_fd, buffer, 727 size); 728 if (n_read == -1) { 729 free(buffer); 730 warn("read of input image failed"); 731 return false; 732 } 733 assert(n_read >= 0); 734 if ((size_t)n_read != size) { 735 free(buffer); 736 warnx("partial read of input image"); 737 return false; 738 } 739 assert((off_t)size <= left); 740 left -= size; 741 } 742 free(buffer); 743 } 744 745 /* Start where we left off. */ 746 S->blkno = blkno; 747 S->offset = last_offset; 748 S->n_checkpointed_blocks = blkno; 749 750 /* Good to go and ready for interruption by a signal. */ 751 S->initialized = 1; 752 753 /* Success! */ 754 return true; 755} 756 757/* 758 * Read a single block, compress it, and write the compressed block. 759 * Return the size of the compressed block. 760 */ 761static uint32_t 762compress_block(int in_fd, int out_fd, uint32_t blkno, uint32_t blocksize, 763 uint32_t readsize, void *uncompbuf, void *compbuf) 764{ 765 766 assert(readsize <= blocksize); 767 assert(blocksize <= MAX_BLOCKSIZE); 768 769 /* Read the uncompressed block. */ 770 const ssize_t n_read = read_block(in_fd, uncompbuf, readsize); 771 if (n_read == -1) 772 err(1, "read block %"PRIu32, blkno); 773 assert(n_read >= 0); 774 if ((size_t)n_read != readsize) 775 errx(1, "partial read of block %"PRIu32": %zu != %"PRIu32, 776 blkno, (size_t)n_read, readsize); 777 778 /* Compress the block. */ 779 /* XXX compression ratio bound */ 780 __CTASSERT(MUL_OK(unsigned long, 2, MAX_BLOCKSIZE)); 781 const unsigned long uncomplen = 782 (VNDCOMPRESS_COMPAT? blocksize : readsize); /* XXX */ 783 unsigned long complen = (uncomplen * 2); 784 const int zerror = compress2(compbuf, &complen, uncompbuf, uncomplen, 785 Z_BEST_COMPRESSION); 786 if (zerror != Z_OK) 787 errx(1, "compressed failed at block %"PRIu32" (%d): %s", blkno, 788 zerror, zError(zerror)); 789 assert(complen <= (uncomplen * 2)); 790 791 /* Write the compressed block. */ 792 const ssize_t n_written = write(out_fd, compbuf, complen); 793 if (n_written == -1) 794 err(1, "write block %"PRIu32, blkno); 795 assert(n_written >= 0); 796 if ((size_t)n_written != complen) 797 errx(1, "partial write of block %"PRIu32": %zu != %lu", 798 blkno, (size_t)n_written, complen); 799 800 return (size_t)n_written; 801} 802 803/* 804 * Checkpoint if appropriate. 805 */ 806static void 807compress_maybe_checkpoint(struct compress_state *S) 808{ 809 810 if ((0 < S->checkpoint_blocks) && (0 < S->blkno) && 811 ((S->blkno % S->checkpoint_blocks) == 0)) { 812 assert(S->offset <= OFF_MAX); 813 assert((off_t)S->offset == lseek(S->cloop2_fd, 0, SEEK_CUR)); 814 compress_checkpoint(S); 815 } 816} 817 818/* 819 * Write the prefix of the offset table that we have filled so far. 820 * 821 * We fsync the data blocks we have written, and then write the offset 822 * table, and then fsync the offset table and file metadata. This 823 * should help to avoid offset tables that point at garbage data. 824 * 825 * This may be called from a signal handler, so it must not use stdio, 826 * malloc, &c. -- it may only (a) handle signal-safe state in S, and 827 * (b) do file descriptor I/O / fsync. 828 * 829 * XXX This requires further thought and heavy testing to be sure. 830 * 831 * XXX Should have an option to suppress fsync. 832 * 833 * XXX Should have an option to fail on fsync failures. 834 * 835 * XXX Would be nice if we could just do a barrier rather than an 836 * fsync. 837 * 838 * XXX How might we automatically test the fsyncs? 839 */ 840static void 841compress_checkpoint(struct compress_state *S) 842{ 843 844 assert(S->blkno < S->n_offsets); 845 const uint32_t n_offsets = (S->blkno + 1); 846 assert(n_offsets <= S->n_offsets); 847 848 assert(S->offset <= OFF_MAX); 849 assert((off_t)S->offset <= lseek(S->cloop2_fd, 0, SEEK_CUR)); 850 851 /* Make sure the data hits the disk before we say it's ready. */ 852 if (fsync_range(S->cloop2_fd, (FFILESYNC | FDISKSYNC), 0, S->offset) 853 == -1) 854 warn_ss("fsync of output failed"); 855 856 /* Say the data blocks are ready. */ 857 offtab_checkpoint(&S->offtab, n_offsets, 858 (S->n_checkpointed_blocks == 0? OFFTAB_CHECKPOINT_SYNC : 0)); 859 860 /* 861 * If this is the first checkpoint, initialize the header. 862 * Signal handler can race with main code here, but it is 863 * harmless -- just an extra fsync and write of the header, 864 * which are both idempotent. 865 * 866 * Once we have synchronously checkpointed the offset table, 867 * subsequent writes will preserve a valid state. 868 */ 869 if (S->n_checkpointed_blocks == 0) { 870 static const struct cloop2_header zero_header; 871 struct cloop2_header header = zero_header; 872 873 /* Format the header. */ 874 __CTASSERT(sizeof(cloop2_magic) <= sizeof(header.cl2h_magic)); 875 (void)memcpy(header.cl2h_magic, cloop2_magic, 876 sizeof(cloop2_magic)); 877 header.cl2h_blocksize = htobe32(S->blocksize); 878 header.cl2h_n_blocks = htobe32(S->n_blocks); 879 880 /* Write the header. */ 881 const ssize_t h_written = pwrite(S->cloop2_fd, &header, 882 sizeof(header), 0); 883 if (h_written == -1) 884 err_ss(1, "write header"); 885 assert(h_written >= 0); 886 if ((size_t)h_written != sizeof(header)) 887 errx_ss(1, "partial write of header: %zu != %zu", 888 (size_t)h_written, sizeof(header)); 889 } 890 891 /* Record how many blocks we've checkpointed. */ 892 { 893 sigset_t old_sigmask; 894 block_signals(&old_sigmask); 895 S->n_checkpointed_blocks = S->blkno; 896 restore_sigmask(&old_sigmask); 897 } 898} 899 900/* 901 * Release everything we allocated in compress_init. 902 */ 903static void 904compress_exit(struct compress_state *S) 905{ 906 907 /* Done with the offset table. Destroy it. */ 908 offtab_destroy(&S->offtab); 909 910 /* Done with the files. Close them. */ 911 if (close(S->cloop2_fd) == -1) 912 warn("close(cloop2 fd)"); 913 if (close(S->image_fd) == -1) 914 warn("close(image fd)"); 915} 916