pax.c revision 99110
11556Srgrimes/*- 21556Srgrimes * Copyright (c) 1992 Keith Muller. 31556Srgrimes * Copyright (c) 1992, 1993 41556Srgrimes * The Regents of the University of California. All rights reserved. 51556Srgrimes * 61556Srgrimes * This code is derived from software contributed to Berkeley by 71556Srgrimes * Keith Muller of the University of California, San Diego. 81556Srgrimes * 91556Srgrimes * Redistribution and use in source and binary forms, with or without 101556Srgrimes * modification, are permitted provided that the following conditions 111556Srgrimes * are met: 121556Srgrimes * 1. Redistributions of source code must retain the above copyright 131556Srgrimes * notice, this list of conditions and the following disclaimer. 141556Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151556Srgrimes * notice, this list of conditions and the following disclaimer in the 161556Srgrimes * documentation and/or other materials provided with the distribution. 171556Srgrimes * 3. All advertising materials mentioning features or use of this software 181556Srgrimes * must display the following acknowledgement: 191556Srgrimes * This product includes software developed by the University of 201556Srgrimes * California, Berkeley and its contributors. 211556Srgrimes * 4. Neither the name of the University nor the names of its contributors 221556Srgrimes * may be used to endorse or promote products derived from this software 231556Srgrimes * without specific prior written permission. 241556Srgrimes * 251556Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 261556Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 271556Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 281556Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 291556Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 301556Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 311556Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 321556Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 331556Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 341556Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 351556Srgrimes * SUCH DAMAGE. 361556Srgrimes */ 371556Srgrimes 381556Srgrimes#ifndef lint 3920420Sstevestatic char const copyright[] = 401556Srgrimes"@(#) Copyright (c) 1992, 1993\n\ 411556Srgrimes The Regents of the University of California. All rights reserved.\n"; 421556Srgrimes#endif /* not lint */ 431556Srgrimes 441556Srgrimes#ifndef lint 4536049Scharnier#if 0 4636049Scharnierstatic char sccsid[] = "@(#)pax.c 8.2 (Berkeley) 4/18/94"; 4736049Scharnier#endif 481556Srgrimes#endif /* not lint */ 4999110Sobrien#include <sys/cdefs.h> 5099110Sobrien__FBSDID("$FreeBSD: head/bin/pax/pax.c 99110 2002-06-30 05:15:05Z obrien $"); 511556Srgrimes 521556Srgrimes#include <sys/types.h> 531556Srgrimes#include <sys/stat.h> 541556Srgrimes#include <sys/time.h> 551556Srgrimes#include <sys/resource.h> 5676286Skris#include <err.h> 5731666Seivind#include <errno.h> 5876351Skris#include <fcntl.h> 5931666Seivind#include <locale.h> 6076016Skris#include <paths.h> 611556Srgrimes#include <signal.h> 6231666Seivind#include <stdio.h> 6331666Seivind#include <stdlib.h> 6478732Sdd#include <string.h> 651556Srgrimes#include <unistd.h> 661556Srgrimes#include "pax.h" 671556Srgrimes#include "extern.h" 6890110Simpstatic int gen_init(void); 691556Srgrimes 701556Srgrimes/* 711556Srgrimes * PAX main routines, general globals and some simple start up routines 721556Srgrimes */ 731556Srgrimes 741556Srgrimes/* 751556Srgrimes * Variables that can be accessed by any routine within pax 761556Srgrimes */ 771556Srgrimesint act = DEFOP; /* read/write/append/copy */ 781556SrgrimesFSUB *frmt = NULL; /* archive format type */ 791556Srgrimesint cflag; /* match all EXCEPT pattern/file */ 8076351Skrisint cwdfd; /* starting cwd */ 811556Srgrimesint dflag; /* directory member match only */ 821556Srgrimesint iflag; /* interactive file/archive rename */ 831556Srgrimesint kflag; /* do not overwrite existing files */ 841556Srgrimesint lflag; /* use hard links when possible */ 851556Srgrimesint nflag; /* select first archive member match */ 861556Srgrimesint tflag; /* restore access time after read */ 871556Srgrimesint uflag; /* ignore older modification time files */ 881556Srgrimesint vflag; /* produce verbose output */ 891556Srgrimesint Dflag; /* same as uflag except inode change time */ 901556Srgrimesint Hflag; /* follow command line symlinks (write only) */ 911556Srgrimesint Lflag; /* follow symlinks when writing */ 921556Srgrimesint Xflag; /* archive files with same device id only */ 931556Srgrimesint Yflag; /* same as Dflg except after name mode */ 941556Srgrimesint Zflag; /* same as uflg except after name mode */ 951556Srgrimesint vfpart; /* is partial verbose output in progress */ 961556Srgrimesint patime = 1; /* preserve file access time */ 971556Srgrimesint pmtime = 1; /* preserve file modification times */ 9876351Skrisint nodirs; /* do not create directories as needed */ 991556Srgrimesint pmode; /* preserve file mode bits */ 1001556Srgrimesint pids; /* preserve file uid/gid */ 10176351Skrisint rmleadslash = 0; /* remove leading '/' from pathnames */ 1021556Srgrimesint exit_val; /* exit value */ 1031556Srgrimesint docrc; /* check/create file crc */ 1041556Srgrimeschar *dirptr; /* destination dir in a copy */ 1051556Srgrimeschar *argv0; /* root of argv[0] */ 10676351Skrissigset_t s_mask; /* signal mask for cleanup critical sect */ 10781601SpeterFILE *listf; /* file pointer to print file list to */ 10876016Skrischar *tempfile; /* tempfile to use for mkstemp(3) */ 10976016Skrischar *tempbase; /* basename of tempfile to use for mkstemp(3) */ 1101556Srgrimes 1111556Srgrimes/* 1121556Srgrimes * PAX - Portable Archive Interchange 1131556Srgrimes * 1141556Srgrimes * A utility to read, write, and write lists of the members of archive 1151556Srgrimes * files and copy directory hierarchies. A variety of archive formats 1161556Srgrimes * are supported (some are described in POSIX 1003.1 10.1): 1171556Srgrimes * 1181556Srgrimes * ustar - 10.1.1 extended tar interchange format 1191556Srgrimes * cpio - 10.1.2 extended cpio interchange format 1201556Srgrimes * tar - old BSD 4.3 tar format 1211556Srgrimes * binary cpio - old cpio with binary header format 1221556Srgrimes * sysVR4 cpio - with and without CRC 1231556Srgrimes * 1241556Srgrimes * This version is a superset of IEEE Std 1003.2b-d3 1251556Srgrimes * 1261556Srgrimes * Summary of Extensions to the IEEE Standard: 1271556Srgrimes * 1281556Srgrimes * 1 READ ENHANCEMENTS 1291556Srgrimes * 1.1 Operations which read archives will continue to operate even when 1308855Srgrimes * processing archives which may be damaged, truncated, or fail to meet 1311556Srgrimes * format specs in several different ways. Damaged sections of archives 1321556Srgrimes * are detected and avoided if possible. Attempts will be made to resync 1331556Srgrimes * archive read operations even with badly damaged media. 1341556Srgrimes * 1.2 Blocksize requirements are not strictly enforced on archive read. 1351556Srgrimes * Tapes which have variable sized records can be read without errors. 1361556Srgrimes * 1.3 The user can specify via the non-standard option flag -E if error 1371556Srgrimes * resync operation should stop on a media error, try a specified number 1381556Srgrimes * of times to correct, or try to correct forever. 1391556Srgrimes * 1.4 Sparse files (lseek holes) stored on the archive (but stored with blocks 1401556Srgrimes * of all zeros will be restored with holes appropriate for the target 1411556Srgrimes * filesystem 1421556Srgrimes * 1.5 The user is notified whenever something is found during archive 1431556Srgrimes * read operations which violates spec (but the read will continue). 1441556Srgrimes * 1.6 Multiple archive volumes can be read and may span over different 1458855Srgrimes * archive devices 1461556Srgrimes * 1.7 Rigidly restores all file attributes exactly as they are stored on the 1471556Srgrimes * archive. 1481556Srgrimes * 1.8 Modification change time ranges can be specified via multiple -T 1491556Srgrimes * options. These allow a user to select files whose modification time 1501556Srgrimes * lies within a specific time range. 1511556Srgrimes * 1.9 Files can be selected based on owner (user name or uid) via one or more 1521556Srgrimes * -U options. 1531556Srgrimes * 1.10 Files can be selected based on group (group name or gid) via one o 1541556Srgrimes * more -G options. 15546684Skris * 1.11 File modification time can be checked against existing file after 1561556Srgrimes * name modification (-Z) 1571556Srgrimes * 1581556Srgrimes * 2 WRITE ENHANCEMENTS 1591556Srgrimes * 2.1 Write operation will stop instead of allowing a user to create a flawed 1601556Srgrimes * flawed archive (due to any problem). 16146684Skris * 2.2 Archives written by pax are forced to strictly conform to both the 16246684Skris * archive and pax the specific format specifications. 1631556Srgrimes * 2.3 Blocking size and format is rigidly enforced on writes. 1641556Srgrimes * 2.4 Formats which may exhibit header overflow problems (they have fields 16596702Strhodes * too small for large filesystems, such as inode number storage), use 1661556Srgrimes * routines designed to repair this problem. These techniques still 1671556Srgrimes * conform to both pax and format specifications, but no longer truncate 1681556Srgrimes * these fields. This removes any restrictions on using these archive 16996702Strhodes * formats on large filesystems. 1701556Srgrimes * 2.5 Multiple archive volumes can be written and may span over different 1718855Srgrimes * archive devices 1721556Srgrimes * 2.6 A archive volume record limit allows the user to specify the number 1731556Srgrimes * of bytes stored on an archive volume. When reached the user is 1741556Srgrimes * prompted for the next archive volume. This is specified with the 17546684Skris * non-standard -B flag. The limit is rounded up to the next blocksize. 1761556Srgrimes * 2.7 All archive padding during write use zero filled sections. This makes 1771556Srgrimes * it much easier to pull data out of flawed archive during read 1781556Srgrimes * operations. 1791556Srgrimes * 2.8 Access time reset with the -t applies to all file nodes (including 1801556Srgrimes * directories). 1811556Srgrimes * 2.9 Symbolic links can be followed with -L (optional in the spec). 1821556Srgrimes * 2.10 Modification or inode change time ranges can be specified via 1831556Srgrimes * multiple -T options. These allow a user to select files whose 1841556Srgrimes * modification or inode change time lies within a specific time range. 1851556Srgrimes * 2.11 Files can be selected based on owner (user name or uid) via one or more 1861556Srgrimes * -U options. 1871556Srgrimes * 2.12 Files can be selected based on group (group name or gid) via one o 1881556Srgrimes * more -G options. 1891556Srgrimes * 2.13 Symlinks which appear on the command line can be followed (without 1901556Srgrimes * following other symlinks; -H flag) 1911556Srgrimes * 1921556Srgrimes * 3 COPY ENHANCEMENTS 1931556Srgrimes * 3.1 Sparse files (lseek holes) can be copied without expanding the holes 1941556Srgrimes * into zero filled blocks. The file copy is created with holes which are 1951556Srgrimes * appropriate for the target filesystem 1961556Srgrimes * 3.2 Access time as well as modification time on copied file trees can be 1971556Srgrimes * preserved with the appropriate -p options. 1981556Srgrimes * 3.3 Access time reset with the -t applies to all file nodes (including 1991556Srgrimes * directories). 2001556Srgrimes * 3.4 Symbolic links can be followed with -L (optional in the spec). 2011556Srgrimes * 3.5 Modification or inode change time ranges can be specified via 2021556Srgrimes * multiple -T options. These allow a user to select files whose 2031556Srgrimes * modification or inode change time lies within a specific time range. 2041556Srgrimes * 3.6 Files can be selected based on owner (user name or uid) via one or more 2051556Srgrimes * -U options. 2061556Srgrimes * 3.7 Files can be selected based on group (group name or gid) via one o 2071556Srgrimes * more -G options. 2081556Srgrimes * 3.8 Symlinks which appear on the command line can be followed (without 2091556Srgrimes * following other symlinks; -H flag) 21046684Skris * 3.9 File inode change time can be checked against existing file before 2111556Srgrimes * name modification (-D) 21246684Skris * 3.10 File inode change time can be checked against existing file after 2131556Srgrimes * name modification (-Y) 21446684Skris * 3.11 File modification time can be checked against existing file after 2151556Srgrimes * name modification (-Z) 2161556Srgrimes * 2171556Srgrimes * 4 GENERAL ENHANCEMENTS 2188855Srgrimes * 4.1 Internal structure is designed to isolate format dependent and 2191556Srgrimes * independent functions. Formats are selected via a format driver table. 2201556Srgrimes * This encourages the addition of new archive formats by only having to 2211556Srgrimes * write those routines which id, read and write the archive header. 2221556Srgrimes */ 2231556Srgrimes 2241556Srgrimes/* 2251556Srgrimes * main() 2261556Srgrimes * parse options, set up and operate as specified by the user. 2271556Srgrimes * any operational flaw will set exit_val to non-zero 2281556Srgrimes * Return: 0 if ok, 1 otherwise 2291556Srgrimes */ 2301556Srgrimes 2311556Srgrimesint 23290110Simpmain(int argc, char *argv[]) 2331556Srgrimes{ 23476016Skris char *tmpdir; 23576016Skris size_t tdlen; 23676016Skris 23717517Sache (void) setlocale(LC_ALL, ""); 23881601Speter listf = stderr; 23976351Skris /* 24076351Skris * Keep a reference to cwd, so we can always come back home. 24176351Skris */ 24276351Skris cwdfd = open(".", O_RDONLY); 24376351Skris if (cwdfd < 0) { 24476351Skris syswarn(0, errno, "Can't open current working directory."); 24576351Skris return(exit_val); 24676351Skris } 24776016Skris 2481556Srgrimes /* 24976016Skris * Where should we put temporary files? 25076016Skris */ 25176016Skris if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 25276016Skris tmpdir = _PATH_TMP; 25376016Skris tdlen = strlen(tmpdir); 25476016Skris while(tdlen > 0 && tmpdir[tdlen - 1] == '/') 25576016Skris tdlen--; 25676016Skris tempfile = malloc(tdlen + 1 + sizeof(_TFILE_BASE)); 25776016Skris if (tempfile == NULL) { 25876017Skris paxwarn(1, "Cannot allocate memory for temp file name."); 25976016Skris return(exit_val); 26076016Skris } 26176016Skris if (tdlen) 26276016Skris memcpy(tempfile, tmpdir, tdlen); 26376016Skris tempbase = tempfile + tdlen; 26476016Skris *tempbase++ = '/'; 26576016Skris 26676016Skris /* 2671556Srgrimes * parse options, determine operational mode, general init 2681556Srgrimes */ 2691556Srgrimes options(argc, argv); 27076019Skris if ((gen_init() < 0) || (tty_init() < 0)) 2711556Srgrimes return(exit_val); 2721556Srgrimes 2731556Srgrimes /* 2748855Srgrimes * select a primary operation mode 2751556Srgrimes */ 2761556Srgrimes switch(act) { 2771556Srgrimes case EXTRACT: 2781556Srgrimes extract(); 2791556Srgrimes break; 2801556Srgrimes case ARCHIVE: 2811556Srgrimes archive(); 2821556Srgrimes break; 2831556Srgrimes case APPND: 28476286Skris if (gzip_program != NULL) 28576286Skris err(1, "can not gzip while appending"); 2861556Srgrimes append(); 2871556Srgrimes break; 2881556Srgrimes case COPY: 2891556Srgrimes copy(); 2901556Srgrimes break; 2911556Srgrimes default: 2921556Srgrimes case LIST: 2931556Srgrimes list(); 2941556Srgrimes break; 2951556Srgrimes } 2961556Srgrimes return(exit_val); 2971556Srgrimes} 2981556Srgrimes 2991556Srgrimes/* 3001556Srgrimes * sig_cleanup() 3011556Srgrimes * when interrupted we try to do whatever delayed processing we can. 3021556Srgrimes * This is not critical, but we really ought to limit our damage when we 3031556Srgrimes * are aborted by the user. 3041556Srgrimes * Return: 3051556Srgrimes * never.... 3061556Srgrimes */ 3071556Srgrimes 3081556Srgrimesvoid 3091556Srgrimessig_cleanup(int which_sig) 3101556Srgrimes{ 3111556Srgrimes /* 3121556Srgrimes * restore modes and times for any dirs we may have created 3131556Srgrimes * or any dirs we may have read. Set vflag and vfpart so the user 3141556Srgrimes * will clearly see the message on a line by itself. 3151556Srgrimes */ 3161556Srgrimes vflag = vfpart = 1; 3171556Srgrimes if (which_sig == SIGXCPU) 31876017Skris paxwarn(0, "Cpu time limit reached, cleaning up."); 3191556Srgrimes else 32076017Skris paxwarn(0, "Signal caught, cleaning up."); 3211556Srgrimes 3221556Srgrimes ar_close(); 3231556Srgrimes proc_dir(); 3241556Srgrimes if (tflag) 3251556Srgrimes atdir_end(); 3261556Srgrimes exit(1); 3271556Srgrimes} 3281556Srgrimes 3291556Srgrimes/* 3301556Srgrimes * gen_init() 3311556Srgrimes * general setup routines. Not all are required, but they really help 3321556Srgrimes * when dealing with a medium to large sized archives. 3331556Srgrimes */ 3341556Srgrimes 3351556Srgrimesstatic int 3361556Srgrimesgen_init(void) 3371556Srgrimes{ 3381556Srgrimes struct rlimit reslimit; 3391556Srgrimes struct sigaction n_hand; 3401556Srgrimes struct sigaction o_hand; 3411556Srgrimes 3421556Srgrimes /* 3431556Srgrimes * Really needed to handle large archives. We can run out of memory for 3441556Srgrimes * internal tables really fast when we have a whole lot of files... 3451556Srgrimes */ 3461556Srgrimes if (getrlimit(RLIMIT_DATA , &reslimit) == 0){ 3471556Srgrimes reslimit.rlim_cur = reslimit.rlim_max; 3481556Srgrimes (void)setrlimit(RLIMIT_DATA , &reslimit); 3491556Srgrimes } 3501556Srgrimes 3511556Srgrimes /* 3521556Srgrimes * should file size limits be waived? if the os limits us, this is 3531556Srgrimes * needed if we want to write a large archive 3541556Srgrimes */ 3551556Srgrimes if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){ 3561556Srgrimes reslimit.rlim_cur = reslimit.rlim_max; 3571556Srgrimes (void)setrlimit(RLIMIT_FSIZE , &reslimit); 3581556Srgrimes } 3591556Srgrimes 3601556Srgrimes /* 3611556Srgrimes * increase the size the stack can grow to 3621556Srgrimes */ 3631556Srgrimes if (getrlimit(RLIMIT_STACK , &reslimit) == 0){ 3641556Srgrimes reslimit.rlim_cur = reslimit.rlim_max; 3651556Srgrimes (void)setrlimit(RLIMIT_STACK , &reslimit); 3661556Srgrimes } 3671556Srgrimes 3681556Srgrimes /* 3691556Srgrimes * not really needed, but doesn't hurt 3701556Srgrimes */ 3711556Srgrimes if (getrlimit(RLIMIT_RSS , &reslimit) == 0){ 3721556Srgrimes reslimit.rlim_cur = reslimit.rlim_max; 3731556Srgrimes (void)setrlimit(RLIMIT_RSS , &reslimit); 3741556Srgrimes } 3751556Srgrimes 3761556Srgrimes /* 3771556Srgrimes * signal handling to reset stored directory times and modes. Since 3781556Srgrimes * we deal with broken pipes via failed writes we ignore it. We also 3791556Srgrimes * deal with any file size limit thorugh failed writes. Cpu time 3801556Srgrimes * limits are caught and a cleanup is forced. 3811556Srgrimes */ 3821556Srgrimes if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) || 3831556Srgrimes (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) || 3841556Srgrimes (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) || 3851556Srgrimes (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) { 38676017Skris paxwarn(1, "Unable to set up signal mask"); 3871556Srgrimes return(-1); 3881556Srgrimes } 38976351Skris memset(&n_hand, 0, sizeof n_hand); 3901556Srgrimes n_hand.sa_mask = s_mask; 3911556Srgrimes n_hand.sa_flags = 0; 3921556Srgrimes n_hand.sa_handler = sig_cleanup; 3931556Srgrimes 3941556Srgrimes if ((sigaction(SIGHUP, &n_hand, &o_hand) < 0) && 3958855Srgrimes (o_hand.sa_handler == SIG_IGN) && 3961556Srgrimes (sigaction(SIGHUP, &o_hand, &o_hand) < 0)) 3971556Srgrimes goto out; 3981556Srgrimes 3991556Srgrimes if ((sigaction(SIGTERM, &n_hand, &o_hand) < 0) && 4008855Srgrimes (o_hand.sa_handler == SIG_IGN) && 4011556Srgrimes (sigaction(SIGTERM, &o_hand, &o_hand) < 0)) 4021556Srgrimes goto out; 4031556Srgrimes 4041556Srgrimes if ((sigaction(SIGINT, &n_hand, &o_hand) < 0) && 4058855Srgrimes (o_hand.sa_handler == SIG_IGN) && 4061556Srgrimes (sigaction(SIGINT, &o_hand, &o_hand) < 0)) 4071556Srgrimes goto out; 4081556Srgrimes 4091556Srgrimes if ((sigaction(SIGQUIT, &n_hand, &o_hand) < 0) && 4108855Srgrimes (o_hand.sa_handler == SIG_IGN) && 4111556Srgrimes (sigaction(SIGQUIT, &o_hand, &o_hand) < 0)) 4121556Srgrimes goto out; 4131556Srgrimes 4141556Srgrimes if ((sigaction(SIGXCPU, &n_hand, &o_hand) < 0) && 4158855Srgrimes (o_hand.sa_handler == SIG_IGN) && 4161556Srgrimes (sigaction(SIGXCPU, &o_hand, &o_hand) < 0)) 4171556Srgrimes goto out; 4181556Srgrimes 4191556Srgrimes n_hand.sa_handler = SIG_IGN; 4201556Srgrimes if ((sigaction(SIGPIPE, &n_hand, &o_hand) < 0) || 4211556Srgrimes (sigaction(SIGXFSZ, &n_hand, &o_hand) < 0)) 4221556Srgrimes goto out; 4231556Srgrimes return(0); 4241556Srgrimes 4251556Srgrimes out: 42676017Skris syswarn(1, errno, "Unable to set up signal handler"); 4271556Srgrimes return(-1); 4281556Srgrimes} 429