11556Srgrimes/*-
21556Srgrimes * Copyright (c) 1992 Keith Muller.
31556Srgrimes * Copyright (c) 1992, 1993
41556Srgrimes *	The Regents of the University of California.  All rights reserved.
51556Srgrimes *
61556Srgrimes * This code is derived from software contributed to Berkeley by
71556Srgrimes * Keith Muller of the University of California, San Diego.
81556Srgrimes *
91556Srgrimes * Redistribution and use in source and binary forms, with or without
101556Srgrimes * modification, are permitted provided that the following conditions
111556Srgrimes * are met:
121556Srgrimes * 1. Redistributions of source code must retain the above copyright
131556Srgrimes *    notice, this list of conditions and the following disclaimer.
141556Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
151556Srgrimes *    notice, this list of conditions and the following disclaimer in the
161556Srgrimes *    documentation and/or other materials provided with the distribution.
171556Srgrimes * 4. Neither the name of the University nor the names of its contributors
181556Srgrimes *    may be used to endorse or promote products derived from this software
191556Srgrimes *    without specific prior written permission.
201556Srgrimes *
211556Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221556Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231556Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241556Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251556Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261556Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271556Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281556Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291556Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301556Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311556Srgrimes * SUCH DAMAGE.
321556Srgrimes */
331556Srgrimes
341556Srgrimes#ifndef lint
3536049Scharnier#if 0
3636049Scharnierstatic char sccsid[] = "@(#)tables.c	8.1 (Berkeley) 5/31/93";
3736049Scharnier#endif
381556Srgrimes#endif /* not lint */
3999110Sobrien#include <sys/cdefs.h>
4099110Sobrien__FBSDID("$FreeBSD$");
411556Srgrimes
421556Srgrimes#include <sys/types.h>
431556Srgrimes#include <sys/time.h>
441556Srgrimes#include <sys/stat.h>
451556Srgrimes#include <sys/fcntl.h>
4631666Seivind#include <errno.h>
471556Srgrimes#include <stdio.h>
4831666Seivind#include <stdlib.h>
491556Srgrimes#include <string.h>
501556Srgrimes#include <unistd.h>
511556Srgrimes#include "pax.h"
521556Srgrimes#include "tables.h"
531556Srgrimes#include "extern.h"
541556Srgrimes
551556Srgrimes/*
561556Srgrimes * Routines for controlling the contents of all the different databases pax
571556Srgrimes * keeps. Tables are dynamically created only when they are needed. The
581556Srgrimes * goal was speed and the ability to work with HUGE archives. The databases
591556Srgrimes * were kept simple, but do have complex rules for when the contents change.
6046684Skris * As of this writing, the POSIX library functions were more complex than
611556Srgrimes * needed for this application (pax databases have very short lifetimes and
621556Srgrimes * do not survive after pax is finished). Pax is required to handle very
631556Srgrimes * large archives. These database routines carefully combine memory usage and
641556Srgrimes * temporary file storage in ways which will not significantly impact runtime
651556Srgrimes * performance while allowing the largest possible archives to be handled.
6646684Skris * Trying to force the fit to the POSIX databases routines was not considered
671556Srgrimes * time well spent.
681556Srgrimes */
691556Srgrimes
701556Srgrimesstatic HRDLNK **ltab = NULL;	/* hard link table for detecting hard links */
711556Srgrimesstatic FTM **ftab = NULL;	/* file time table for updating arch */
721556Srgrimesstatic NAMT **ntab = NULL;	/* interactive rename storage table */
731556Srgrimesstatic DEVT **dtab = NULL;	/* device/inode mapping tables */
741556Srgrimesstatic ATDIR **atab = NULL;	/* file tree directory time reset table */
751556Srgrimesstatic int dirfd = -1;		/* storage for setting created dir time/mode */
761556Srgrimesstatic u_long dircnt;		/* entries in dir time/mode storage */
771556Srgrimesstatic int ffd = -1;		/* tmp file for file time table name storage */
781556Srgrimes
7990110Simpstatic DEVT *chk_dev(dev_t, int);
801556Srgrimes
811556Srgrimes/*
821556Srgrimes * hard link table routines
831556Srgrimes *
848855Srgrimes * The hard link table tries to detect hard links to files using the device and
851556Srgrimes * inode values. We do this when writing an archive, so we can tell the format
861556Srgrimes * write routine that this file is a hard link to another file. The format
871556Srgrimes * write routine then can store this file in whatever way it wants (as a hard
881556Srgrimes * link if the format supports that like tar, or ignore this info like cpio).
891556Srgrimes * (Actually a field in the format driver table tells us if the format wants
901556Srgrimes * hard link info. if not, we do not waste time looking for them). We also use
911556Srgrimes * the same table when reading an archive. In that situation, this table is
921556Srgrimes * used by the format read routine to detect hard links from stored dev and
931556Srgrimes * inode numbers (like cpio). This will allow pax to create a link when one
941556Srgrimes * can be detected by the archive format.
951556Srgrimes */
961556Srgrimes
971556Srgrimes/*
981556Srgrimes * lnk_start
991556Srgrimes *	Creates the hard link table.
1001556Srgrimes * Return:
1011556Srgrimes *	0 if created, -1 if failure
1021556Srgrimes */
1031556Srgrimes
1041556Srgrimesint
1051556Srgrimeslnk_start(void)
1061556Srgrimes{
1071556Srgrimes	if (ltab != NULL)
1081556Srgrimes		return(0);
1091556Srgrimes 	if ((ltab = (HRDLNK **)calloc(L_TAB_SZ, sizeof(HRDLNK *))) == NULL) {
11076019Skris		paxwarn(1, "Cannot allocate memory for hard link table");
11176019Skris		return(-1);
11276019Skris	}
1131556Srgrimes	return(0);
1141556Srgrimes}
1151556Srgrimes
1161556Srgrimes/*
1171556Srgrimes * chk_lnk()
1181556Srgrimes *	Looks up entry in hard link hash table. If found, it copies the name
1191556Srgrimes *	of the file it is linked to (we already saw that file) into ln_name.
1201556Srgrimes *	lnkcnt is decremented and if goes to 1 the node is deleted from the
1211556Srgrimes *	database. (We have seen all the links to this file). If not found,
1221556Srgrimes *	we add the file to the database if it has the potential for having
1231556Srgrimes *	hard links to other files we may process (it has a link count > 1)
1241556Srgrimes * Return:
1251556Srgrimes *	if found returns 1; if not found returns 0; -1 on error
1261556Srgrimes */
1271556Srgrimes
1281556Srgrimesint
12990113Simpchk_lnk(ARCHD *arcn)
1301556Srgrimes{
13190113Simp	HRDLNK *pt;
13290113Simp	HRDLNK **ppt;
13390113Simp	u_int indx;
1341556Srgrimes
1351556Srgrimes	if (ltab == NULL)
1361556Srgrimes		return(-1);
1371556Srgrimes	/*
1381556Srgrimes	 * ignore those nodes that cannot have hard links
1391556Srgrimes	 */
1401556Srgrimes	if ((arcn->type == PAX_DIR) || (arcn->sb.st_nlink <= 1))
1411556Srgrimes		return(0);
1421556Srgrimes
1431556Srgrimes	/*
1441556Srgrimes	 * hash inode number and look for this file
1451556Srgrimes	 */
1461556Srgrimes	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
1471556Srgrimes	if ((pt = ltab[indx]) != NULL) {
1481556Srgrimes		/*
1491556Srgrimes		 * it's hash chain in not empty, walk down looking for it
1501556Srgrimes		 */
1511556Srgrimes		ppt = &(ltab[indx]);
1521556Srgrimes		while (pt != NULL) {
1531556Srgrimes			if ((pt->ino == arcn->sb.st_ino) &&
1541556Srgrimes			    (pt->dev == arcn->sb.st_dev))
1551556Srgrimes				break;
1561556Srgrimes			ppt = &(pt->fow);
1571556Srgrimes			pt = pt->fow;
1581556Srgrimes		}
1591556Srgrimes
1601556Srgrimes		if (pt != NULL) {
1611556Srgrimes			/*
1621556Srgrimes			 * found a link. set the node type and copy in the
1631556Srgrimes			 * name of the file it is to link to. we need to
1641556Srgrimes			 * handle hardlinks to regular files differently than
1651556Srgrimes			 * other links.
1661556Srgrimes			 */
1671556Srgrimes			arcn->ln_nlen = l_strncpy(arcn->ln_name, pt->name,
16876351Skris				sizeof(arcn->ln_name) - 1);
16976351Skris			arcn->ln_name[arcn->ln_nlen] = '\0';
1701556Srgrimes			if (arcn->type == PAX_REG)
1711556Srgrimes				arcn->type = PAX_HRG;
1721556Srgrimes			else
1731556Srgrimes				arcn->type = PAX_HLK;
1741556Srgrimes
1751556Srgrimes			/*
1761556Srgrimes			 * if we have found all the links to this file, remove
1771556Srgrimes			 * it from the database
1781556Srgrimes			 */
1791556Srgrimes			if (--pt->nlink <= 1) {
1801556Srgrimes				*ppt = pt->fow;
181169993Sbrian				free(pt->name);
182169993Sbrian				free(pt);
1831556Srgrimes			}
1841556Srgrimes			return(1);
1851556Srgrimes		}
1861556Srgrimes	}
1871556Srgrimes
1881556Srgrimes	/*
1891556Srgrimes	 * we never saw this file before. It has links so we add it to the
1901556Srgrimes	 * front of this hash chain
1911556Srgrimes	 */
1921556Srgrimes	if ((pt = (HRDLNK *)malloc(sizeof(HRDLNK))) != NULL) {
1931556Srgrimes		if ((pt->name = strdup(arcn->name)) != NULL) {
1941556Srgrimes			pt->dev = arcn->sb.st_dev;
1951556Srgrimes			pt->ino = arcn->sb.st_ino;
1961556Srgrimes			pt->nlink = arcn->sb.st_nlink;
1971556Srgrimes			pt->fow = ltab[indx];
1981556Srgrimes			ltab[indx] = pt;
1991556Srgrimes			return(0);
2001556Srgrimes		}
201169993Sbrian		free(pt);
2021556Srgrimes	}
2031556Srgrimes
20476017Skris	paxwarn(1, "Hard link table out of memory");
2051556Srgrimes	return(-1);
2061556Srgrimes}
2071556Srgrimes
2081556Srgrimes/*
2091556Srgrimes * purg_lnk
2101556Srgrimes *	remove reference for a file that we may have added to the data base as
2111556Srgrimes *	a potential source for hard links. We ended up not using the file, so
212222177Suqs *	we do not want to accidentally point another file at it later on.
2131556Srgrimes */
2141556Srgrimes
2151556Srgrimesvoid
21690113Simppurg_lnk(ARCHD *arcn)
2171556Srgrimes{
21890113Simp	HRDLNK *pt;
21990113Simp	HRDLNK **ppt;
22090113Simp	u_int indx;
2211556Srgrimes
2221556Srgrimes	if (ltab == NULL)
2231556Srgrimes		return;
2241556Srgrimes	/*
2251556Srgrimes	 * do not bother to look if it could not be in the database
2261556Srgrimes	 */
2271556Srgrimes	if ((arcn->sb.st_nlink <= 1) || (arcn->type == PAX_DIR) ||
2281556Srgrimes	    (arcn->type == PAX_HLK) || (arcn->type == PAX_HRG))
2291556Srgrimes		return;
2301556Srgrimes
2311556Srgrimes	/*
2321556Srgrimes	 * find the hash chain for this inode value, if empty return
2331556Srgrimes	 */
2341556Srgrimes	indx = ((unsigned)arcn->sb.st_ino) % L_TAB_SZ;
2351556Srgrimes	if ((pt = ltab[indx]) == NULL)
2361556Srgrimes		return;
2371556Srgrimes
2381556Srgrimes	/*
2391556Srgrimes	 * walk down the list looking for the inode/dev pair, unlink and
2401556Srgrimes	 * free if found
2411556Srgrimes	 */
2421556Srgrimes	ppt = &(ltab[indx]);
2431556Srgrimes	while (pt != NULL) {
2441556Srgrimes		if ((pt->ino == arcn->sb.st_ino) &&
2451556Srgrimes		    (pt->dev == arcn->sb.st_dev))
2461556Srgrimes			break;
2471556Srgrimes		ppt = &(pt->fow);
2481556Srgrimes		pt = pt->fow;
2491556Srgrimes	}
2501556Srgrimes	if (pt == NULL)
2511556Srgrimes		return;
2521556Srgrimes
2531556Srgrimes	/*
2541556Srgrimes	 * remove and free it
2551556Srgrimes	 */
2561556Srgrimes	*ppt = pt->fow;
257169993Sbrian	free(pt->name);
258169993Sbrian	free(pt);
2591556Srgrimes}
2601556Srgrimes
2611556Srgrimes/*
2621556Srgrimes * lnk_end()
263108533Sschweikh *	Pull apart an existing link table so we can reuse it. We do this between
2641556Srgrimes *	read and write phases of append with update. (The format may have
2651556Srgrimes *	used the link table, and we need to start with a fresh table for the
266108533Sschweikh *	write phase).
2671556Srgrimes */
2681556Srgrimes
2691556Srgrimesvoid
2701556Srgrimeslnk_end(void)
2711556Srgrimes{
27290113Simp	int i;
27390113Simp	HRDLNK *pt;
27490113Simp	HRDLNK *ppt;
2751556Srgrimes
2761556Srgrimes	if (ltab == NULL)
2771556Srgrimes		return;
2781556Srgrimes
2791556Srgrimes	for (i = 0; i < L_TAB_SZ; ++i) {
2801556Srgrimes		if (ltab[i] == NULL)
2811556Srgrimes			continue;
2821556Srgrimes		pt = ltab[i];
2831556Srgrimes		ltab[i] = NULL;
2841556Srgrimes
2851556Srgrimes		/*
2861556Srgrimes		 * free up each entry on this chain
2871556Srgrimes		 */
2881556Srgrimes		while (pt != NULL) {
2891556Srgrimes			ppt = pt;
2901556Srgrimes			pt = ppt->fow;
291169993Sbrian			free(ppt->name);
292169993Sbrian			free(ppt);
2931556Srgrimes		}
2941556Srgrimes	}
2951556Srgrimes	return;
2961556Srgrimes}
2971556Srgrimes
2981556Srgrimes/*
2991556Srgrimes * modification time table routines
3001556Srgrimes *
3011556Srgrimes * The modification time table keeps track of last modification times for all
3021556Srgrimes * files stored in an archive during a write phase when -u is set. We only
3031556Srgrimes * add a file to the archive if it is newer than a file with the same name
3041556Srgrimes * already stored on the archive (if there is no other file with the same
3051556Srgrimes * name on the archive it is added). This applies to writes and appends.
3061556Srgrimes * An append with an -u must read the archive and store the modification time
3071556Srgrimes * for every file on that archive before starting the write phase. It is clear
3081556Srgrimes * that this is one HUGE database. To save memory space, the actual file names
309222177Suqs * are stored in a scratch file and indexed by an in memory hash table. The
3101556Srgrimes * hash table is indexed by hashing the file path. The nodes in the table store
3118855Srgrimes * the length of the filename and the lseek offset within the scratch file
3121556Srgrimes * where the actual name is stored. Since there are never any deletions to this
313108533Sschweikh * table, fragmentation of the scratch file is never an issue. Lookups seem to
3148855Srgrimes * not exhibit any locality at all (files in the database are rarely
3151556Srgrimes * looked up more than once...). So caching is just a waste of memory. The
316222177Suqs * only limitation is the amount of scratch file space available to store the
3171556Srgrimes * path names.
3181556Srgrimes */
3191556Srgrimes
3201556Srgrimes/*
3211556Srgrimes * ftime_start()
3221556Srgrimes *	create the file time hash table and open for read/write the scratch
3231556Srgrimes *	file. (after created it is unlinked, so when we exit we leave
3241556Srgrimes *	no witnesses).
3251556Srgrimes * Return:
3261556Srgrimes *	0 if the table and file was created ok, -1 otherwise
3271556Srgrimes */
3281556Srgrimes
3291556Srgrimesint
3301556Srgrimesftime_start(void)
3311556Srgrimes{
33276019Skris
3331556Srgrimes	if (ftab != NULL)
3341556Srgrimes		return(0);
3351556Srgrimes 	if ((ftab = (FTM **)calloc(F_TAB_SZ, sizeof(FTM *))) == NULL) {
33676019Skris		paxwarn(1, "Cannot allocate memory for file time table");
33776019Skris		return(-1);
33876019Skris	}
3391556Srgrimes
3401556Srgrimes	/*
3411556Srgrimes	 * get random name and create temporary scratch file, unlink name
3421556Srgrimes	 * so it will get removed on exit
3431556Srgrimes	 */
34476016Skris	memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
34576016Skris	if ((ffd = mkstemp(tempfile)) < 0) {
34676017Skris		syswarn(1, errno, "Unable to create temporary file: %s",
34776016Skris		    tempfile);
3481556Srgrimes		return(-1);
3491556Srgrimes	}
35076016Skris	(void)unlink(tempfile);
3511556Srgrimes
3521556Srgrimes	return(0);
3531556Srgrimes}
3541556Srgrimes
3551556Srgrimes/*
3561556Srgrimes * chk_ftime()
3571556Srgrimes *	looks up entry in file time hash table. If not found, the file is
3581556Srgrimes *	added to the hash table and the file named stored in the scratch file.
3591556Srgrimes *	If a file with the same name is found, the file times are compared and
3601556Srgrimes *	the most recent file time is retained. If the new file was younger (or
3611556Srgrimes *	was not in the database) the new file is selected for storage.
3621556Srgrimes * Return:
3631556Srgrimes *	0 if file should be added to the archive, 1 if it should be skipped,
3641556Srgrimes *	-1 on error
3651556Srgrimes */
3661556Srgrimes
3671556Srgrimesint
36890113Simpchk_ftime(ARCHD *arcn)
3691556Srgrimes{
37090113Simp	FTM *pt;
37190113Simp	int namelen;
37290113Simp	u_int indx;
3731556Srgrimes	char ckname[PAXPATHLEN+1];
3741556Srgrimes
3751556Srgrimes	/*
3761556Srgrimes	 * no info, go ahead and add to archive
3771556Srgrimes	 */
3781556Srgrimes	if (ftab == NULL)
3791556Srgrimes		return(0);
3801556Srgrimes
3811556Srgrimes	/*
3821556Srgrimes	 * hash the pathname and look up in table
3831556Srgrimes	 */
3841556Srgrimes	namelen = arcn->nlen;
3851556Srgrimes	indx = st_hash(arcn->name, namelen, F_TAB_SZ);
3861556Srgrimes	if ((pt = ftab[indx]) != NULL) {
3871556Srgrimes		/*
3881556Srgrimes		 * the hash chain is not empty, walk down looking for match
3891556Srgrimes		 * only read up the path names if the lengths match, speeds
3901556Srgrimes		 * up the search a lot
3911556Srgrimes		 */
3921556Srgrimes		while (pt != NULL) {
3931556Srgrimes			if (pt->namelen == namelen) {
3941556Srgrimes				/*
3951556Srgrimes				 * potential match, have to read the name
3961556Srgrimes				 * from the scratch file.
3971556Srgrimes				 */
3981556Srgrimes				if (lseek(ffd,pt->seek,SEEK_SET) != pt->seek) {
39976017Skris					syswarn(1, errno,
4001556Srgrimes					    "Failed ftime table seek");
4011556Srgrimes					return(-1);
4021556Srgrimes				}
4031556Srgrimes				if (read(ffd, ckname, namelen) != namelen) {
40476017Skris					syswarn(1, errno,
4051556Srgrimes					    "Failed ftime table read");
4061556Srgrimes					return(-1);
4071556Srgrimes				}
4081556Srgrimes
4091556Srgrimes				/*
4101556Srgrimes				 * if the names match, we are done
4111556Srgrimes				 */
4121556Srgrimes				if (!strncmp(ckname, arcn->name, namelen))
4131556Srgrimes					break;
4141556Srgrimes			}
4151556Srgrimes
4161556Srgrimes			/*
4171556Srgrimes			 * try the next entry on the chain
4181556Srgrimes			 */
4191556Srgrimes			pt = pt->fow;
4201556Srgrimes		}
4211556Srgrimes
4221556Srgrimes		if (pt != NULL) {
4231556Srgrimes			/*
4241556Srgrimes			 * found the file, compare the times, save the newer
4251556Srgrimes			 */
4261556Srgrimes			if (arcn->sb.st_mtime > pt->mtime) {
4271556Srgrimes				/*
4281556Srgrimes				 * file is newer
4291556Srgrimes				 */
4301556Srgrimes				pt->mtime = arcn->sb.st_mtime;
4311556Srgrimes				return(0);
4328855Srgrimes			}
4331556Srgrimes			/*
4341556Srgrimes			 * file is older
4351556Srgrimes			 */
4361556Srgrimes			return(1);
4371556Srgrimes		}
4381556Srgrimes	}
4391556Srgrimes
4401556Srgrimes	/*
4411556Srgrimes	 * not in table, add it
4421556Srgrimes	 */
4431556Srgrimes	if ((pt = (FTM *)malloc(sizeof(FTM))) != NULL) {
4441556Srgrimes		/*
4451556Srgrimes		 * add the name at the end of the scratch file, saving the
4461556Srgrimes		 * offset. add the file to the head of the hash chain
4471556Srgrimes		 */
4481556Srgrimes		if ((pt->seek = lseek(ffd, (off_t)0, SEEK_END)) >= 0) {
4491556Srgrimes			if (write(ffd, arcn->name, namelen) == namelen) {
4501556Srgrimes				pt->mtime = arcn->sb.st_mtime;
4511556Srgrimes				pt->namelen = namelen;
4521556Srgrimes				pt->fow = ftab[indx];
4531556Srgrimes				ftab[indx] = pt;
4541556Srgrimes				return(0);
4551556Srgrimes			}
45676017Skris			syswarn(1, errno, "Failed write to file time table");
4578855Srgrimes		} else
45876017Skris			syswarn(1, errno, "Failed seek on file time table");
4591556Srgrimes	} else
46076017Skris		paxwarn(1, "File time table ran out of memory");
4611556Srgrimes
4621556Srgrimes	if (pt != NULL)
463169993Sbrian		free(pt);
4641556Srgrimes	return(-1);
4651556Srgrimes}
4661556Srgrimes
4671556Srgrimes/*
4681556Srgrimes * Interactive rename table routines
4691556Srgrimes *
4701556Srgrimes * The interactive rename table keeps track of the new names that the user
47146684Skris * assigns to files from tty input. Since this map is unique for each file
4721556Srgrimes * we must store it in case there is a reference to the file later in archive
4731556Srgrimes * (a link). Otherwise we will be unable to find the file we know was
4741556Srgrimes * extracted. The remapping of these files is stored in a memory based hash
4751556Srgrimes * table (it is assumed since input must come from /dev/tty, it is unlikely to
4761556Srgrimes * be a very large table).
4771556Srgrimes */
4781556Srgrimes
4791556Srgrimes/*
4801556Srgrimes * name_start()
4811556Srgrimes *	create the interactive rename table
4821556Srgrimes * Return:
4831556Srgrimes *	0 if successful, -1 otherwise
4841556Srgrimes */
4851556Srgrimes
4861556Srgrimesint
4871556Srgrimesname_start(void)
4881556Srgrimes{
4891556Srgrimes	if (ntab != NULL)
4901556Srgrimes		return(0);
4911556Srgrimes 	if ((ntab = (NAMT **)calloc(N_TAB_SZ, sizeof(NAMT *))) == NULL) {
49276019Skris		paxwarn(1, "Cannot allocate memory for interactive rename table");
49376019Skris		return(-1);
49476019Skris	}
4951556Srgrimes	return(0);
4961556Srgrimes}
4971556Srgrimes
4981556Srgrimes/*
4991556Srgrimes * add_name()
5001556Srgrimes *	add the new name to old name mapping just created by the user.
5011556Srgrimes *	If an old name mapping is found (there may be duplicate names on an
5021556Srgrimes *	archive) only the most recent is kept.
5031556Srgrimes * Return:
5041556Srgrimes *	0 if added, -1 otherwise
5051556Srgrimes */
5061556Srgrimes
5071556Srgrimesint
50890113Simpadd_name(char *oname, int onamelen, char *nname)
5091556Srgrimes{
51090113Simp	NAMT *pt;
51190113Simp	u_int indx;
5121556Srgrimes
5131556Srgrimes	if (ntab == NULL) {
5141556Srgrimes		/*
5151556Srgrimes		 * should never happen
5161556Srgrimes		 */
51776017Skris		paxwarn(0, "No interactive rename table, links may fail\n");
5188855Srgrimes		return(0);
5191556Srgrimes	}
5201556Srgrimes
5211556Srgrimes	/*
5221556Srgrimes	 * look to see if we have already mapped this file, if so we
5231556Srgrimes	 * will update it
5241556Srgrimes	 */
5251556Srgrimes	indx = st_hash(oname, onamelen, N_TAB_SZ);
5261556Srgrimes	if ((pt = ntab[indx]) != NULL) {
5271556Srgrimes		/*
5281556Srgrimes		 * look down the has chain for the file
5291556Srgrimes		 */
5301556Srgrimes		while ((pt != NULL) && (strcmp(oname, pt->oname) != 0))
5311556Srgrimes			pt = pt->fow;
5321556Srgrimes
5331556Srgrimes		if (pt != NULL) {
5341556Srgrimes			/*
5351556Srgrimes			 * found an old mapping, replace it with the new one
5361556Srgrimes			 * the user just input (if it is different)
5371556Srgrimes			 */
5381556Srgrimes			if (strcmp(nname, pt->nname) == 0)
5391556Srgrimes				return(0);
5401556Srgrimes
541169993Sbrian			free(pt->nname);
5421556Srgrimes			if ((pt->nname = strdup(nname)) == NULL) {
54376017Skris				paxwarn(1, "Cannot update rename table");
5441556Srgrimes				return(-1);
5451556Srgrimes			}
5461556Srgrimes			return(0);
5471556Srgrimes		}
5481556Srgrimes	}
5491556Srgrimes
5501556Srgrimes	/*
5511556Srgrimes	 * this is a new mapping, add it to the table
5521556Srgrimes	 */
5531556Srgrimes	if ((pt = (NAMT *)malloc(sizeof(NAMT))) != NULL) {
5541556Srgrimes		if ((pt->oname = strdup(oname)) != NULL) {
5551556Srgrimes			if ((pt->nname = strdup(nname)) != NULL) {
5561556Srgrimes				pt->fow = ntab[indx];
5571556Srgrimes				ntab[indx] = pt;
5581556Srgrimes				return(0);
5591556Srgrimes			}
560169993Sbrian			free(pt->oname);
5611556Srgrimes		}
562169993Sbrian		free(pt);
5631556Srgrimes	}
56476017Skris	paxwarn(1, "Interactive rename table out of memory");
5651556Srgrimes	return(-1);
5661556Srgrimes}
5671556Srgrimes
5681556Srgrimes/*
5691556Srgrimes * sub_name()
5701556Srgrimes *	look up a link name to see if it points at a file that has been
5711556Srgrimes *	remapped by the user. If found, the link is adjusted to contain the
5721556Srgrimes *	new name (oname is the link to name)
5731556Srgrimes */
5741556Srgrimes
5751556Srgrimesvoid
57690113Simpsub_name(char *oname, int *onamelen, size_t onamesize)
5771556Srgrimes{
57890113Simp	NAMT *pt;
57990113Simp	u_int indx;
5801556Srgrimes
5811556Srgrimes	if (ntab == NULL)
5821556Srgrimes		return;
5831556Srgrimes	/*
5841556Srgrimes	 * look the name up in the hash table
5851556Srgrimes	 */
5861556Srgrimes	indx = st_hash(oname, *onamelen, N_TAB_SZ);
5871556Srgrimes	if ((pt = ntab[indx]) == NULL)
5881556Srgrimes		return;
5891556Srgrimes
5901556Srgrimes	while (pt != NULL) {
5911556Srgrimes		/*
59276017Skris		 * walk down the hash chain looking for a match
5931556Srgrimes		 */
5941556Srgrimes		if (strcmp(oname, pt->oname) == 0) {
5951556Srgrimes			/*
5961556Srgrimes			 * found it, replace it with the new name
5971556Srgrimes			 * and return (we know that oname has enough space)
5981556Srgrimes			 */
59976351Skris			*onamelen = l_strncpy(oname, pt->nname, onamesize - 1);
60076351Skris			oname[*onamelen] = '\0';
6011556Srgrimes			return;
6021556Srgrimes		}
6031556Srgrimes		pt = pt->fow;
6041556Srgrimes	}
6051556Srgrimes
6061556Srgrimes	/*
6071556Srgrimes	 * no match, just return
6081556Srgrimes	 */
6091556Srgrimes	return;
6101556Srgrimes}
6118855Srgrimes
6121556Srgrimes/*
6131556Srgrimes * device/inode mapping table routines
6141556Srgrimes * (used with formats that store device and inodes fields)
6151556Srgrimes *
616108533Sschweikh * device/inode mapping tables remap the device field in an archive header. The
6171556Srgrimes * device/inode fields are used to determine when files are hard links to each
6181556Srgrimes * other. However these values have very little meaning outside of that. This
6191556Srgrimes * database is used to solve one of two different problems.
6201556Srgrimes *
6211556Srgrimes * 1) when files are appended to an archive, while the new files may have hard
6221556Srgrimes * links to each other, you cannot determine if they have hard links to any
6231556Srgrimes * file already stored on the archive from a prior run of pax. We must assume
6241556Srgrimes * that these inode/device pairs are unique only within a SINGLE run of pax
6251556Srgrimes * (which adds a set of files to an archive). So we have to make sure the
6261556Srgrimes * inode/dev pairs we add each time are always unique. We do this by observing
6271556Srgrimes * while the inode field is very dense, the use of the dev field is fairly
6281556Srgrimes * sparse. Within each run of pax, we remap any device number of a new archive
6291556Srgrimes * member that has a device number used in a prior run and already stored in a
6301556Srgrimes * file on the archive. During the read phase of the append, we store the
6311556Srgrimes * device numbers used and mark them to not be used by any file during the
6321556Srgrimes * write phase. If during write we go to use one of those old device numbers,
6331556Srgrimes * we remap it to a new value.
6341556Srgrimes *
6351556Srgrimes * 2) Often the fields in the archive header used to store these values are
6361556Srgrimes * too small to store the entire value. The result is an inode or device value
6371556Srgrimes * which can be truncated. This really can foul up an archive. With truncation
6381556Srgrimes * we end up creating links between files that are really not links (after
6391556Srgrimes * truncation the inodes are the same value). We address that by detecting
6401556Srgrimes * truncation and forcing a remap of the device field to split truncated
6411556Srgrimes * inodes away from each other. Each truncation creates a pattern of bits that
6421556Srgrimes * are removed. We use this pattern of truncated bits to partition the inodes
6431556Srgrimes * on a single device to many different devices (each one represented by the
6441556Srgrimes * truncated bit pattern). All inodes on the same device that have the same
6451556Srgrimes * truncation pattern are mapped to the same new device. Two inodes that
6461556Srgrimes * truncate to the same value clearly will always have different truncation
6471556Srgrimes * bit patterns, so they will be split from away each other. When we spot
6481556Srgrimes * device truncation we remap the device number to a non truncated value.
6491556Srgrimes * (for more info see table.h for the data structures involved).
6501556Srgrimes */
6511556Srgrimes
6521556Srgrimes/*
6531556Srgrimes * dev_start()
6541556Srgrimes *	create the device mapping table
6551556Srgrimes * Return:
6561556Srgrimes *	0 if successful, -1 otherwise
6571556Srgrimes */
6581556Srgrimes
6591556Srgrimesint
6601556Srgrimesdev_start(void)
6611556Srgrimes{
6621556Srgrimes	if (dtab != NULL)
6631556Srgrimes		return(0);
6641556Srgrimes 	if ((dtab = (DEVT **)calloc(D_TAB_SZ, sizeof(DEVT *))) == NULL) {
66576019Skris		paxwarn(1, "Cannot allocate memory for device mapping table");
66676019Skris		return(-1);
66776019Skris	}
6681556Srgrimes	return(0);
6691556Srgrimes}
6701556Srgrimes
6711556Srgrimes/*
6721556Srgrimes * add_dev()
6731556Srgrimes *	add a device number to the table. this will force the device to be
6741556Srgrimes *	remapped to a new value if it be used during a write phase. This
6751556Srgrimes *	function is called during the read phase of an append to prohibit the
6761556Srgrimes *	use of any device number already in the archive.
6771556Srgrimes * Return:
6781556Srgrimes *	0 if added ok, -1 otherwise
6791556Srgrimes */
6801556Srgrimes
6811556Srgrimesint
68290113Simpadd_dev(ARCHD *arcn)
6831556Srgrimes{
6841556Srgrimes	if (chk_dev(arcn->sb.st_dev, 1) == NULL)
6851556Srgrimes		return(-1);
6861556Srgrimes	return(0);
6871556Srgrimes}
6881556Srgrimes
6891556Srgrimes/*
6901556Srgrimes * chk_dev()
6911556Srgrimes *	check for a device value in the device table. If not found and the add
6921556Srgrimes *	flag is set, it is added. This does NOT assign any mapping values, just
6931556Srgrimes *	adds the device number as one that need to be remapped. If this device
69446684Skris *	is already mapped, just return with a pointer to that entry.
6951556Srgrimes * Return:
6961556Srgrimes *	pointer to the entry for this device in the device map table. Null
6971556Srgrimes *	if the add flag is not set and the device is not in the table (it is
6981556Srgrimes *	not been seen yet). If add is set and the device cannot be added, null
6991556Srgrimes *	is returned (indicates an error).
7001556Srgrimes */
7011556Srgrimes
7021556Srgrimesstatic DEVT *
7031556Srgrimeschk_dev(dev_t dev, int add)
7041556Srgrimes{
70590113Simp	DEVT *pt;
70690113Simp	u_int indx;
7071556Srgrimes
7081556Srgrimes	if (dtab == NULL)
7091556Srgrimes		return(NULL);
7101556Srgrimes	/*
7111556Srgrimes	 * look to see if this device is already in the table
7121556Srgrimes	 */
7131556Srgrimes	indx = ((unsigned)dev) % D_TAB_SZ;
7141556Srgrimes	if ((pt = dtab[indx]) != NULL) {
7151556Srgrimes		while ((pt != NULL) && (pt->dev != dev))
7161556Srgrimes			pt = pt->fow;
7171556Srgrimes
7181556Srgrimes		/*
7191556Srgrimes		 * found it, return a pointer to it
7201556Srgrimes		 */
7211556Srgrimes		if (pt != NULL)
7221556Srgrimes			return(pt);
7231556Srgrimes	}
7241556Srgrimes
7251556Srgrimes	/*
7261556Srgrimes	 * not in table, we add it only if told to as this may just be a check
7271556Srgrimes	 * to see if a device number is being used.
7281556Srgrimes	 */
7291556Srgrimes	if (add == 0)
7301556Srgrimes		return(NULL);
7311556Srgrimes
7321556Srgrimes	/*
7331556Srgrimes	 * allocate a node for this device and add it to the front of the hash
7341556Srgrimes	 * chain. Note we do not assign remaps values here, so the pt->list
7351556Srgrimes	 * list must be NULL.
7361556Srgrimes	 */
7371556Srgrimes	if ((pt = (DEVT *)malloc(sizeof(DEVT))) == NULL) {
73876017Skris		paxwarn(1, "Device map table out of memory");
7391556Srgrimes		return(NULL);
7401556Srgrimes	}
7411556Srgrimes	pt->dev = dev;
7421556Srgrimes	pt->list = NULL;
7431556Srgrimes	pt->fow = dtab[indx];
7441556Srgrimes	dtab[indx] = pt;
7451556Srgrimes	return(pt);
7461556Srgrimes}
7471556Srgrimes/*
7481556Srgrimes * map_dev()
7491556Srgrimes *	given an inode and device storage mask (the mask has a 1 for each bit
7501556Srgrimes *	the archive format is able to store in a header), we check for inode
7511556Srgrimes *	and device truncation and remap the device as required. Device mapping
7521556Srgrimes *	can also occur when during the read phase of append a device number was
7531556Srgrimes *	seen (and was marked as do not use during the write phase). WE ASSUME
7541556Srgrimes *	that unsigned longs are the same size or bigger than the fields used
7551556Srgrimes *	for ino_t and dev_t. If not the types will have to be changed.
7561556Srgrimes * Return:
7571556Srgrimes *	0 if all ok, -1 otherwise.
7581556Srgrimes */
7591556Srgrimes
7601556Srgrimesint
76190113Simpmap_dev(ARCHD *arcn, u_long dev_mask, u_long ino_mask)
7621556Srgrimes{
76390113Simp	DEVT *pt;
76490113Simp	DLIST *dpt;
7651556Srgrimes	static dev_t lastdev = 0;	/* next device number to try */
7661556Srgrimes	int trc_ino = 0;
7671556Srgrimes	int trc_dev = 0;
7681556Srgrimes	ino_t trunc_bits = 0;
7691556Srgrimes	ino_t nino;
7701556Srgrimes
7711556Srgrimes	if (dtab == NULL)
7721556Srgrimes		return(0);
7731556Srgrimes	/*
7741556Srgrimes	 * check for device and inode truncation, and extract the truncated
7758855Srgrimes	 * bit pattern.
7761556Srgrimes	 */
7771556Srgrimes	if ((arcn->sb.st_dev & (dev_t)dev_mask) != arcn->sb.st_dev)
7781556Srgrimes		++trc_dev;
7791556Srgrimes	if ((nino = arcn->sb.st_ino & (ino_t)ino_mask) != arcn->sb.st_ino) {
7801556Srgrimes		++trc_ino;
7811556Srgrimes		trunc_bits = arcn->sb.st_ino & (ino_t)(~ino_mask);
7821556Srgrimes	}
7831556Srgrimes
7841556Srgrimes	/*
7851556Srgrimes	 * see if this device is already being mapped, look up the device
7861556Srgrimes	 * then find the truncation bit pattern which applies
7871556Srgrimes	 */
7881556Srgrimes	if ((pt = chk_dev(arcn->sb.st_dev, 0)) != NULL) {
7891556Srgrimes		/*
7901556Srgrimes		 * this device is already marked to be remapped
7911556Srgrimes		 */
7921556Srgrimes		for (dpt = pt->list; dpt != NULL; dpt = dpt->fow)
7931556Srgrimes			if (dpt->trunc_bits == trunc_bits)
7941556Srgrimes				break;
7951556Srgrimes
7961556Srgrimes		if (dpt != NULL) {
7971556Srgrimes			/*
7981556Srgrimes			 * we are being remapped for this device and pattern
7991556Srgrimes			 * change the device number to be stored and return
8001556Srgrimes			 */
8011556Srgrimes			arcn->sb.st_dev = dpt->dev;
8021556Srgrimes			arcn->sb.st_ino = nino;
8031556Srgrimes			return(0);
8041556Srgrimes		}
8051556Srgrimes	} else {
8061556Srgrimes		/*
8071556Srgrimes		 * this device is not being remapped YET. if we do not have any
8081556Srgrimes		 * form of truncation, we do not need a remap
8091556Srgrimes		 */
8101556Srgrimes		if (!trc_ino && !trc_dev)
8111556Srgrimes			return(0);
8121556Srgrimes
8131556Srgrimes		/*
8141556Srgrimes		 * we have truncation, have to add this as a device to remap
8151556Srgrimes		 */
8161556Srgrimes		if ((pt = chk_dev(arcn->sb.st_dev, 1)) == NULL)
8171556Srgrimes			goto bad;
8181556Srgrimes
8191556Srgrimes		/*
8201556Srgrimes		 * if we just have a truncated inode, we have to make sure that
8211556Srgrimes		 * all future inodes that do not truncate (they have the
8221556Srgrimes		 * truncation pattern of all 0's) continue to map to the same
8231556Srgrimes		 * device number. We probably have already written inodes with
8241556Srgrimes		 * this device number to the archive with the truncation
8251556Srgrimes		 * pattern of all 0's. So we add the mapping for all 0's to the
8261556Srgrimes		 * same device number.
8271556Srgrimes		 */
8281556Srgrimes		if (!trc_dev && (trunc_bits != 0)) {
8291556Srgrimes			if ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL)
8301556Srgrimes				goto bad;
8311556Srgrimes			dpt->trunc_bits = 0;
8321556Srgrimes			dpt->dev = arcn->sb.st_dev;
8331556Srgrimes			dpt->fow = pt->list;
8341556Srgrimes			pt->list = dpt;
8351556Srgrimes		}
8361556Srgrimes	}
8371556Srgrimes
8381556Srgrimes	/*
8391556Srgrimes	 * look for a device number not being used. We must watch for wrap
8401556Srgrimes	 * around on lastdev (so we do not get stuck looking forever!)
8411556Srgrimes	 */
8421556Srgrimes	while (++lastdev > 0) {
8431556Srgrimes		if (chk_dev(lastdev, 0) != NULL)
8441556Srgrimes			continue;
8451556Srgrimes		/*
8461556Srgrimes		 * found an unused value. If we have reached truncation point
8471556Srgrimes		 * for this format we are hosed, so we give up. Otherwise we
8481556Srgrimes		 * mark it as being used.
8491556Srgrimes		 */
8501556Srgrimes		if (((lastdev & ((dev_t)dev_mask)) != lastdev) ||
8511556Srgrimes		    (chk_dev(lastdev, 1) == NULL))
8521556Srgrimes			goto bad;
8531556Srgrimes		break;
8541556Srgrimes	}
8551556Srgrimes
8561556Srgrimes	if ((lastdev <= 0) || ((dpt = (DLIST *)malloc(sizeof(DLIST))) == NULL))
8571556Srgrimes		goto bad;
8581556Srgrimes
8591556Srgrimes	/*
8601556Srgrimes	 * got a new device number, store it under this truncation pattern.
8611556Srgrimes	 * change the device number this file is being stored with.
8621556Srgrimes	 */
8631556Srgrimes	dpt->trunc_bits = trunc_bits;
8641556Srgrimes	dpt->dev = lastdev;
8651556Srgrimes	dpt->fow = pt->list;
8661556Srgrimes	pt->list = dpt;
8671556Srgrimes	arcn->sb.st_dev = lastdev;
8681556Srgrimes	arcn->sb.st_ino = nino;
8691556Srgrimes	return(0);
8701556Srgrimes
8711556Srgrimes    bad:
87276017Skris	paxwarn(1, "Unable to fix truncated inode/device field when storing %s",
8731556Srgrimes	    arcn->name);
87476017Skris	paxwarn(0, "Archive may create improper hard links when extracted");
8751556Srgrimes	return(0);
8761556Srgrimes}
8771556Srgrimes
8781556Srgrimes/*
8791556Srgrimes * directory access/mod time reset table routines (for directories READ by pax)
8801556Srgrimes *
8811556Srgrimes * The pax -t flag requires that access times of archive files to be the same
8821556Srgrimes * before being read by pax. For regular files, access time is restored after
8831556Srgrimes * the file has been copied. This database provides the same functionality for
8841556Srgrimes * directories read during file tree traversal. Restoring directory access time
8851556Srgrimes * is more complex than files since directories may be read several times until
8861556Srgrimes * all the descendants in their subtree are visited by fts. Directory access
8871556Srgrimes * and modification times are stored during the fts pre-order visit (done
8881556Srgrimes * before any descendants in the subtree is visited) and restored after the
8891556Srgrimes * fts post-order visit (after all the descendants have been visited). In the
8901556Srgrimes * case of premature exit from a subtree (like from the effects of -n), any
8911556Srgrimes * directory entries left in this database are reset during final cleanup
8921556Srgrimes * operations of pax. Entries are hashed by inode number for fast lookup.
8931556Srgrimes */
8941556Srgrimes
8951556Srgrimes/*
8961556Srgrimes * atdir_start()
8971556Srgrimes *	create the directory access time database for directories READ by pax.
8981556Srgrimes * Return:
8991556Srgrimes *	0 is created ok, -1 otherwise.
9001556Srgrimes */
9011556Srgrimes
9021556Srgrimesint
9031556Srgrimesatdir_start(void)
9041556Srgrimes{
9051556Srgrimes	if (atab != NULL)
9061556Srgrimes		return(0);
9071556Srgrimes 	if ((atab = (ATDIR **)calloc(A_TAB_SZ, sizeof(ATDIR *))) == NULL) {
90876019Skris		paxwarn(1,"Cannot allocate space for directory access time table");
90976019Skris		return(-1);
91076019Skris	}
9111556Srgrimes	return(0);
9121556Srgrimes}
9131556Srgrimes
9141556Srgrimes
9151556Srgrimes/*
9161556Srgrimes * atdir_end()
9171556Srgrimes *	walk through the directory access time table and reset the access time
9181556Srgrimes *	of any directory who still has an entry left in the database. These
9191556Srgrimes *	entries are for directories READ by pax
9201556Srgrimes */
9211556Srgrimes
9221556Srgrimesvoid
9231556Srgrimesatdir_end(void)
9241556Srgrimes{
92590113Simp	ATDIR *pt;
92690113Simp	int i;
9271556Srgrimes
9281556Srgrimes	if (atab == NULL)
9291556Srgrimes		return;
9301556Srgrimes	/*
9311556Srgrimes	 * for each non-empty hash table entry reset all the directories
9321556Srgrimes	 * chained there.
9331556Srgrimes	 */
9341556Srgrimes	for (i = 0; i < A_TAB_SZ; ++i) {
9351556Srgrimes		if ((pt = atab[i]) == NULL)
9361556Srgrimes			continue;
9371556Srgrimes		/*
9381556Srgrimes		 * remember to force the times, set_ftime() looks at pmtime
9391556Srgrimes		 * and patime, which only applies to things CREATED by pax,
9401556Srgrimes		 * not read by pax. Read time reset is controlled by -t.
9411556Srgrimes		 */
9421556Srgrimes		for (; pt != NULL; pt = pt->fow)
9431556Srgrimes			set_ftime(pt->name, pt->mtime, pt->atime, 1);
9441556Srgrimes	}
9451556Srgrimes}
9461556Srgrimes
9471556Srgrimes/*
9481556Srgrimes * add_atdir()
9491556Srgrimes *	add a directory to the directory access time table. Table is hashed
9501556Srgrimes *	and chained by inode number. This is for directories READ by pax
9511556Srgrimes */
9521556Srgrimes
9531556Srgrimesvoid
9541556Srgrimesadd_atdir(char *fname, dev_t dev, ino_t ino, time_t mtime, time_t atime)
9551556Srgrimes{
95690113Simp	ATDIR *pt;
95790113Simp	u_int indx;
9581556Srgrimes
9591556Srgrimes	if (atab == NULL)
9601556Srgrimes		return;
9611556Srgrimes
9621556Srgrimes	/*
9638855Srgrimes	 * make sure this directory is not already in the table, if so just
9641556Srgrimes	 * return (the older entry always has the correct time). The only
9651556Srgrimes	 * way this will happen is when the same subtree can be traversed by
9661556Srgrimes	 * different args to pax and the -n option is aborting fts out of a
9671556Srgrimes	 * subtree before all the post-order visits have been made).
9681556Srgrimes	 */
9691556Srgrimes	indx = ((unsigned)ino) % A_TAB_SZ;
9701556Srgrimes	if ((pt = atab[indx]) != NULL) {
9711556Srgrimes		while (pt != NULL) {
9721556Srgrimes			if ((pt->ino == ino) && (pt->dev == dev))
9731556Srgrimes				break;
9741556Srgrimes			pt = pt->fow;
9751556Srgrimes		}
9761556Srgrimes
9771556Srgrimes		/*
9781556Srgrimes		 * oops, already there. Leave it alone.
9791556Srgrimes		 */
9801556Srgrimes		if (pt != NULL)
9811556Srgrimes			return;
9821556Srgrimes	}
9831556Srgrimes
9841556Srgrimes	/*
9851556Srgrimes	 * add it to the front of the hash chain
9861556Srgrimes	 */
9871556Srgrimes	if ((pt = (ATDIR *)malloc(sizeof(ATDIR))) != NULL) {
9881556Srgrimes		if ((pt->name = strdup(fname)) != NULL) {
9891556Srgrimes			pt->dev = dev;
9901556Srgrimes			pt->ino = ino;
9911556Srgrimes			pt->mtime = mtime;
9921556Srgrimes			pt->atime = atime;
9931556Srgrimes			pt->fow = atab[indx];
9941556Srgrimes			atab[indx] = pt;
9951556Srgrimes			return;
9961556Srgrimes		}
997169993Sbrian		free(pt);
9981556Srgrimes	}
9991556Srgrimes
100076017Skris	paxwarn(1, "Directory access time reset table ran out of memory");
10011556Srgrimes	return;
10021556Srgrimes}
10031556Srgrimes
10041556Srgrimes/*
10051556Srgrimes * get_atdir()
10061556Srgrimes *	look up a directory by inode and device number to obtain the access
10071556Srgrimes *	and modification time you want to set to. If found, the modification
10081556Srgrimes *	and access time parameters are set and the entry is removed from the
10091556Srgrimes *	table (as it is no longer needed). These are for directories READ by
10101556Srgrimes *	pax
10111556Srgrimes * Return:
10121556Srgrimes *	0 if found, -1 if not found.
10131556Srgrimes */
10141556Srgrimes
10151556Srgrimesint
10161556Srgrimesget_atdir(dev_t dev, ino_t ino, time_t *mtime, time_t *atime)
10171556Srgrimes{
101890113Simp	ATDIR *pt;
101990113Simp	ATDIR **ppt;
102090113Simp	u_int indx;
10211556Srgrimes
10221556Srgrimes	if (atab == NULL)
10231556Srgrimes		return(-1);
10241556Srgrimes	/*
10251556Srgrimes	 * hash by inode and search the chain for an inode and device match
10261556Srgrimes	 */
10271556Srgrimes	indx = ((unsigned)ino) % A_TAB_SZ;
10281556Srgrimes	if ((pt = atab[indx]) == NULL)
10291556Srgrimes		return(-1);
10301556Srgrimes
10311556Srgrimes	ppt = &(atab[indx]);
10321556Srgrimes	while (pt != NULL) {
10331556Srgrimes		if ((pt->ino == ino) && (pt->dev == dev))
10341556Srgrimes			break;
10351556Srgrimes		/*
10361556Srgrimes		 * no match, go to next one
10371556Srgrimes		 */
10381556Srgrimes		ppt = &(pt->fow);
10391556Srgrimes		pt = pt->fow;
10401556Srgrimes	}
10411556Srgrimes
10421556Srgrimes	/*
10431556Srgrimes	 * return if we did not find it.
10441556Srgrimes	 */
10451556Srgrimes	if (pt == NULL)
10461556Srgrimes		return(-1);
10471556Srgrimes
10481556Srgrimes	/*
10491556Srgrimes	 * found it. return the times and remove the entry from the table.
10501556Srgrimes	 */
10511556Srgrimes	*ppt = pt->fow;
10521556Srgrimes	*mtime = pt->mtime;
10531556Srgrimes	*atime = pt->atime;
1054169993Sbrian	free(pt->name);
1055169993Sbrian	free(pt);
10561556Srgrimes	return(0);
10571556Srgrimes}
10581556Srgrimes
10591556Srgrimes/*
10601556Srgrimes * directory access mode and time storage routines (for directories CREATED
10611556Srgrimes * by pax).
10621556Srgrimes *
10631556Srgrimes * Pax requires that extracted directories, by default, have their access/mod
10641556Srgrimes * times and permissions set to the values specified in the archive. During the
10651556Srgrimes * actions of extracting (and creating the destination subtree during -rw copy)
10661556Srgrimes * directories extracted may be modified after being created. Even worse is
10671556Srgrimes * that these directories may have been created with file permissions which
10681556Srgrimes * prohibits any descendants of these directories from being extracted. When
10691556Srgrimes * directories are created by pax, access rights may be added to permit the
10701556Srgrimes * creation of files in their subtree. Every time pax creates a directory, the
10711556Srgrimes * times and file permissions specified by the archive are stored. After all
10721556Srgrimes * files have been extracted (or copied), these directories have their times
10731556Srgrimes * and file modes reset to the stored values. The directory info is restored in
10741556Srgrimes * reverse order as entries were added to the data file from root to leaf. To
10751556Srgrimes * restore atime properly, we must go backwards. The data file consists of
10761556Srgrimes * records with two parts, the file name followed by a DIRDATA trailer. The
10771556Srgrimes * fixed sized trailer contains the size of the name plus the off_t location in
10781556Srgrimes * the file. To restore we work backwards through the file reading the trailer
10791556Srgrimes * then the file name.
10801556Srgrimes */
10811556Srgrimes
10821556Srgrimes/*
10831556Srgrimes * dir_start()
10841556Srgrimes *	set up the directory time and file mode storage for directories CREATED
10851556Srgrimes *	by pax.
10861556Srgrimes * Return:
10871556Srgrimes *	0 if ok, -1 otherwise
10881556Srgrimes */
10891556Srgrimes
10901556Srgrimesint
10911556Srgrimesdir_start(void)
10921556Srgrimes{
109376019Skris
10941556Srgrimes	if (dirfd != -1)
10951556Srgrimes		return(0);
10961556Srgrimes
10971556Srgrimes	/*
10981556Srgrimes	 * unlink the file so it goes away at termination by itself
10991556Srgrimes	 */
110076016Skris	memcpy(tempbase, _TFILE_BASE, sizeof(_TFILE_BASE));
110176016Skris	if ((dirfd = mkstemp(tempfile)) >= 0) {
110276016Skris		(void)unlink(tempfile);
11031556Srgrimes		return(0);
11041556Srgrimes	}
110576017Skris	paxwarn(1, "Unable to create temporary file for directory times: %s",
110676016Skris	    tempfile);
11071556Srgrimes	return(-1);
11081556Srgrimes}
11091556Srgrimes
11101556Srgrimes/*
11111556Srgrimes * add_dir()
11121556Srgrimes *	add the mode and times for a newly CREATED directory
11131556Srgrimes *	name is name of the directory, psb the stat buffer with the data in it,
11141556Srgrimes *	frc_mode is a flag that says whether to force the setting of the mode
11151556Srgrimes *	(ignoring the user set values for preserving file mode). Frc_mode is
11168855Srgrimes *	for the case where we created a file and found that the resulting
11171556Srgrimes *	directory was not writeable and the user asked for file modes to NOT
11181556Srgrimes *	be preserved. (we have to preserve what was created by default, so we
11191556Srgrimes *	have to force the setting at the end. this is stated explicitly in the
11201556Srgrimes *	pax spec)
11211556Srgrimes */
11221556Srgrimes
11231556Srgrimesvoid
11241556Srgrimesadd_dir(char *name, int nlen, struct stat *psb, int frc_mode)
11251556Srgrimes{
11261556Srgrimes	DIRDATA dblk;
11271556Srgrimes
11281556Srgrimes	if (dirfd < 0)
11291556Srgrimes		return;
11301556Srgrimes
11311556Srgrimes	/*
11321556Srgrimes	 * get current position (where file name will start) so we can store it
11331556Srgrimes	 * in the trailer
11341556Srgrimes	 */
11351556Srgrimes	if ((dblk.npos = lseek(dirfd, 0L, SEEK_CUR)) < 0) {
113676017Skris		paxwarn(1,"Unable to store mode and times for directory: %s",name);
11371556Srgrimes		return;
11381556Srgrimes	}
11391556Srgrimes
11401556Srgrimes	/*
11411556Srgrimes	 * write the file name followed by the trailer
11421556Srgrimes	 */
11431556Srgrimes	dblk.nlen = nlen + 1;
11441556Srgrimes	dblk.mode = psb->st_mode & 0xffff;
11451556Srgrimes	dblk.mtime = psb->st_mtime;
11461556Srgrimes	dblk.atime = psb->st_atime;
11471556Srgrimes	dblk.frc_mode = frc_mode;
11481556Srgrimes	if ((write(dirfd, name, dblk.nlen) == dblk.nlen) &&
11491556Srgrimes	    (write(dirfd, (char *)&dblk, sizeof(dblk)) == sizeof(dblk))) {
11501556Srgrimes		++dircnt;
11511556Srgrimes		return;
11521556Srgrimes	}
11531556Srgrimes
115476017Skris	paxwarn(1,"Unable to store mode and times for created directory: %s",name);
11551556Srgrimes	return;
11561556Srgrimes}
11571556Srgrimes
11581556Srgrimes/*
11591556Srgrimes * proc_dir()
11601556Srgrimes *	process all file modes and times stored for directories CREATED
11611556Srgrimes *	by pax
11621556Srgrimes */
11631556Srgrimes
11641556Srgrimesvoid
11651556Srgrimesproc_dir(void)
11661556Srgrimes{
11671556Srgrimes	char name[PAXPATHLEN+1];
11681556Srgrimes	DIRDATA dblk;
11691556Srgrimes	u_long cnt;
11701556Srgrimes
11711556Srgrimes	if (dirfd < 0)
11721556Srgrimes		return;
11731556Srgrimes	/*
11741556Srgrimes	 * read backwards through the file and process each directory
11751556Srgrimes	 */
11761556Srgrimes	for (cnt = 0; cnt < dircnt; ++cnt) {
11771556Srgrimes		/*
11781556Srgrimes		 * read the trailer, then the file name, if this fails
11791556Srgrimes		 * just give up.
11801556Srgrimes		 */
11818855Srgrimes		if (lseek(dirfd, -((off_t)sizeof(dblk)), SEEK_CUR) < 0)
11821556Srgrimes			break;
11831556Srgrimes		if (read(dirfd,(char *)&dblk, sizeof(dblk)) != sizeof(dblk))
11841556Srgrimes			break;
11858855Srgrimes		if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)
11861556Srgrimes			break;
11871556Srgrimes		if (read(dirfd, name, dblk.nlen) != dblk.nlen)
11881556Srgrimes			break;
11898855Srgrimes		if (lseek(dirfd, dblk.npos, SEEK_SET) < 0)
11901556Srgrimes			break;
11911556Srgrimes
11921556Srgrimes		/*
11931556Srgrimes		 * frc_mode set, make sure we set the file modes even if
11941556Srgrimes		 * the user didn't ask for it (see file_subs.c for more info)
11951556Srgrimes		 */
11961556Srgrimes		if (pmode || dblk.frc_mode)
11971556Srgrimes			set_pmode(name, dblk.mode);
11981556Srgrimes		if (patime || pmtime)
11991556Srgrimes			set_ftime(name, dblk.mtime, dblk.atime, 0);
12001556Srgrimes	}
12011556Srgrimes
12021556Srgrimes	(void)close(dirfd);
12031556Srgrimes	dirfd = -1;
12041556Srgrimes	if (cnt != dircnt)
120576017Skris		paxwarn(1,"Unable to set mode and times for created directories");
12061556Srgrimes	return;
12071556Srgrimes}
12081556Srgrimes
12091556Srgrimes/*
12101556Srgrimes * database independent routines
12111556Srgrimes */
12121556Srgrimes
12131556Srgrimes/*
12141556Srgrimes * st_hash()
12151556Srgrimes *	hashes filenames to a u_int for hashing into a table. Looks at the tail
12161556Srgrimes *	end of file, as this provides far better distribution than any other
12171556Srgrimes *	part of the name. For performance reasons we only care about the last
12181556Srgrimes *	MAXKEYLEN chars (should be at LEAST large enough to pick off the file
12191556Srgrimes *	name). Was tested on 500,000 name file tree traversal from the root
12201556Srgrimes *	and gave almost a perfectly uniform distribution of keys when used with
12211556Srgrimes *	prime sized tables (MAXKEYLEN was 128 in test). Hashes (sizeof int)
12221556Srgrimes *	chars at a time and pads with 0 for last addition.
12231556Srgrimes * Return:
12241556Srgrimes *	the hash value of the string MOD (%) the table size.
12251556Srgrimes */
12261556Srgrimes
12271556Srgrimesu_int
12281556Srgrimesst_hash(char *name, int len, int tabsz)
12291556Srgrimes{
123090113Simp	char *pt;
123190113Simp	char *dest;
123290113Simp	char *end;
123390113Simp	int i;
123490113Simp	u_int key = 0;
123590113Simp	int steps;
123690113Simp	int res;
12371556Srgrimes	u_int val;
12381556Srgrimes
12391556Srgrimes	/*
12401556Srgrimes	 * only look at the tail up to MAXKEYLEN, we do not need to waste
12411556Srgrimes	 * time here (remember these are pathnames, the tail is what will
12421556Srgrimes	 * spread out the keys)
12431556Srgrimes	 */
12441556Srgrimes	if (len > MAXKEYLEN) {
124576019Skris		pt = &(name[len - MAXKEYLEN]);
12461556Srgrimes		len = MAXKEYLEN;
12471556Srgrimes	} else
12481556Srgrimes		pt = name;
12491556Srgrimes
12501556Srgrimes	/*
12511556Srgrimes	 * calculate the number of u_int size steps in the string and if
12521556Srgrimes	 * there is a runt to deal with
12531556Srgrimes	 */
12541556Srgrimes	steps = len/sizeof(u_int);
12551556Srgrimes	res = len % sizeof(u_int);
12561556Srgrimes
12571556Srgrimes	/*
12581556Srgrimes	 * add up the value of the string in unsigned integer sized pieces
12591556Srgrimes	 * too bad we cannot have unsigned int aligned strings, then we
12601556Srgrimes	 * could avoid the expensive copy.
12611556Srgrimes	 */
12621556Srgrimes	for (i = 0; i < steps; ++i) {
12631556Srgrimes		end = pt + sizeof(u_int);
12641556Srgrimes		dest = (char *)&val;
12651556Srgrimes		while (pt < end)
12661556Srgrimes			*dest++ = *pt++;
12671556Srgrimes		key += val;
12681556Srgrimes	}
12691556Srgrimes
12701556Srgrimes	/*
12711556Srgrimes	 * add in the runt padded with zero to the right
12721556Srgrimes	 */
12731556Srgrimes	if (res) {
12741556Srgrimes		val = 0;
12751556Srgrimes		end = pt + res;
12761556Srgrimes		dest = (char *)&val;
12771556Srgrimes		while (pt < end)
12781556Srgrimes			*dest++ = *pt++;
12791556Srgrimes		key += val;
12801556Srgrimes	}
12811556Srgrimes
12821556Srgrimes	/*
12831556Srgrimes	 * return the result mod the table size
12841556Srgrimes	 */
12851556Srgrimes	return(key % tabsz);
12861556Srgrimes}
1287