pat_rep.c revision 284008
1/*-
2 * Copyright (c) 1992 Keith Muller.
3 * Copyright (c) 1992, 1993
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Keith Muller of the University of California, San Diego.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35#if 0
36static char sccsid[] = "@(#)pat_rep.c	8.2 (Berkeley) 4/18/94";
37#endif
38#endif /* not lint */
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD: stable/10/bin/pax/pat_rep.c 284008 2015-06-05 00:39:34Z delphij $");
41
42#include <sys/types.h>
43#include <sys/stat.h>
44#include <stdio.h>
45#include <string.h>
46#include <stdlib.h>
47#ifdef NET2_REGEX
48#include <regexp.h>
49#else
50#include <regex.h>
51#endif
52#include "pax.h"
53#include "pat_rep.h"
54#include "extern.h"
55
56/*
57 * routines to handle pattern matching, name modification (regular expression
58 * substitution and interactive renames), and destination name modification for
59 * copy (-rw). Both file name and link names are adjusted as required in these
60 * routines.
61 */
62
63#define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
64static PATTERN *pathead = NULL;		/* file pattern match list head */
65static PATTERN *pattail = NULL;		/* file pattern match list tail */
66static REPLACE *rephead = NULL;		/* replacement string list head */
67static REPLACE *reptail = NULL;		/* replacement string list tail */
68
69static int rep_name(char *, int *, int);
70static int tty_rename(ARCHD *);
71static int fix_path(char *, int *, char *, int);
72static int fn_match(char *, char *, char **);
73static char * range_match(char *, int);
74#ifdef NET2_REGEX
75static int resub(regexp *, char *, char *, char *);
76#else
77static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
78#endif
79
80/*
81 * rep_add()
82 *	parses the -s replacement string; compiles the regular expression
83 *	and stores the compiled value and it's replacement string together in
84 *	replacement string list. Input to this function is of the form:
85 *		/old/new/pg
86 *	The first char in the string specifies the delimiter used by this
87 *	replacement string. "Old" is a regular expression in "ed" format which
88 *	is compiled by regcomp() and is applied to filenames. "new" is the
89 *	substitution string; p and g are options flags for printing and global
90 *	replacement (over the single filename)
91 * Return:
92 *	0 if a proper replacement string and regular expression was added to
93 *	the list of replacement patterns; -1 otherwise.
94 */
95
96int
97rep_add(char *str)
98{
99	char *pt1;
100	char *pt2;
101	REPLACE *rep;
102#	ifndef NET2_REGEX
103	int res;
104	char rebuf[BUFSIZ];
105#	endif
106
107	/*
108	 * throw out the bad parameters
109	 */
110	if ((str == NULL) || (*str == '\0')) {
111		paxwarn(1, "Empty replacement string");
112		return(-1);
113	}
114
115	/*
116	 * first character in the string specifies what the delimiter is for
117	 * this expression
118	 */
119	if ((pt1 = strchr(str+1, *str)) == NULL) {
120		paxwarn(1, "Invalid replacement string %s", str);
121		return(-1);
122	}
123
124	/*
125	 * allocate space for the node that handles this replacement pattern
126	 * and split out the regular expression and try to compile it
127	 */
128	if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
129		paxwarn(1, "Unable to allocate memory for replacement string");
130		return(-1);
131	}
132
133	*pt1 = '\0';
134#	ifdef NET2_REGEX
135	if ((rep->rcmp = regcomp(str+1)) == NULL) {
136#	else
137	if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
138		regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
139		paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
140#	endif
141		free(rep);
142		return(-1);
143	}
144
145	/*
146	 * put the delimiter back in case we need an error message and
147	 * locate the delimiter at the end of the replacement string
148	 * we then point the node at the new substitution string
149	 */
150	*pt1++ = *str;
151	if ((pt2 = strchr(pt1, *str)) == NULL) {
152#		ifdef NET2_REGEX
153		free(rep->rcmp);
154#		else
155		regfree(&rep->rcmp);
156#		endif
157		free(rep);
158		paxwarn(1, "Invalid replacement string %s", str);
159		return(-1);
160	}
161
162	*pt2 = '\0';
163	rep->nstr = pt1;
164	pt1 = pt2++;
165	rep->flgs = 0;
166
167	/*
168	 * set the options if any
169	 */
170	while (*pt2 != '\0') {
171		switch(*pt2) {
172		case 'g':
173		case 'G':
174			rep->flgs  |= GLOB;
175			break;
176		case 'p':
177		case 'P':
178			rep->flgs  |= PRNT;
179			break;
180		default:
181#			ifdef NET2_REGEX
182			free(rep->rcmp);
183#			else
184			regfree(&rep->rcmp);
185#			endif
186			free(rep);
187			*pt1 = *str;
188			paxwarn(1, "Invalid replacement string option %s", str);
189			return(-1);
190		}
191		++pt2;
192	}
193
194	/*
195	 * all done, link it in at the end
196	 */
197	rep->fow = NULL;
198	if (rephead == NULL) {
199		reptail = rephead = rep;
200		return(0);
201	}
202	reptail->fow = rep;
203	reptail = rep;
204	return(0);
205}
206
207/*
208 * pat_add()
209 *	add a pattern match to the pattern match list. Pattern matches are used
210 *	to select which archive members are extracted. (They appear as
211 *	arguments to pax in the list and read modes). If no patterns are
212 *	supplied to pax, all members in the archive will be selected (and the
213 *	pattern match list is empty).
214 * Return:
215 *	0 if the pattern was added to the list, -1 otherwise
216 */
217
218int
219pat_add(char *str, char *chdnam)
220{
221	PATTERN *pt;
222
223	/*
224	 * throw out the junk
225	 */
226	if ((str == NULL) || (*str == '\0')) {
227		paxwarn(1, "Empty pattern string");
228		return(-1);
229	}
230
231	/*
232	 * allocate space for the pattern and store the pattern. the pattern is
233	 * part of argv so do not bother to copy it, just point at it. Add the
234	 * node to the end of the pattern list
235	 */
236	if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
237		paxwarn(1, "Unable to allocate memory for pattern string");
238		return(-1);
239	}
240
241	pt->pstr = str;
242	pt->pend = NULL;
243	pt->plen = strlen(str);
244	pt->fow = NULL;
245	pt->flgs = 0;
246	pt->chdname = chdnam;
247
248	if (pathead == NULL) {
249		pattail = pathead = pt;
250		return(0);
251	}
252	pattail->fow = pt;
253	pattail = pt;
254	return(0);
255}
256
257/*
258 * pat_chk()
259 *	complain if any the user supplied pattern did not result in a match to
260 *	a selected archive member.
261 */
262
263void
264pat_chk(void)
265{
266	PATTERN *pt;
267	int wban = 0;
268
269	/*
270	 * walk down the list checking the flags to make sure MTCH was set,
271	 * if not complain
272	 */
273	for (pt = pathead; pt != NULL; pt = pt->fow) {
274		if (pt->flgs & MTCH)
275			continue;
276		if (!wban) {
277			paxwarn(1, "WARNING! These patterns were not matched:");
278			++wban;
279		}
280		(void)fprintf(stderr, "%s\n", pt->pstr);
281	}
282}
283
284/*
285 * pat_sel()
286 *	the archive member which matches a pattern was selected. Mark the
287 *	pattern as having selected an archive member. arcn->pat points at the
288 *	pattern that was matched. arcn->pat is set in pat_match()
289 *
290 *	NOTE: When the -c option is used, we are called when there was no match
291 *	by pat_match() (that means we did match before the inverted sense of
292 *	the logic). Now this seems really strange at first, but with -c  we
293 *	need to keep track of those patterns that cause an archive member to NOT
294 *	be selected (it found an archive member with a specified pattern)
295 * Return:
296 *	0 if the pattern pointed at by arcn->pat was tagged as creating a
297 *	match, -1 otherwise.
298 */
299
300int
301pat_sel(ARCHD *arcn)
302{
303	PATTERN *pt;
304	PATTERN **ppt;
305	int len;
306
307	/*
308	 * if no patterns just return
309	 */
310	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
311		return(0);
312
313	/*
314	 * when we are NOT limited to a single match per pattern mark the
315	 * pattern and return
316	 */
317	if (!nflag) {
318		pt->flgs |= MTCH;
319		return(0);
320	}
321
322	/*
323	 * we reach this point only when we allow a single selected match per
324	 * pattern, if the pattern matches a directory and we do not have -d
325	 * (dflag) we are done with this pattern. We may also be handed a file
326	 * in the subtree of a directory. in that case when we are operating
327	 * with -d, this pattern was already selected and we are done
328	 */
329	if (pt->flgs & DIR_MTCH)
330		return(0);
331
332	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
333		/*
334		 * ok we matched a directory and we are allowing
335		 * subtree matches but because of the -n only its children will
336		 * match. This is tagged as a DIR_MTCH type.
337		 * WATCH IT, the code assumes that pt->pend points
338		 * into arcn->name and arcn->name has not been modified.
339		 * If not we will have a big mess. Yup this is another kludge
340		 */
341
342		/*
343		 * if this was a prefix match, remove trailing part of path
344		 * so we can copy it. Future matches will be exact prefix match
345		 */
346		if (pt->pend != NULL)
347			*pt->pend = '\0';
348
349		if ((pt->pstr = strdup(arcn->name)) == NULL) {
350			paxwarn(1, "Pattern select out of memory");
351			if (pt->pend != NULL)
352				*pt->pend = '/';
353			pt->pend = NULL;
354			return(-1);
355		}
356
357		/*
358		 * put the trailing / back in the source string
359		 */
360		if (pt->pend != NULL) {
361			*pt->pend = '/';
362			pt->pend = NULL;
363		}
364		pt->plen = strlen(pt->pstr);
365
366		/*
367		 * strip off any trailing /, this should really never happen
368		 */
369		len = pt->plen - 1;
370		if (*(pt->pstr + len) == '/') {
371			*(pt->pstr + len) = '\0';
372			pt->plen = len;
373		}
374		pt->flgs = DIR_MTCH | MTCH;
375		arcn->pat = pt;
376		return(0);
377	}
378
379	/*
380	 * we are then done with this pattern, so we delete it from the list
381	 * because it can never be used for another match.
382	 * Seems kind of strange to do for a -c, but the pax spec is really
383	 * vague on the interaction of -c -n and -d. We assume that when -c
384	 * and the pattern rejects a member (i.e. it matched it) it is done.
385	 * In effect we place the order of the flags as having -c last.
386	 */
387	pt = pathead;
388	ppt = &pathead;
389	while ((pt != NULL) && (pt != arcn->pat)) {
390		ppt = &(pt->fow);
391		pt = pt->fow;
392	}
393
394	if (pt == NULL) {
395		/*
396		 * should never happen....
397		 */
398		paxwarn(1, "Pattern list inconsistent");
399		return(-1);
400	}
401	*ppt = pt->fow;
402	free(pt);
403	arcn->pat = NULL;
404	return(0);
405}
406
407/*
408 * pat_match()
409 *	see if this archive member matches any supplied pattern, if a match
410 *	is found, arcn->pat is set to point at the potential pattern. Later if
411 *	this archive member is "selected" we process and mark the pattern as
412 *	one which matched a selected archive member (see pat_sel())
413 * Return:
414 *	0 if this archive member should be processed, 1 if it should be
415 *	skipped and -1 if we are done with all patterns (and pax should quit
416 *	looking for more members)
417 */
418
419int
420pat_match(ARCHD *arcn)
421{
422	PATTERN *pt;
423
424	arcn->pat = NULL;
425
426	/*
427	 * if there are no more patterns and we have -n (and not -c) we are
428	 * done. otherwise with no patterns to match, matches all
429	 */
430	if (pathead == NULL) {
431		if (nflag && !cflag)
432			return(-1);
433		return(0);
434	}
435
436	/*
437	 * have to search down the list one at a time looking for a match.
438	 */
439	pt = pathead;
440	while (pt != NULL) {
441		/*
442		 * check for a file name match unless we have DIR_MTCH set in
443		 * this pattern then we want a prefix match
444		 */
445		if (pt->flgs & DIR_MTCH) {
446			/*
447			 * this pattern was matched before to a directory
448			 * as we must have -n set for this (but not -d). We can
449			 * only match CHILDREN of that directory so we must use
450			 * an exact prefix match (no wildcards).
451			 */
452			if ((arcn->name[pt->plen] == '/') &&
453			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
454				break;
455		} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
456			break;
457		pt = pt->fow;
458	}
459
460	/*
461	 * return the result, remember that cflag (-c) inverts the sense of a
462	 * match
463	 */
464	if (pt == NULL)
465		return(cflag ? 0 : 1);
466
467	/*
468	 * We had a match, now when we invert the sense (-c) we reject this
469	 * member. However we have to tag the pattern a being successful, (in a
470	 * match, not in selecting an archive member) so we call pat_sel() here.
471	 */
472	arcn->pat = pt;
473	if (!cflag)
474		return(0);
475
476	if (pat_sel(arcn) < 0)
477		return(-1);
478	arcn->pat = NULL;
479	return(1);
480}
481
482/*
483 * fn_match()
484 * Return:
485 *	0 if this archive member should be processed, 1 if it should be
486 *	skipped and -1 if we are done with all patterns (and pax should quit
487 *	looking for more members)
488 *	Note: *pend may be changed to show where the prefix ends.
489 */
490
491static int
492fn_match(char *pattern, char *string, char **pend)
493{
494	char c;
495	char test;
496
497	*pend = NULL;
498	for (;;) {
499		switch (c = *pattern++) {
500		case '\0':
501			/*
502			 * Ok we found an exact match
503			 */
504			if (*string == '\0')
505				return(0);
506
507			/*
508			 * Check if it is a prefix match
509			 */
510			if ((dflag == 1) || (*string != '/'))
511				return(-1);
512
513			/*
514			 * It is a prefix match, remember where the trailing
515			 * / is located
516			 */
517			*pend = string;
518			return(0);
519		case '?':
520			if ((test = *string++) == '\0')
521				return (-1);
522			break;
523		case '*':
524			c = *pattern;
525			/*
526			 * Collapse multiple *'s.
527			 */
528			while (c == '*')
529				c = *++pattern;
530
531			/*
532			 * Optimized hack for pattern with a * at the end
533			 */
534			if (c == '\0')
535				return (0);
536
537			/*
538			 * General case, use recursion.
539			 */
540			while ((test = *string) != '\0') {
541				if (!fn_match(pattern, string, pend))
542					return (0);
543				++string;
544			}
545			return (-1);
546		case '[':
547			/*
548			 * range match
549			 */
550			if (((test = *string++) == '\0') ||
551			    ((pattern = range_match(pattern, test)) == NULL))
552				return (-1);
553			break;
554		case '\\':
555		default:
556			if (c != *string++)
557				return (-1);
558			break;
559		}
560	}
561	/* NOTREACHED */
562}
563
/*
 * range_match()
 *	check the character test against a [...] set of a file name pattern.
 *	pattern points at the first character behind the opening [. A leading
 *	! negates the set. A set member is either a single character or a
 *	range written as lo-hi; a - which is followed directly by the closing
 *	] (or by the end of the pattern) stands for itself.
 * Return:
 *	pointer to the pattern character following the closing ] when test is
 *	accepted by the set, NULL when it is rejected or when the set is
 *	missing its closing ].
 */

static char *
range_match(char *pattern, int test)
{
	char lo;
	char hi;
	int negate;
	int member = 0;

	negate = (*pattern == '!');
	if (negate)
		++pattern;

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end of the pattern, the set is illegal
		 */
		if (lo == '\0')
			return (NULL);

		if ((*pattern == '-') && ((hi = pattern[1]) != '\0') &&
		    (hi != ']')) {
			/*
			 * a lo-hi range, step over the - and the upper bound
			 */
			if ((lo <= test) && (test <= hi))
				member = 1;
			pattern += 2;
		} else if (lo == test)
			member = 1;
	}

	/*
	 * accepted when test is in a plain set or outside of a negated one
	 */
	if (member == negate)
		return (NULL);
	return (pattern);
}
592
593/*
594 * mod_name()
595 *	modify a selected file name. first attempt to apply replacement string
596 *	expressions, then apply interactive file rename. We apply replacement
597 *	string expressions to both filenames and file links (if we didn't the
598 *	links would point to the wrong place, and we could never be able to
599 *	move an archive that has a file link in it). When we rename files
600 *	interactively, we store that mapping (old name to user input name) so
601 *	if we spot any file links to the old file name in the future, we will
602 *	know exactly how to fix the file link.
603 * Return:
604 *	0 continue to  process file, 1 skip this file, -1 pax is finished
605 */
606
607int
608mod_name(ARCHD *arcn)
609{
610	int res = 0;
611
612	/*
613	 * Strip off leading '/' if appropriate.
614	 * Currently, this option is only set for the tar format.
615	 */
616	if (rmleadslash && arcn->name[0] == '/') {
617		if (arcn->name[1] == '\0') {
618			arcn->name[0] = '.';
619		} else {
620			(void)memmove(arcn->name, &arcn->name[1],
621			    strlen(arcn->name));
622			arcn->nlen--;
623		}
624		if (rmleadslash < 2) {
625			rmleadslash = 2;
626			paxwarn(0, "Removing leading / from absolute path names in the archive");
627		}
628	}
629	if (rmleadslash && arcn->ln_name[0] == '/' &&
630	    (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
631		if (arcn->ln_name[1] == '\0') {
632			arcn->ln_name[0] = '.';
633		} else {
634			(void)memmove(arcn->ln_name, &arcn->ln_name[1],
635			    strlen(arcn->ln_name));
636			arcn->ln_nlen--;
637		}
638		if (rmleadslash < 2) {
639			rmleadslash = 2;
640			paxwarn(0, "Removing leading / from absolute path names in the archive");
641		}
642	}
643
644	/*
645	 * IMPORTANT: We have a problem. what do we do with symlinks?
646	 * Modifying a hard link name makes sense, as we know the file it
647	 * points at should have been seen already in the archive (and if it
648	 * wasn't seen because of a read error or a bad archive, we lose
649	 * anyway). But there are no such requirements for symlinks. On one
650	 * hand the symlink that refers to a file in the archive will have to
651	 * be modified to so it will still work at its new location in the
652	 * file system. On the other hand a symlink that points elsewhere (and
653	 * should continue to do so) should not be modified. There is clearly
654	 * no perfect solution here. So we handle them like hardlinks. Clearly
655	 * a replacement made by the interactive rename mapping is very likely
656	 * to be correct since it applies to a single file and is an exact
657	 * match. The regular expression replacements are a little harder to
658	 * justify though. We claim that the symlink name is only likely
659	 * to be replaced when it points within the file tree being moved and
660	 * in that case it should be modified. what we really need to do is to
661	 * call an oracle here. :)
662	 */
663	if (rephead != NULL) {
664		/*
665		 * we have replacement strings, modify the name and the link
666		 * name if any.
667		 */
668		if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
669			return(res);
670
671		if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
672		    (arcn->type == PAX_HRG)) &&
673		    ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
674			return(res);
675	}
676
677	if (iflag) {
678		/*
679		 * perform interactive file rename, then map the link if any
680		 */
681		if ((res = tty_rename(arcn)) != 0)
682			return(res);
683		if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
684		    (arcn->type == PAX_HRG))
685			sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
686	}
687	return(res);
688}
689
690/*
691 * tty_rename()
692 *	Prompt the user for a replacement file name. A "." keeps the old name,
693 *	a empty line skips the file, and an EOF on reading the tty, will cause
694 *	pax to stop processing and exit. Otherwise the file name input, replaces
695 *	the old one.
696 * Return:
697 *	0 process this file, 1 skip this file, -1 we need to exit pax
698 */
699
700static int
701tty_rename(ARCHD *arcn)
702{
703	char tmpname[PAXPATHLEN+2];
704	int res;
705
706	/*
707	 * prompt user for the replacement name for a file, keep trying until
708	 * we get some reasonable input. Archives may have more than one file
709	 * on them with the same name (from updates etc). We print verbose info
710	 * on the file so the user knows what is up.
711	 */
712	tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
713
714	for (;;) {
715		ls_tty(arcn);
716		tty_prnt("Input new name, or a \".\" to keep the old name, ");
717		tty_prnt("or a \"return\" to skip this file.\n");
718		tty_prnt("Input > ");
719		if (tty_read(tmpname, sizeof(tmpname)) < 0)
720			return(-1);
721		if (strcmp(tmpname, "..") == 0) {
722			tty_prnt("Try again, illegal file name: ..\n");
723			continue;
724		}
725		if (strlen(tmpname) > PAXPATHLEN) {
726			tty_prnt("Try again, file name too long\n");
727			continue;
728		}
729		break;
730	}
731
732	/*
733	 * empty file name, skips this file. a "." leaves it alone
734	 */
735	if (tmpname[0] == '\0') {
736		tty_prnt("Skipping file.\n");
737		return(1);
738	}
739	if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
740		tty_prnt("Processing continues, name unchanged.\n");
741		return(0);
742	}
743
744	/*
745	 * ok the name changed. We may run into links that point at this
746	 * file later. we have to remember where the user sent the file
747	 * in order to repair any links.
748	 */
749	tty_prnt("Processing continues, name changed to: %s\n", tmpname);
750	res = add_name(arcn->name, arcn->nlen, tmpname);
751	arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1);
752	arcn->name[arcn->nlen] = '\0';
753	if (res < 0)
754		return(-1);
755	return(0);
756}
757
758/*
759 * set_dest()
760 *	fix up the file name and the link name (if any) so this file will land
761 *	in the destination directory (used during copy() -rw).
762 * Return:
763 *	0 if ok, -1 if failure (name too long)
764 */
765
766int
767set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
768{
769	if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
770		return(-1);
771
772	/*
773	 * It is really hard to deal with symlinks here, we cannot be sure
774	 * if the name they point was moved (or will be moved). It is best to
775	 * leave them alone.
776	 */
777	if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
778		return(0);
779
780	if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
781		return(-1);
782	return(0);
783}
784
785/*
786 * fix_path
787 *	concatenate dir_name and or_name and store the result in or_name (if
788 *	it fits). This is one ugly function.
789 * Return:
790 *	0 if ok, -1 if the final name is too long
791 */
792
793static int
794fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
795{
796	char *src;
797	char *dest;
798	char *start;
799	int len;
800
801	/*
802	 * we shift the or_name to the right enough to tack in the dir_name
803	 * at the front. We make sure we have enough space for it all before
804	 * we start. since dest always ends in a slash, we skip of or_name
805	 * if it also starts with one.
806	 */
807	start = or_name;
808	src = start + *or_len;
809	dest = src + dir_len;
810	if (*start == '/') {
811		++start;
812		--dest;
813	}
814	if ((len = dest - or_name) > PAXPATHLEN) {
815		paxwarn(1, "File name %s/%s, too long", dir_name, start);
816		return(-1);
817	}
818	*or_len = len;
819
820	/*
821	 * enough space, shift
822	 */
823	while (src >= start)
824		*dest-- = *src--;
825	src = dir_name + dir_len - 1;
826
827	/*
828	 * splice in the destination directory name
829	 */
830	while (src >= dir_name)
831		*dest-- = *src--;
832
833	*(or_name + len) = '\0';
834	return(0);
835}
836
837/*
838 * rep_name()
839 *	walk down the list of replacement strings applying each one in order.
840 *	when we find one with a successful substitution, we modify the name
841 *	as specified. if required, we print the results. if the resulting name
842 *	is empty, we will skip this archive member. We use the regexp(3)
843 *	routines (regexp() ought to win a prize as having the most cryptic
844 *	library function manual page).
845 *	--Parameters--
846 *	name is the file name we are going to apply the regular expressions to
847 *	(and may be modified)
848 *	nlen is the length of this name (and is modified to hold the length of
849 *	the final string).
850 *	prnt is a flag that says whether to print the final result.
851 * Return:
852 *	0 if substitution was successful, 1 if we are to skip the file (the name
853 *	ended up empty)
854 */
855
856static int
857rep_name(char *name, int *nlen, int prnt)
858{
859	REPLACE *pt;
860	char *inpt;
861	char *outpt;
862	char *endpt;
863	char *rpt;
864	int found = 0;
865	int res;
866#	ifndef NET2_REGEX
867	regmatch_t pm[MAXSUBEXP];
868#	endif
869	char nname[PAXPATHLEN+1];	/* final result of all replacements */
870	char buf1[PAXPATHLEN+1];	/* where we work on the name */
871
872	/*
873	 * copy the name into buf1, where we will work on it. We need to keep
874	 * the orig string around so we can print out the result of the final
875	 * replacement. We build up the final result in nname. inpt points at
876	 * the string we apply the regular expression to. prnt is used to
877	 * suppress printing when we handle replacements on the link field
878	 * (the user already saw that substitution go by)
879	 */
880	pt = rephead;
881	(void)strcpy(buf1, name);
882	inpt = buf1;
883	outpt = nname;
884	endpt = outpt + PAXPATHLEN;
885
886	/*
887	 * try each replacement string in order
888	 */
889	while (pt != NULL) {
890		do {
891			/*
892			 * check for a successful substitution, if not go to
893			 * the next pattern, or cleanup if we were global
894			 */
895#			ifdef NET2_REGEX
896			if (regexec(pt->rcmp, inpt) == 0)
897#			else
898			if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
899#			endif
900				break;
901
902			/*
903			 * ok we found one. We have three parts, the prefix
904			 * which did not match, the section that did and the
905			 * tail (that also did not match). Copy the prefix to
906			 * the final output buffer (watching to make sure we
907			 * do not create a string too long).
908			 */
909			found = 1;
910#			ifdef NET2_REGEX
911			rpt = pt->rcmp->startp[0];
912#			else
913			rpt = inpt + pm[0].rm_so;
914#			endif
915
916			while ((inpt < rpt) && (outpt < endpt))
917				*outpt++ = *inpt++;
918			if (outpt == endpt)
919				break;
920
921			/*
922			 * for the second part (which matched the regular
923			 * expression) apply the substitution using the
924			 * replacement string and place it the prefix in the
925			 * final output. If we have problems, skip it.
926			 */
927#			ifdef NET2_REGEX
928			if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) {
929#			else
930			if ((res = resub(&(pt->rcmp),pm,inpt,pt->nstr,outpt,endpt))
931			    < 0) {
932#			endif
933				if (prnt)
934					paxwarn(1, "Replacement name error %s",
935					    name);
936				return(1);
937			}
938			outpt += res;
939
940			/*
941			 * we set up to look again starting at the first
942			 * character in the tail (of the input string right
943			 * after the last character matched by the regular
944			 * expression (inpt always points at the first char in
945			 * the string to process). If we are not doing a global
946			 * substitution, we will use inpt to copy the tail to
947			 * the final result. Make sure we do not overrun the
948			 * output buffer
949			 */
950#			ifdef NET2_REGEX
951			inpt = pt->rcmp->endp[0];
952#			else
953			inpt += pm[0].rm_eo - pm[0].rm_so;
954#			endif
955
956			if ((outpt == endpt) || (*inpt == '\0'))
957				break;
958
959			/*
960			 * if the user wants global we keep trying to
961			 * substitute until it fails, then we are done.
962			 */
963		} while (pt->flgs & GLOB);
964
965		if (found)
966			break;
967
968		/*
969		 * a successful substitution did NOT occur, try the next one
970		 */
971		pt = pt->fow;
972	}
973
974	if (found) {
975		/*
976		 * we had a substitution, copy the last tail piece (if there is
977		 * room) to the final result
978		 */
979		while ((outpt < endpt) && (*inpt != '\0'))
980			*outpt++ = *inpt++;
981
982		*outpt = '\0';
983		if ((outpt == endpt) && (*inpt != '\0')) {
984			if (prnt)
985				paxwarn(1,"Replacement name too long %s >> %s",
986				    name, nname);
987			return(1);
988		}
989
990		/*
991		 * inform the user of the result if wanted
992		 */
993		if (prnt && (pt->flgs & PRNT)) {
994			if (*nname == '\0')
995				(void)fprintf(stderr,"%s >> <empty string>\n",
996				    name);
997			else
998				(void)fprintf(stderr,"%s >> %s\n", name, nname);
999		}
1000
1001		/*
1002		 * if empty inform the caller this file is to be skipped
1003		 * otherwise copy the new name over the orig name and return
1004		 */
1005		if (*nname == '\0')
1006			return(1);
1007		*nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
1008		name[PAXPATHLEN] = '\0';
1009	}
1010	return(0);
1011}
1012
1013#ifdef NET2_REGEX
1014/*
1015 * resub()
1016 *	apply the replacement to the matched expression. expand out the old
1017 * 	style ed(1) subexpression expansion.
1018 * Return:
1019 *	-1 if error, or the number of characters added to the destination.
1020 */
1021
1022static int
1023resub(regexp *prog, char *src, char *dest, char *destend)
1024{
1025	char *spt;
1026	char *dpt;
1027	char c;
1028	int no;
1029	int len;
1030
1031	spt = src;
1032	dpt = dest;
1033	while ((dpt < destend) && ((c = *spt++) != '\0')) {
1034		if (c == '&')
1035			no = 0;
1036		else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1037			no = *spt++ - '0';
1038		else {
1039 			if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1040 				c = *spt++;
1041 			*dpt++ = c;
1042			continue;
1043		}
1044 		if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1045		    ((len = prog->endp[no] - prog->startp[no]) <= 0))
1046			continue;
1047
1048		/*
1049		 * copy the subexpression to the destination.
1050		 * fail if we run out of space or the match string is damaged
1051		 */
1052		if (len > (destend - dpt))
1053			len = destend - dpt;
1054		if (l_strncpy(dpt, prog->startp[no], len) != len)
1055			return(-1);
1056		dpt += len;
1057	}
1058	return(dpt - dest);
1059}
1060
1061#else
1062
/*
 * resub()
 *	build the replacement for a matched expression. src is the replacement
 *	string; it is copied to dest with the old style ed(1) subexpression
 *	references expanded: & stands for the whole match (pm[0]) and \0 .. \9
 *	for the text described by the pm entry of that number. \\ and \& give
 *	a plain \ and &.
 *	orig is the string the offsets in pm are added to; that has to be the
 *	string regexec() was run on. Nothing is written at or beyond destend,
 *	output that does not fit is dropped. dest is NOT NUL terminated here.
 * Return:
 *	-1 if error (src names a subexpression the expression does not have,
 *	or the matched text is shorter than pm claims), or the number of
 *	characters added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *orig, char *src, char *dest,
	char *destend)
{
	char *in = src;
	char *out = dest;
	char c;
	regmatch_t *ref;
	int len;
	int nsub;

	nsub = rp->re_nsub;
	while (out < destend) {
		c = *in++;
		if (c == '\0')
			break;

		/*
		 * find out which subexpression is asked for, if any
		 */
		if (c == '&') {
			ref = pm;
		} else if ((c == '\\') && (*in >= '0') && (*in <= '9')) {
			/*
			 * the expression must have that many subexpressions
			 */
			len = *in++ - '0';
			if (len > nsub)
				return(-1);
			ref = pm + len;
		} else {
			/*
			 * ordinary character (an escaped \ or & loses its
			 * backslash), just copy it
			 */
			if ((c == '\\') && ((*in == '\\') || (*in == '&')))
				c = *in++;
			*out++ = c;
			continue;
		}

		/*
		 * a subexpression which is not set or is empty adds nothing
		 */
		if ((ref->rm_so < 0) || (ref->rm_eo < 0))
			continue;
		len = ref->rm_eo - ref->rm_so;
		if (len <= 0)
			continue;

		/*
		 * copy as much of the subexpression as fits. A short copy
		 * means the match string is damaged
		 */
		if (len > (destend - out))
			len = destend - out;
		if (l_strncpy(out, orig + ref->rm_so, len) != len)
			return(-1);
		out += len;
	}
	return(out - dest);
}
1128#endif
1129