pat_rep.c revision 28904
1/*-
2 * Copyright (c) 1992 Keith Muller.
3 * Copyright (c) 1992, 1993
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Keith Muller of the University of California, San Diego.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	$Id: pat_rep.c,v 1.9 1997/06/02 06:30:06 charnier Exp $
38 */
39
40#ifndef lint
41static char const sccsid[] = "@(#)pat_rep.c	8.2 (Berkeley) 4/18/94";
42#endif /* not lint */
43
44#include <sys/types.h>
45#include <sys/time.h>
46#include <sys/stat.h>
47#include <sys/param.h>
48#include <stdio.h>
49#include <string.h>
50#include <unistd.h>
51#include <stdlib.h>
52#ifdef NET2_REGEX
53#include <regexp.h>
54#else
55#include <regex.h>
56#endif
57#include "pax.h"
58#include "pat_rep.h"
59#include "extern.h"
60
61/*
62 * routines to handle pattern matching, name modification (regular expression
63 * substitution and interactive renames), and destination name modification for
64 * copy (-rw). Both file name and link names are adjusted as required in these
65 * routines.
66 */
67
/*
 * MAXSUBEXP sizes the regmatch_t array that rep_name() hands to regexec();
 * the replacement syntax handled by resub() only accepts the single digit
 * references \0 through \9.
 */
#define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
static PATTERN *pathead = NULL;		/* file pattern match list head */
static PATTERN *pattail = NULL;		/* file pattern match list tail */
static REPLACE *rephead = NULL;		/* replacement string list head */
static REPLACE *reptail = NULL;		/* replacement string list tail */

/*
 * prototypes for the routines private to this file. resub() has a different
 * signature depending on which regular expression library is compiled in.
 */
static int rep_name __P((char *, int *, int));
static int tty_rename __P((register ARCHD *));
static int fix_path __P((char *, int *, char *, int));
static int fn_match __P((register char *, register char *, char **));
static char * range_match __P((register char *, register int));
#ifdef NET2_REGEX
static int resub __P((regexp *, char *, char *, register char *));
#else
static int resub __P((regex_t *, regmatch_t *, char *, char *, char *));
#endif
84
85/*
86 * rep_add()
87 *	parses the -s replacement string; compiles the regular expression
88 *	and stores the compiled value and it's replacement string together in
89 *	replacement string list. Input to this function is of the form:
90 *		/old/new/pg
91 *	The first char in the string specifies the delimiter used by this
92 *	replacement string. "Old" is a regular expression in "ed" format which
93 *	is compiled by regcomp() and is applied to filenames. "new" is the
94 *	substitution string; p and g are options flags for printing and global
95 *	replacement (over the single filename)
96 * Return:
97 *	0 if a proper replacement string and regular expression was added to
98 *	the list of replacement patterns; -1 otherwise.
99 */
100
101#if __STDC__
102int
103rep_add(register char *str)
104#else
105int
106rep_add(str)
107	register char *str;
108#endif
109{
110	register char *pt1;
111	register char *pt2;
112	register REPLACE *rep;
113#	ifndef NET2_REGEX
114	register int res;
115	char rebuf[BUFSIZ];
116#	endif
117
118	/*
119	 * throw out the bad parameters
120	 */
121	if ((str == NULL) || (*str == '\0')) {
122		pax_warn(1, "Empty replacement string");
123		return(-1);
124	}
125
126	/*
127	 * first character in the string specifies what the delimiter is for
128	 * this expression
129	 */
130	if ((pt1 = strchr(str+1, *str)) == NULL) {
131		pax_warn(1, "Invalid replacement string %s", str);
132		return(-1);
133	}
134
135	/*
136	 * allocate space for the node that handles this replacement pattern
137	 * and split out the regular expression and try to compile it
138	 */
139	if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
140		pax_warn(1, "Unable to allocate memory for replacement string");
141		return(-1);
142	}
143
144	*pt1 = '\0';
145#	ifdef NET2_REGEX
146	if ((rep->rcmp = regcomp(str+1)) == NULL) {
147#	else
148	if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
149		regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
150		pax_warn(1, "%s while compiling regular expression %s", rebuf, str);
151#	endif
152		(void)free((char *)rep);
153		return(-1);
154	}
155
156	/*
157	 * put the delimiter back in case we need an error message and
158	 * locate the delimiter at the end of the replacement string
159	 * we then point the node at the new substitution string
160	 */
161	*pt1++ = *str;
162	if ((pt2 = strchr(pt1, *str)) == NULL) {
163#		ifdef NET2_REGEX
164		(void)free((char *)rep->rcmp);
165#		else
166		regfree(&(rep->rcmp));
167#		endif
168		(void)free((char *)rep);
169		pax_warn(1, "Invalid replacement string %s", str);
170		return(-1);
171	}
172
173	*pt2 = '\0';
174	rep->nstr = pt1;
175	pt1 = pt2++;
176	rep->flgs = 0;
177
178	/*
179	 * set the options if any
180	 */
181	while (*pt2 != '\0') {
182		switch(*pt2) {
183		case 'g':
184		case 'G':
185			rep->flgs  |= GLOB;
186			break;
187		case 'p':
188		case 'P':
189			rep->flgs  |= PRNT;
190			break;
191		default:
192#			ifdef NET2_REGEX
193			(void)free((char *)rep->rcmp);
194#			else
195			regfree(&(rep->rcmp));
196#			endif
197			(void)free((char *)rep);
198			*pt1 = *str;
199			pax_warn(1, "Invalid replacement string option %s", str);
200			return(-1);
201		}
202		++pt2;
203	}
204
205	/*
206	 * all done, link it in at the end
207	 */
208	rep->fow = NULL;
209	if (rephead == NULL) {
210		reptail = rephead = rep;
211		return(0);
212	}
213	reptail->fow = rep;
214	reptail = rep;
215	return(0);
216}
217
218/*
219 * pat_add()
220 *	add a pattern match to the pattern match list. Pattern matches are used
221 *	to select which archive members are extracted. (They appear as
222 *	arguments to pax in the list and read modes). If no patterns are
223 *	supplied to pax, all members in the archive will be selected (and the
224 *	pattern match list is empty).
225 * Return:
226 *	0 if the pattern was added to the list, -1 otherwise
227 */
228
229#if __STDC__
230int
231pat_add(char *str)
232#else
233int
234pat_add(str)
235	char *str;
236#endif
237{
238	register PATTERN *pt;
239
240	/*
241	 * throw out the junk
242	 */
243	if ((str == NULL) || (*str == '\0')) {
244		pax_warn(1, "Empty pattern string");
245		return(-1);
246	}
247
248	/*
249	 * allocate space for the pattern and store the pattern. the pattern is
250	 * part of argv so do not bother to copy it, just point at it. Add the
251	 * node to the end of the pattern list
252	 */
253	if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
254		pax_warn(1, "Unable to allocate memory for pattern string");
255		return(-1);
256	}
257
258	pt->pstr = str;
259	pt->pend = NULL;
260	pt->plen = strlen(str);
261	pt->fow = NULL;
262	pt->flgs = 0;
263	if (pathead == NULL) {
264		pattail = pathead = pt;
265		return(0);
266	}
267	pattail->fow = pt;
268	pattail = pt;
269	return(0);
270}
271
272/*
273 * pat_chk()
274 *	complain if any the user supplied pattern did not result in a match to
275 *	a selected archive member.
276 */
277
278#if __STDC__
279void
280pat_chk(void)
281#else
282void
283pat_chk()
284#endif
285{
286	register PATTERN *pt;
287	register int wban = 0;
288
289	/*
290	 * walk down the list checking the flags to make sure MTCH was set,
291	 * if not complain
292	 */
293	for (pt = pathead; pt != NULL; pt = pt->fow) {
294		if (pt->flgs & MTCH)
295			continue;
296		if (!wban) {
297			pax_warn(1, "WARNING! These patterns were not matched:");
298			++wban;
299		}
300		(void)fprintf(stderr, "%s\n", pt->pstr);
301	}
302}
303
304/*
305 * pat_sel()
306 *	the archive member which matches a pattern was selected. Mark the
307 *	pattern as having selected an archive member. arcn->pat points at the
308 *	pattern that was matched. arcn->pat is set in pat_match()
309 *
310 *	NOTE: When the -c option is used, we are called when there was no match
311 *	by pat_match() (that means we did match before the inverted sense of
312 *	the logic). Now this seems really strange at first, but with -c  we
313 *	need to keep track of those patterns that cause a archive member to NOT
314 *	be selected (it found an archive member with a specified pattern)
315 * Return:
316 *	0 if the pattern pointed at by arcn->pat was tagged as creating a
317 *	match, -1 otherwise.
318 */
319
320#if __STDC__
321int
322pat_sel(register ARCHD *arcn)
323#else
324int
325pat_sel(arcn)
326	register ARCHD *arcn;
327#endif
328{
329	register PATTERN *pt;
330	register PATTERN **ppt;
331	register int len;
332
333	/*
334	 * if no patterns just return
335	 */
336	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
337		return(0);
338
339	/*
340	 * when we are NOT limited to a single match per pattern mark the
341	 * pattern and return
342	 */
343	if (!nflag) {
344		pt->flgs |= MTCH;
345		return(0);
346	}
347
348	/*
349	 * we reach this point only when we allow a single selected match per
350	 * pattern, if the pattern matches a directory and we do not have -d
351	 * (dflag) we are done with this pattern. We may also be handed a file
352	 * in the subtree of a directory. in that case when we are operating
353	 * with -d, this pattern was already selected and we are done
354	 */
355	if (pt->flgs & DIR_MTCH)
356		return(0);
357
358	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
359		/*
360		 * ok we matched a directory and we are allowing
361		 * subtree matches but because of the -n only its children will
362		 * match. This is tagged as a DIR_MTCH type.
363		 * WATCH IT, the code assumes that pt->pend points
364		 * into arcn->name and arcn->name has not been modified.
365		 * If not we will have a big mess. Yup this is another kludge
366		 */
367
368		/*
369		 * if this was a prefix match, remove trailing part of path
370		 * so we can copy it. Future matches will be exact prefix match
371		 */
372		if (pt->pend != NULL)
373			*pt->pend = '\0';
374
375		if ((pt->pstr = strdup(arcn->name)) == NULL) {
376			pax_warn(1, "Pattern select out of memory");
377			if (pt->pend != NULL)
378				*pt->pend = '/';
379			pt->pend = NULL;
380			return(-1);
381		}
382
383		/*
384		 * put the trailing / back in the source string
385		 */
386		if (pt->pend != NULL) {
387			*pt->pend = '/';
388			pt->pend = NULL;
389		}
390		pt->plen = strlen(pt->pstr);
391
392		/*
393		 * strip off any trailing /, this should really never happen
394		 */
395		len = pt->plen - 1;
396		if (*(pt->pstr + len) == '/') {
397			*(pt->pstr + len) = '\0';
398			pt->plen = len;
399		}
400		pt->flgs = DIR_MTCH | MTCH;
401		arcn->pat = pt;
402		return(0);
403	}
404
405	/*
406	 * we are then done with this pattern, so we delete it from the list
407	 * because it can never be used for another match.
408	 * Seems kind of strange to do for a -c, but the pax spec is really
409	 * vague on the interaction of -c -n and -d. We assume that when -c
410	 * and the pattern rejects a member (i.e. it matched it) it is done.
411	 * In effect we place the order of the flags as having -c last.
412	 */
413	pt = pathead;
414	ppt = &pathead;
415	while ((pt != NULL) && (pt != arcn->pat)) {
416		ppt = &(pt->fow);
417		pt = pt->fow;
418	}
419
420	if (pt == NULL) {
421		/*
422		 * should never happen....
423		 */
424		pax_warn(1, "Pattern list inconsistant");
425		return(-1);
426	}
427	*ppt = pt->fow;
428	(void)free((char *)pt);
429	arcn->pat = NULL;
430	return(0);
431}
432
433/*
434 * pat_match()
435 *	see if this archive member matches any supplied pattern, if a match
436 *	is found, arcn->pat is set to point at the potential pattern. Later if
437 *	this archive member is "selected" we process and mark the pattern as
438 *	one which matched a selected archive member (see pat_sel())
439 * Return:
440 *	0 if this archive member should be processed, 1 if it should be
441 *	skipped and -1 if we are done with all patterns (and pax should quit
442 *	looking for more members)
443 */
444
445#if __STDC__
446int
447pat_match(register ARCHD *arcn)
448#else
449int
450pat_match(arcn)
451	register ARCHD *arcn;
452#endif
453{
454	register PATTERN *pt;
455
456	arcn->pat = NULL;
457
458	/*
459	 * if there are no more patterns and we have -n (and not -c) we are
460	 * done. otherwise with no patterns to match, matches all
461	 */
462	if (pathead == NULL) {
463		if (nflag && !cflag)
464			return(-1);
465		return(0);
466	}
467
468	/*
469	 * have to search down the list one at a time looking for a match.
470	 */
471	pt = pathead;
472	while (pt != NULL) {
473		/*
474		 * check for a file name match unless we have DIR_MTCH set in
475		 * this pattern then we want a prefix match
476		 */
477		if (pt->flgs & DIR_MTCH) {
478			/*
479			 * this pattern was matched before to a directory
480			 * as we must have -n set for this (but not -d). We can
481			 * only match CHILDREN of that directory so we must use
482			 * an exact prefix match (no wildcards).
483			 */
484			if ((arcn->name[pt->plen] == '/') &&
485			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
486				break;
487		} else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
488			break;
489		pt = pt->fow;
490	}
491
492	/*
493	 * return the result, remember that cflag (-c) inverts the sense of a
494	 * match
495	 */
496	if (pt == NULL)
497		return(cflag ? 0 : 1);
498
499	/*
500	 * we had a match, now when we invert the sense (-c) we reject this
501	 * member. However we have to tag the pattern a being successful, (in a
502	 * match, not in selecting a archive member) so we call pat_sel() here.
503	 */
504	arcn->pat = pt;
505	if (!cflag)
506		return(0);
507
508	if (pat_sel(arcn) < 0)
509		return(-1);
510	arcn->pat = NULL;
511	return(1);
512}
513
514/*
515 * fn_match()
516 * Return:
517 *	0 if this archive member should be processed, 1 if it should be
518 *	skipped and -1 if we are done with all patterns (and pax should quit
519 *	looking for more members)
520 *	Note: *pend may be changed to show where the prefix ends.
521 */
522
523#if __STDC__
524static int
525fn_match(register char *pattern, register char *string, char **pend)
526#else
527static int
528fn_match(pattern, string, pend)
529	register char *pattern;
530	register char *string;
531	char **pend;
532#endif
533{
534	register char c;
535	char test;
536
537	*pend = NULL;
538	for (;;) {
539		switch (c = *pattern++) {
540		case '\0':
541			/*
542			 * Ok we found an exact match
543			 */
544			if (*string == '\0')
545				return(0);
546
547			/*
548			 * Check if it is a prefix match
549			 */
550			if ((dflag == 1) || (*string != '/'))
551				return(-1);
552
553			/*
554			 * It is a prefix match, remember where the trailing
555			 * / is located
556			 */
557			*pend = string;
558			return(0);
559		case '?':
560			if ((test = *string++) == '\0')
561				return (-1);
562			break;
563		case '*':
564			c = *pattern;
565			/*
566			 * Collapse multiple *'s.
567			 */
568			while (c == '*')
569				c = *++pattern;
570
571			/*
572			 * Optimized hack for pattern with a * at the end
573			 */
574			if (c == '\0')
575				return (0);
576
577			/*
578			 * General case, use recursion.
579			 */
580			while ((test = *string) != '\0') {
581				if (!fn_match(pattern, string, pend))
582					return (0);
583				++string;
584			}
585			return (-1);
586		case '[':
587			/*
588			 * range match
589			 */
590			if (((test = *string++) == '\0') ||
591			    ((pattern = range_match(pattern, test)) == NULL))
592				return (-1);
593			break;
594		case '\\':
595		default:
596			if (c != *string++)
597				return (-1);
598			break;
599		}
600	}
601	/* NOTREACHED */
602}
603
/*
 * range_match()
 *	test a single character against a [...] character class. pattern
 *	points at the first character AFTER the opening '['. A leading '!'
 *	inverts the class. Members are single characters or ranges written
 *	as lo-hi; a '-' that is directly followed by the closing ']' (or by
 *	the end of the pattern) stands for itself.
 * Return:
 *	a pointer to the pattern character following the closing ']' when
 *	test is accepted by the class; NULL when it is rejected or when the
 *	class is not closed by a ']'.
 */

#ifdef __STDC__
static char *
range_match(register char *pattern, register int test)
#else
static char *
range_match(pattern, test)
	register char *pattern;
	register int test;
#endif
{
	register char lo;		/* member, or low end of a range */
	register char hi;		/* high end of a range */
	int invert = 0;
	int hit = 0;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end without seeing the closing bracket
		 */
		if (lo == '\0')
			return (NULL);

		if ((pattern[0] == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			/*
			 * lo-hi range, step over the '-' and the upper bound
			 */
			hi = pattern[1];
			pattern += 2;
			if ((lo <= test) && (test <= hi))
				hit = 1;
		} else if (lo == test)
			hit = 1;
	}

	/*
	 * accepted when found in a plain class or absent from an inverted one
	 */
	if (hit == invert)
		return (NULL);
	return (pattern);
}
639
640/*
641 * mod_name()
642 *	modify a selected file name. first attempt to apply replacement string
643 *	expressions, then apply interactive file rename. We apply replacement
644 *	string expressions to both filenames and file links (if we didn't the
645 *	links would point to the wrong place, and we could never be able to
646 *	move an archive that has a file link in it). When we rename files
647 *	interactively, we store that mapping (old name to user input name) so
648 *	if we spot any file links to the old file name in the future, we will
649 *	know exactly how to fix the file link.
650 * Return:
651 *	0 continue to  process file, 1 skip this file, -1 pax is finished
652 */
653
654#if __STDC__
655int
656mod_name(register ARCHD *arcn)
657#else
658int
659mod_name(arcn)
660	register ARCHD *arcn;
661#endif
662{
663	register int res = 0;
664
665	/*
666	 * IMPORTANT: We have a problem. what do we do with symlinks?
667	 * Modifying a hard link name makes sense, as we know the file it
668	 * points at should have been seen already in the archive (and if it
669	 * wasn't seen because of a read error or a bad archive, we lose
670	 * anyway). But there are no such requirements for symlinks. On one
671	 * hand the symlink that refers to a file in the archive will have to
672	 * be modified to so it will still work at its new location in the
673	 * file system. On the other hand a symlink that points elsewhere (and
674	 * should continue to do so) should not be modified. There is clearly
675	 * no perfect solution here. So we handle them like hardlinks. Clearly
676	 * a replacement made by the interactive rename mapping is very likely
677	 * to be correct since it applies to a single file and is an exact
678	 * match. The regular expression replacements are a little harder to
679	 * justify though. We claim that the symlink name is only likely
680	 * to be replaced when it points within the file tree being moved and
681	 * in that case it should be modified. what we really need to do is to
682	 * call an oracle here. :)
683	 */
684	if (rephead != NULL) {
685		/*
686		 * we have replacement strings, modify the name and the link
687		 * name if any.
688		 */
689		if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
690			return(res);
691
692		if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
693		    (arcn->type == PAX_HRG)) &&
694		    ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
695			return(res);
696	}
697
698	if (iflag) {
699		/*
700		 * perform interactive file rename, then map the link if any
701		 */
702		if ((res = tty_rename(arcn)) != 0)
703			return(res);
704		if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
705		    (arcn->type == PAX_HRG))
706			sub_name(arcn->ln_name, &(arcn->ln_nlen));
707	}
708	return(res);
709}
710
711/*
712 * tty_rename()
713 *	Prompt the user for a replacement file name. A "." keeps the old name,
714 *	a empty line skips the file, and an EOF on reading the tty, will cause
715 *	pax to stop processing and exit. Otherwise the file name input, replaces
716 *	the old one.
717 * Return:
718 *	0 process this file, 1 skip this file, -1 we need to exit pax
719 */
720
721#if __STDC__
722static int
723tty_rename(register ARCHD *arcn)
724#else
725static int
726tty_rename(arcn)
727	register ARCHD *arcn;
728#endif
729{
730	char tmpname[PAXPATHLEN+2];
731	int res;
732
733	/*
734	 * prompt user for the replacement name for a file, keep trying until
735	 * we get some reasonable input. Archives may have more than one file
736	 * on them with the same name (from updates etc). We print verbose info
737	 * on the file so the user knows what is up.
738	 */
739	tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
740
741	for (;;) {
742		ls_tty(arcn);
743		tty_prnt("Input new name, or a \".\" to keep the old name, ");
744		tty_prnt("or a \"return\" to skip this file.\n");
745		tty_prnt("Input > ");
746		if (tty_read(tmpname, sizeof(tmpname)) < 0)
747			return(-1);
748		if (strcmp(tmpname, "..") == 0) {
749			tty_prnt("Try again, illegal file name: ..\n");
750			continue;
751		}
752		if (strlen(tmpname) > PAXPATHLEN) {
753			tty_prnt("Try again, file name too long\n");
754			continue;
755		}
756		break;
757	}
758
759	/*
760	 * empty file name, skips this file. a "." leaves it alone
761	 */
762	if (tmpname[0] == '\0') {
763		tty_prnt("Skipping file.\n");
764		return(1);
765	}
766	if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
767		tty_prnt("Processing continues, name unchanged.\n");
768		return(0);
769	}
770
771	/*
772	 * ok the name changed. We may run into links that point at this
773	 * file later. we have to remember where the user sent the file
774	 * in order to repair any links.
775	 */
776	tty_prnt("Processing continues, name changed to: %s\n", tmpname);
777	res = add_name(arcn->name, arcn->nlen, tmpname);
778	arcn->nlen = l_strncpy(arcn->name, tmpname, PAXPATHLEN+1);
779	arcn->name[PAXPATHLEN] = '\0';
780	if (res < 0)
781		return(-1);
782	return(0);
783}
784
785/*
786 * set_dest()
787 *	fix up the file name and the link name (if any) so this file will land
788 *	in the destination directory (used during copy() -rw).
789 * Return:
790 *	0 if ok, -1 if failure (name too long)
791 */
792
793#if __STDC__
794int
795set_dest(register ARCHD *arcn, char *dest_dir, int dir_len)
796#else
797int
798set_dest(arcn, dest_dir, dir_len)
799	register ARCHD *arcn;
800	char *dest_dir;
801	int dir_len;
802#endif
803{
804	if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
805		return(-1);
806
807	/*
808	 * It is really hard to deal with symlinks here, we cannot be sure
809	 * if the name they point was moved (or will be moved). It is best to
810	 * leave them alone.
811	 */
812	if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
813		return(0);
814
815	if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
816		return(-1);
817	return(0);
818}
819
820/*
821 * fix_path
822 *	concatenate dir_name and or_name and store the result in or_name (if
823 *	it fits). This is one ugly function.
824 * Return:
825 *	0 if ok, -1 if the final name is too long
826 */
827
828#if __STDC__
829static int
830fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
831#else
832static int
833fix_path(or_name, or_len, dir_name, dir_len)
834	char *or_name;
835	int *or_len;
836	char *dir_name;
837	int dir_len;
838#endif
839{
840	register char *src;
841	register char *dest;
842	register char *start;
843	int len;
844
845	/*
846	 * we shift the or_name to the right enough to tack in the dir_name
847	 * at the front. We make sure we have enough space for it all before
848	 * we start. since dest always ends in a slash, we skip of or_name
849	 * if it also starts with one.
850	 */
851	start = or_name;
852	src = start + *or_len;
853	dest = src + dir_len;
854	if (*start == '/') {
855		++start;
856		--dest;
857	}
858	if ((len = dest - or_name) > PAXPATHLEN) {
859		pax_warn(1, "File name %s/%s, too long", dir_name, start);
860		return(-1);
861	}
862	*or_len = len;
863
864	/*
865	 * enough space, shift
866	 */
867	while (src >= start)
868		*dest-- = *src--;
869	src = dir_name + dir_len - 1;
870
871	/*
872	 * splice in the destination directory name
873	 */
874	while (src >= dir_name)
875		*dest-- = *src--;
876
877	*(or_name + len) = '\0';
878	return(0);
879}
880
881/*
882 * rep_name()
883 *	walk down the list of replacement strings applying each one in order.
884 *	when we find one with a successful substitution, we modify the name
885 *	as specified. if required, we print the results. if the resulting name
886 *	is empty, we will skip this archive member. We use the regexp(3)
887 *	routines (regexp() ought to win a prize as having the most cryptic
888 *	library function manual page).
889 *	--Parameters--
890 *	name is the file name we are going to apply the regular expressions to
891 *	(and may be modified)
892 *	nlen is the length of this name (and is modified to hold the length of
893 *	the final string).
894 *	prnt is a flag that says whether to print the final result.
895 * Return:
896 *	0 if substitution was successful, 1 if we are to skip the file (the name
897 *	ended up empty)
898 */
899
900#if __STDC__
901static int
902rep_name(char *name, int *nlen, int prnt)
903#else
904static int
905rep_name(name, nlen, prnt)
906	char *name;
907	int *nlen;
908	int prnt;
909#endif
910{
911	register REPLACE *pt;
912	register char *inpt;
913	register char *outpt;
914	register char *endpt;
915	register char *rpt;
916	register int found = 0;
917	register int res;
918#	ifndef NET2_REGEX
919	regmatch_t pm[MAXSUBEXP];
920#	endif
921	char nname[PAXPATHLEN+1];	/* final result of all replacements */
922	char buf1[PAXPATHLEN+1];	/* where we work on the name */
923
924	/*
925	 * copy the name into buf1, where we will work on it. We need to keep
926	 * the orig string around so we can print out the result of the final
927	 * replacement. We build up the final result in nname. inpt points at
928	 * the string we apply the regular expression to. prnt is used to
929	 * suppress printing when we handle replacements on the link field
930	 * (the user already saw that substitution go by)
931	 */
932	pt = rephead;
933	(void)strcpy(buf1, name);
934	inpt = buf1;
935	outpt = nname;
936	endpt = outpt + PAXPATHLEN;
937
938	/*
939	 * try each replacement string in order
940	 */
941	while (pt != NULL) {
942		do {
943			/*
944			 * check for a successful substitution, if not go to
945			 * the next pattern, or cleanup if we were global
946			 */
947#			ifdef NET2_REGEX
948			if (regexec(pt->rcmp, inpt) == 0)
949#			else
950			if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
951#			endif
952				break;
953
954			/*
955			 * ok we found one. We have three parts, the prefix
956			 * which did not match, the section that did and the
957			 * tail (that also did not match). Copy the prefix to
958			 * the final output buffer (watching to make sure we
959			 * do not create a string too long).
960			 */
961			found = 1;
962#			ifdef NET2_REGEX
963			rpt = pt->rcmp->startp[0];
964#			else
965			rpt = inpt + pm[0].rm_so;
966#			endif
967
968			while ((inpt < rpt) && (outpt < endpt))
969				*outpt++ = *inpt++;
970			if (outpt == endpt)
971				break;
972
973			/*
974			 * for the second part (which matched the regular
975			 * expression) apply the substitution using the
976			 * replacement string and place it the prefix in the
977			 * final output. If we have problems, skip it.
978			 */
979#			ifdef NET2_REGEX
980			if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) {
981#			else
982			if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt))
983			    < 0) {
984#			endif
985				if (prnt)
986					pax_warn(1, "Replacement name error %s",
987					    name);
988				return(1);
989			}
990			outpt += res;
991
992			/*
993			 * we set up to look again starting at the first
994			 * character in the tail (of the input string right
995			 * after the last character matched by the regular
996			 * expression (inpt always points at the first char in
997			 * the string to process). If we are not doing a global
998			 * substitution, we will use inpt to copy the tail to
999			 * the final result. Make sure we do not overrun the
1000			 * output buffer
1001			 */
1002#			ifdef NET2_REGEX
1003			inpt = pt->rcmp->endp[0];
1004#			else
1005			inpt += pm[0].rm_eo;
1006#			endif
1007
1008			if ((outpt == endpt) || (*inpt == '\0'))
1009				break;
1010
1011			/*
1012			 * if the user wants global we keep trying to
1013			 * substitute until it fails, then we are done.
1014			 */
1015		} while (pt->flgs & GLOB);
1016
1017		if (found)
1018			break;
1019
1020		/*
1021		 * a successful substitution did NOT occur, try the next one
1022		 */
1023		pt = pt->fow;
1024	}
1025
1026	if (found) {
1027		/*
1028		 * we had a substitution, copy the last tail piece (if there is
1029		 * room) to the final result
1030		 */
1031		while ((outpt < endpt) && (*inpt != '\0'))
1032			*outpt++ = *inpt++;
1033
1034		*outpt = '\0';
1035		if ((outpt == endpt) && (*inpt != '\0')) {
1036			if (prnt)
1037				pax_warn(1,"Replacement name too long %s >> %s",
1038				    name, nname);
1039			return(1);
1040		}
1041
1042		/*
1043		 * inform the user of the result if wanted
1044		 */
1045		if (prnt && (pt->flgs & PRNT)) {
1046			if (*nname == '\0')
1047				(void)fprintf(stderr,"%s >> <empty string>\n",
1048				    name);
1049			else
1050				(void)fprintf(stderr,"%s >> %s\n", name, nname);
1051		}
1052
1053		/*
1054		 * if empty inform the caller this file is to be skipped
1055		 * otherwise copy the new name over the orig name and return
1056		 */
1057		if (*nname == '\0')
1058			return(1);
1059		*nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
1060		name[PAXPATHLEN] = '\0';
1061	}
1062	return(0);
1063}
1064
1065#ifdef NET2_REGEX
1066/*
1067 * resub()
1068 *	apply the replacement to the matched expression. expand out the old
1069 * 	style ed(1) subexpression expansion.
1070 * Return:
1071 *	-1 if error, or the number of characters added to the destination.
1072 */
1073
1074#if __STDC__
1075static int
1076resub(regexp *prog, char *src, char *dest, register char *destend)
1077#else
1078static int
1079resub(prog, src, dest, destend)
1080	regexp *prog;
1081	char *src;
1082	char *dest;
1083	register char *destend;
1084#endif
1085{
1086	register char *spt;
1087	register char *dpt;
1088	register char c;
1089	register int no;
1090	register int len;
1091
1092	spt = src;
1093	dpt = dest;
1094	while ((dpt < destend) && ((c = *spt++) != '\0')) {
1095		if (c == '&')
1096			no = 0;
1097		else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1098			no = *spt++ - '0';
1099		else {
1100 			if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1101 				c = *spt++;
1102 			*dpt++ = c;
1103			continue;
1104		}
1105 		if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1106		    ((len = prog->endp[no] - prog->startp[no]) <= 0))
1107			continue;
1108
1109		/*
1110		 * copy the subexpression to the destination.
1111		 * fail if we run out of space or the match string is damaged
1112		 */
1113		if (len > (destend - dpt))
1114			len = destend - dpt;
1115		if (l_strncpy(dpt, prog->startp[no], len) != len)
1116			return(-1);
1117		dpt += len;
1118	}
1119	return(dpt - dest);
1120}
1121
1122#else
1123
/*
 * resub()
 *	expand the substitution string src for the match described by pm,
 *	using the old ed(1) style notation: '&' is the whole match, \0
 *	through \9 select a subexpression, "\\" and "\&" give a literal
 *	backslash and ampersand. Anything else is copied unchanged. Output
 *	stops quietly when destend is reached; no terminator is written.
 *
 *	NOTE(review): the text of a match is copied from src + rm_so, i.e.
 *	out of the substitution string itself. The offsets in pm come from
 *	regexec() and count from the start of the string that was searched,
 *	which this routine is never given. Fixing that needs an additional
 *	parameter (and a matching change to the prototype and the caller).
 * Return:
 *	-1 if error (src names a subexpression the expression does not have,
 *	or the copy came up short), or the number of characters added to the
 *	destination.
 */

#if __STDC__
static int
resub(regex_t *rp, register regmatch_t *pm, char *src, char *dest,
	register char *destend)
#else
static int
resub(rp, pm, src, dest, destend)
	regex_t *rp;
	register regmatch_t *pm;
	char *src;
	char *dest;
	register char *destend;
#endif
{
	register char *tp;		/* reads the substitution string */
	register char *op;		/* writes the destination */
	register char c;
	register regmatch_t *ref;
	register int len;
	int nsub;

	tp = src;
	op = dest;
	nsub = rp->re_nsub;
	while (op < destend) {
		if ((c = *tp++) == '\0')
			break;

		if (c == '&') {
			/*
			 * the whole match
			 */
			ref = pm;
		} else if ((c == '\\') && (*tp >= '0') && (*tp <= '9')) {
			/*
			 * numbered subexpression, which has to exist
			 */
			len = *tp++ - '0';
			if (len > nsub)
				return(-1);
			ref = pm + len;
		} else {
			/*
			 * plain character, possibly an escaped \ or &
			 */
			if ((c == '\\') && ((*tp == '\\') || (*tp == '&')))
				c = *tp++;
			*op++ = c;
			continue;
		}

		/*
		 * a subexpression that is not set or is empty adds nothing
		 */
		if ((ref->rm_so < 0) || (ref->rm_eo < 0))
			continue;
		len = ref->rm_eo - ref->rm_so;
		if (len <= 0)
			continue;

		/*
		 * copy as much of the subexpression as fits.
		 * fail if the match string is damaged
		 */
		if (len > (destend - op))
			len = destend - op;
		if (l_strncpy(op, src + ref->rm_so, len) != len)
			return(-1);
		op += len;
	}
	return(op - dest);
}
1199#endif
1200