process.c revision 80286
1/*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992, 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38#ifndef lint
39#if 0
40static char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
41#endif
42static const char rcsid[] =
43  "$FreeBSD: head/usr.bin/sed/process.c 80286 2001-07-24 14:05:21Z obrien $";
44#endif /* not lint */
45
46#include <sys/types.h>
47#include <sys/stat.h>
48#include <sys/ioctl.h>
49#include <sys/uio.h>
50
51#include <ctype.h>
52#include <err.h>
53#include <errno.h>
54#include <fcntl.h>
55#include <limits.h>
56#include <regex.h>
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#include <unistd.h>
61
62#include "defs.h"
63#include "extern.h"
64
65static SPACE HS, PS, SS;
66#define	pd		PS.deleted
67#define	ps		PS.space
68#define	psl		PS.len
69#define	hs		HS.space
70#define	hsl		HS.len
71
72static inline int	 applies __P((struct s_command *));
73static void		 flush_appends __P((void));
74static void		 lputs __P((char *));
75static inline int	 regexec_e __P((regex_t *, const char *, int, int, size_t));
76static void		 regsub __P((SPACE *, char *, char *));
77static int		 substitute __P((struct s_command *));
78
79struct s_appends *appends;	/* Array of pointers to strings to append. */
80static int appendx;		/* Index into appends array. */
81int appendnum;			/* Size of appends array. */
82
83static int lastaddr;		/* Set by applies if last address of a range. */
84static int sdone;		/* If any substitutes since last line input. */
85				/* Iov structure for 'w' commands. */
86static regex_t *defpreg;
87size_t maxnsub;
88regmatch_t *match;
89
90#define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
91
92void
93process()
94{
95	struct s_command *cp;
96	SPACE tspace;
97	size_t len, oldpsl = 0;
98	char *p;
99
100	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
101		pd = 0;
102top:
103		cp = prog;
104redirect:
105		while (cp != NULL) {
106			if (!applies(cp)) {
107				cp = cp->next;
108				continue;
109			}
110			switch (cp->code) {
111			case '{':
112				cp = cp->u.c;
113				goto redirect;
114			case 'a':
115				if (appendx >= appendnum)
116					if ((appends = realloc(appends,
117					    sizeof(struct s_appends) *
118					    (appendnum *= 2))) == NULL)
119						err(1, "realloc");
120				appends[appendx].type = AP_STRING;
121				appends[appendx].s = cp->t;
122				appends[appendx].len = strlen(cp->t);
123				appendx++;
124				break;
125			case 'b':
126				cp = cp->u.c;
127				goto redirect;
128			case 'c':
129				pd = 1;
130				psl = 0;
131				if (cp->a2 == NULL || lastaddr)
132					(void)printf("%s", cp->t);
133				break;
134			case 'd':
135				pd = 1;
136				goto new;
137			case 'D':
138				if (pd)
139					goto new;
140				if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
141					pd = 1;
142					goto new;
143				} else {
144					psl -= (p + 1) - ps;
145					memmove(ps, p + 1, psl);
146					goto top;
147				}
148			case 'g':
149				cspace(&PS, hs, hsl, REPLACE);
150				break;
151			case 'G':
152				cspace(&PS, hs, hsl, 0);
153				break;
154			case 'h':
155				cspace(&HS, ps, psl, REPLACE);
156				break;
157			case 'H':
158				cspace(&HS, ps, psl, 0);
159				break;
160			case 'i':
161				(void)printf("%s", cp->t);
162				break;
163			case 'l':
164				lputs(ps);
165				break;
166			case 'n':
167				if (!nflag && !pd)
168					OUT(ps)
169				flush_appends();
170				if (!mf_fgets(&PS, REPLACE))
171					exit(0);
172				pd = 0;
173				break;
174			case 'N':
175				flush_appends();
176				if (!mf_fgets(&PS, 0)) {
177					if (!nflag && !pd)
178						OUT(ps)
179					exit(0);
180				}
181				break;
182			case 'p':
183				if (pd)
184					break;
185				OUT(ps)
186				break;
187			case 'P':
188				if (pd)
189					break;
190				if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
191					oldpsl = psl;
192					psl = (p + 1) - ps;
193				}
194				OUT(ps)
195				if (p != NULL)
196					psl = oldpsl;
197				break;
198			case 'q':
199				if (!nflag && !pd)
200					OUT(ps)
201				flush_appends();
202				exit(0);
203			case 'r':
204				if (appendx >= appendnum)
205					if ((appends = realloc(appends,
206					    sizeof(struct s_appends) *
207					    (appendnum *= 2))) == NULL)
208						err(1, "realloc");
209				appends[appendx].type = AP_FILE;
210				appends[appendx].s = cp->t;
211				appends[appendx].len = strlen(cp->t);
212				appendx++;
213				break;
214			case 's':
215				sdone |= substitute(cp);
216				break;
217			case 't':
218				if (sdone) {
219					sdone = 0;
220					cp = cp->u.c;
221					goto redirect;
222				}
223				break;
224			case 'w':
225				if (pd)
226					break;
227				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
228				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
229				    DEFFILEMODE)) == -1)
230					err(1, "%s", cp->t);
231				if (write(cp->u.fd, ps, psl) != psl)
232					err(1, "%s", cp->t);
233				break;
234			case 'x':
235				if (hs == NULL)
236					cspace(&HS, "\n", 1, REPLACE);
237				tspace = PS;
238				PS = HS;
239				HS = tspace;
240				break;
241			case 'y':
242				if (pd)
243					break;
244				for (p = ps, len = psl; --len; ++p)
245					*p = cp->u.y[(unsigned char)*p];
246				break;
247			case ':':
248			case '}':
249				break;
250			case '=':
251				(void)printf("%lu\n", linenum);
252			}
253			cp = cp->next;
254		} /* for all cp */
255
256new:		if (!nflag && !pd)
257			OUT(ps)
258		flush_appends();
259	} /* for all lines */
260}
261
262/*
263 * TRUE if the address passed matches the current program state
264 * (lastline, linenumber, ps).
265 */
266#define	MATCH(a)						\
267	(a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
268	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
269
270/*
271 * Return TRUE if the command applies to the current line.  Sets the inrange
272 * flag to process ranges.  Interprets the non-select (``!'') flag.
273 */
274static inline int
275applies(cp)
276	struct s_command *cp;
277{
278	int r;
279
280	lastaddr = 0;
281	if (cp->a1 == NULL && cp->a2 == NULL)
282		r = 1;
283	else if (cp->a2)
284		if (cp->inrange) {
285			if (MATCH(cp->a2)) {
286				cp->inrange = 0;
287				lastaddr = 1;
288			}
289			r = 1;
290		} else if (MATCH(cp->a1)) {
291			/*
292			 * If the second address is a number less than or
293			 * equal to the line number first selected, only
294			 * one line shall be selected.
295			 *	-- POSIX 1003.2
296			 */
297			if (cp->a2->type == AT_LINE &&
298			    linenum >= cp->a2->u.l)
299				lastaddr = 1;
300			else
301				cp->inrange = 1;
302			r = 1;
303		} else
304			r = 0;
305	else
306		r = MATCH(cp->a1);
307	return (cp->nonsel ? ! r : r);
308}
309
310/*
311 * substitute --
312 *	Do substitutions in the pattern space.  Currently, we build a
313 *	copy of the new pattern space in the substitute space structure
314 *	and then swap them.
315 */
316static int
317substitute(cp)
318	struct s_command *cp;
319{
320	SPACE tspace;
321	regex_t *re;
322	size_t re_off, slen;
323	int lastempty, n;
324	char *s;
325
326	s = ps;
327	re = cp->u.s->re;
328	if (re == NULL) {
329		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
330			linenum = cp->u.s->linenum;
331			errx(1, "%lu: %s: \\%d not defined in the RE",
332					linenum, fname, cp->u.s->maxbref);
333		}
334	}
335	if (!regexec_e(re, s, 0, 0, psl))
336		return (0);
337
338  	SS.len = 0;				/* Clean substitute space. */
339  	slen = psl;
340  	n = cp->u.s->n;
341	lastempty = 1;
342
343  	switch (n) {
344  	case 0:					/* Global */
345  		do {
346			if (lastempty || match[0].rm_so != match[0].rm_eo) {
347				/* Locate start of replaced string. */
348				re_off = match[0].rm_so;
349				/* Copy leading retained string. */
350				cspace(&SS, s, re_off, APPEND);
351				/* Add in regular expression. */
352				regsub(&SS, s, cp->u.s->new);
353			}
354
355			/* Move past this match. */
356			if (match[0].rm_so != match[0].rm_eo) {
357				s += match[0].rm_eo;
358				slen -= match[0].rm_eo;
359				lastempty = 0;
360			} else {
361				if (match[0].rm_so == 0)
362					cspace(&SS, s, match[0].rm_so + 1,
363					    APPEND);
364				else
365					cspace(&SS, s + match[0].rm_so, 1,
366					    APPEND);
367				s += match[0].rm_so + 1;
368				slen -= match[0].rm_so + 1;
369				lastempty = 1;
370			}
371		} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
372		/* Copy trailing retained string. */
373		if (slen > 0)
374			cspace(&SS, s, slen, APPEND);
375  		break;
376	default:				/* Nth occurrence */
377		while (--n) {
378			s += match[0].rm_eo;
379			slen -= match[0].rm_eo;
380			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
381				return (0);
382		}
383		/* FALLTHROUGH */
384	case 1:					/* 1st occurrence */
385		/* Locate start of replaced string. */
386		re_off = match[0].rm_so + (s - ps);
387		/* Copy leading retained string. */
388		cspace(&SS, ps, re_off, APPEND);
389		/* Add in regular expression. */
390		regsub(&SS, s, cp->u.s->new);
391		/* Copy trailing retained string. */
392		s += match[0].rm_eo;
393		slen -= match[0].rm_eo;
394		cspace(&SS, s, slen, APPEND);
395		break;
396	}
397
398	/*
399	 * Swap the substitute space and the pattern space, and make sure
400	 * that any leftover pointers into stdio memory get lost.
401	 */
402	tspace = PS;
403	PS = SS;
404	SS = tspace;
405	SS.space = SS.back;
406
407	/* Handle the 'p' flag. */
408	if (cp->u.s->p)
409		OUT(ps)
410
411	/* Handle the 'w' flag. */
412	if (cp->u.s->wfile && !pd) {
413		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
414		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
415			err(1, "%s", cp->u.s->wfile);
416		if (write(cp->u.s->wfd, ps, psl) != psl)
417			err(1, "%s", cp->u.s->wfile);
418	}
419	return (1);
420}
421
422/*
423 * Flush append requests.  Always called before reading a line,
424 * therefore it also resets the substitution done (sdone) flag.
425 */
426static void
427flush_appends()
428{
429	FILE *f;
430	int count, i;
431	char buf[8 * 1024];
432
433	for (i = 0; i < appendx; i++)
434		switch (appends[i].type) {
435		case AP_STRING:
436			fwrite(appends[i].s, sizeof(char), appends[i].len,
437			    stdout);
438			break;
439		case AP_FILE:
440			/*
441			 * Read files probably shouldn't be cached.  Since
442			 * it's not an error to read a non-existent file,
443			 * it's possible that another program is interacting
444			 * with the sed script through the file system.  It
445			 * would be truly bizarre, but possible.  It's probably
446			 * not that big a performance win, anyhow.
447			 */
448			if ((f = fopen(appends[i].s, "r")) == NULL)
449				break;
450			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
451				(void)fwrite(buf, sizeof(char), count, stdout);
452			(void)fclose(f);
453			break;
454		}
455	if (ferror(stdout))
456		errx(1, "stdout: %s", strerror(errno ? errno : EIO));
457	appendx = sdone = 0;
458}
459
/*
 * lputs --
 *	Implement the 'l' command: write the NUL-terminated string `s' to
 *	stdout in an unambiguous form, followed by "$" and a newline.
 *	Printable characters other than backslash are written as is; the
 *	characters \\, \a, \b, \f, \n, \r, \t and \v use their C escape;
 *	anything else is written as a backslash and three octal digits.
 *	Output is folded with a backslash-newline once the column count
 *	reaches the terminal width.
 *
 *	The width is determined once: from COLUMNS if set, otherwise from
 *	the window size of stdout, otherwise 60.  It is clamped to at least
 *	1 so that an unusable COLUMNS value (0, negative, not a number)
 *	neither folds before every character nor defeats the caching.
 *	A write error on stdout is fatal.
 */
static void
lputs(s)
	register char *s;
{
	static const char escapes[] = "\\\a\b\f\n\r\t\v";
	static const char names[] = "\\abfnrtv";
	static int termwidth = -1;
	register int count;
	const char *cols, *hit;
	struct winsize win;

	if (termwidth == -1) {
		if ((cols = getenv("COLUMNS")))
			termwidth = atoi(cols);
		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
		    win.ws_col > 0)
			termwidth = win.ws_col;
		else
			termwidth = 60;
		if (termwidth <= 0)
			termwidth = 1;
	}

	for (count = 0; *s; ++s) {
		if (count >= termwidth) {
			(void)printf("\\\n");
			count = 0;
		}
		if (isprint((unsigned char)*s) && *s != '\\') {
			(void)putchar(*s);
			count++;
		} else {
			(void)putchar('\\');
			/* *s is never NUL here, so the terminator cannot hit. */
			if ((hit = strchr(escapes, *s))) {
				(void)putchar(names[hit - escapes]);
				count += 2;
			} else {
				(void)printf("%03o", (unsigned char)*s);
				count += 4;
			}
		}
	}
	(void)putchar('$');
	(void)putchar('\n');
	if (ferror(stdout))
		errx(1, "stdout: %s", strerror(errno ? errno : EIO));
}
504
505static inline int
506regexec_e(preg, string, eflags, nomatch, slen)
507	regex_t *preg;
508	const char *string;
509	int eflags, nomatch;
510	size_t slen;
511{
512	int eval;
513
514	if (preg == NULL) {
515		if (defpreg == NULL)
516			errx(1, "first RE may not be empty");
517	} else
518		defpreg = preg;
519
520	/* Set anchors, discounting trailing newline (if any). */
521	if (slen > 0 && string[slen - 1] == '\n')
522		slen--;
523	match[0].rm_so = 0;
524	match[0].rm_eo = slen;
525
526	eval = regexec(defpreg, string,
527	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
528	switch(eval) {
529	case 0:
530		return (1);
531	case REG_NOMATCH:
532		return (0);
533	}
534	errx(1, "RE error: %s", strregerror(eval, defpreg));
535	/* NOTREACHED */
536}
537
538/*
539 * regsub - perform substitutions after a regexp match
540 * Based on a routine by Henry Spencer
541 */
542static void
543regsub(sp, string, src)
544	SPACE *sp;
545	char *string, *src;
546{
547	register int len, no;
548	register char c, *dst;
549
550#define	NEEDSP(reqlen)							\
551	if (sp->len >= sp->blen - (reqlen) - 1) {			\
552		sp->blen += (reqlen) + 1024;				\
553		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \
554		    == NULL)						\
555			err(1, "realloc");				\
556		dst = sp->space + sp->len;				\
557	}
558
559	dst = sp->space + sp->len;
560	while ((c = *src++) != '\0') {
561		if (c == '&')
562			no = 0;
563		else if (c == '\\' && isdigit((unsigned char)*src))
564			no = *src++ - '0';
565		else
566			no = -1;
567		if (no < 0) {		/* Ordinary character. */
568 			if (c == '\\' && (*src == '\\' || *src == '&'))
569 				c = *src++;
570			NEEDSP(1);
571 			*dst++ = c;
572			++sp->len;
573 		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
574			len = match[no].rm_eo - match[no].rm_so;
575			NEEDSP(len);
576			memmove(dst, string + match[no].rm_so, len);
577			dst += len;
578			sp->len += len;
579		}
580	}
581	NEEDSP(1);
582	*dst = '\0';
583}
584
585/*
586 * aspace --
587 *	Append the source space to the destination space, allocating new
588 *	space as necessary.
589 */
590void
591cspace(sp, p, len, spflag)
592	SPACE *sp;
593	char *p;
594	size_t len;
595	enum e_spflag spflag;
596{
597	size_t tlen;
598
599	/* Make sure SPACE has enough memory and ramp up quickly. */
600	tlen = sp->len + len + 1;
601	if (tlen > sp->blen) {
602		sp->blen = tlen + 1024;
603		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) ==
604		    NULL)
605			err(1, "realloc");
606	}
607
608	if (spflag == REPLACE)
609		sp->len = 0;
610
611	memmove(sp->space + sp->len, p, len);
612
613	sp->space[sp->len += len] = '\0';
614}
615
616/*
617 * Close all cached opened files and report any errors
618 */
619void
620cfclose(cp, end)
621	register struct s_command *cp, *end;
622{
623
624	for (; cp != end; cp = cp->next)
625		switch(cp->code) {
626		case 's':
627			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
628				err(1, "%s", cp->u.s->wfile);
629			cp->u.s->wfd = -1;
630			break;
631		case 'w':
632			if (cp->u.fd != -1 && close(cp->u.fd))
633				err(1, "%s", cp->t);
634			cp->u.fd = -1;
635			break;
636		case '{':
637			cfclose(cp->u.c, cp->next);
638			break;
639		}
640}
641