process.c revision 10075
1/*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992, 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38#ifndef lint
39static char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
40#endif /* not lint */
41
42#include <sys/types.h>
43#include <sys/stat.h>
44#include <sys/ioctl.h>
45#include <sys/uio.h>
46
47#include <ctype.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <limits.h>
51#include <regex.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
55#include <unistd.h>
56
57#include "defs.h"
58#include "extern.h"
59
60static SPACE HS, PS, SS;
61#define	pd		PS.deleted
62#define	ps		PS.space
63#define	psl		PS.len
64#define	hs		HS.space
65#define	hsl		HS.len
66
67static inline int	 applies __P((struct s_command *));
68static void		 flush_appends __P((void));
69static void		 lputs __P((char *));
70static inline int	 regexec_e __P((regex_t *, const char *, int, int, size_t));
71static void		 regsub __P((SPACE *, char *, char *));
72static int		 substitute __P((struct s_command *));
73
74struct s_appends *appends;	/* Array of pointers to strings to append. */
75static int appendx;		/* Index into appends array. */
76int appendnum;			/* Size of appends array. */
77
78static int lastaddr;		/* Set by applies if last address of a range. */
79static int sdone;		/* If any substitutes since last line input. */
80				/* Iov structure for 'w' commands. */
81static regex_t *defpreg;
82size_t maxnsub;
83regmatch_t *match;
84
85#define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
86
87void
88process()
89{
90	struct s_command *cp;
91	SPACE tspace;
92	size_t len, oldpsl;
93	char *p;
94
95	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
96		pd = 0;
97top:
98		cp = prog;
99redirect:
100		while (cp != NULL) {
101			if (!applies(cp)) {
102				cp = cp->next;
103				continue;
104			}
105			switch (cp->code) {
106			case '{':
107				cp = cp->u.c;
108				goto redirect;
109			case 'a':
110				if (appendx >= appendnum)
111					appends = xrealloc(appends,
112					    sizeof(struct s_appends) *
113					    (appendnum *= 2));
114				appends[appendx].type = AP_STRING;
115				appends[appendx].s = cp->t;
116				appends[appendx].len = strlen(cp->t);
117				appendx++;
118				break;
119			case 'b':
120				cp = cp->u.c;
121				goto redirect;
122			case 'c':
123				pd = 1;
124				psl = 0;
125				if (cp->a2 == NULL || lastaddr)
126					(void)printf("%s", cp->t);
127				break;
128			case 'd':
129				pd = 1;
130				goto new;
131			case 'D':
132				if (pd)
133					goto new;
134				if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
135					pd = 1;
136					goto new;
137				} else {
138					psl -= (p + 1) - ps;
139					memmove(ps, p + 1, psl);
140					goto top;
141				}
142			case 'g':
143				cspace(&PS, hs, hsl, REPLACE);
144				break;
145			case 'G':
146				cspace(&PS, hs, hsl, 0);
147				break;
148			case 'h':
149				cspace(&HS, ps, psl, REPLACE);
150				break;
151			case 'H':
152				cspace(&HS, ps, psl, 0);
153				break;
154			case 'i':
155				(void)printf("%s", cp->t);
156				break;
157			case 'l':
158				lputs(ps);
159				break;
160			case 'n':
161				if (!nflag && !pd)
162					OUT(ps)
163				flush_appends();
164				if (!mf_fgets(&PS, REPLACE))
165					exit(0);
166				pd = 0;
167				break;
168			case 'N':
169				flush_appends();
170				if (!mf_fgets(&PS, 0)) {
171					if (!nflag && !pd)
172						OUT(ps)
173					exit(0);
174				}
175				break;
176			case 'p':
177				if (pd)
178					break;
179				OUT(ps)
180				break;
181			case 'P':
182				if (pd)
183					break;
184				if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
185					oldpsl = psl;
186					psl = (p + 1) - ps;
187				}
188				OUT(ps)
189				if (p != NULL)
190					psl = oldpsl;
191				break;
192			case 'q':
193				if (!nflag && !pd)
194					OUT(ps)
195				flush_appends();
196				exit(0);
197			case 'r':
198				if (appendx >= appendnum)
199					appends = xrealloc(appends,
200					    sizeof(struct s_appends) *
201					    (appendnum *= 2));
202				appends[appendx].type = AP_FILE;
203				appends[appendx].s = cp->t;
204				appends[appendx].len = strlen(cp->t);
205				appendx++;
206				break;
207			case 's':
208				sdone |= substitute(cp);
209				break;
210			case 't':
211				if (sdone) {
212					sdone = 0;
213					cp = cp->u.c;
214					goto redirect;
215				}
216				break;
217			case 'w':
218				if (pd)
219					break;
220				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
221				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
222				    DEFFILEMODE)) == -1)
223					err(FATAL, "%s: %s\n",
224					    cp->t, strerror(errno));
225				if (write(cp->u.fd, ps, psl) != psl)
226					err(FATAL, "%s: %s\n",
227					    cp->t, strerror(errno));
228				break;
229			case 'x':
230				if (hs == NULL)
231					cspace(&HS, "\n", 1, REPLACE);
232				tspace = PS;
233				PS = HS;
234				HS = tspace;
235				break;
236			case 'y':
237				if (pd)
238					break;
239				for (p = ps, len = psl; --len; ++p)
240					*p = cp->u.y[*p];
241				break;
242			case ':':
243			case '}':
244				break;
245			case '=':
246				(void)printf("%lu\n", linenum);
247			}
248			cp = cp->next;
249		} /* for all cp */
250
251new:		if (!nflag && !pd)
252			OUT(ps)
253		flush_appends();
254	} /* for all lines */
255}
256
257/*
258 * TRUE if the address passed matches the current program state
259 * (lastline, linenumber, ps).
260 */
261#define	MATCH(a)						\
262	(a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
263	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
264
265/*
266 * Return TRUE if the command applies to the current line.  Sets the inrange
267 * flag to process ranges.  Interprets the non-select (``!'') flag.
268 */
269static inline int
270applies(cp)
271	struct s_command *cp;
272{
273	int r;
274
275	lastaddr = 0;
276	if (cp->a1 == NULL && cp->a2 == NULL)
277		r = 1;
278	else if (cp->a2)
279		if (cp->inrange) {
280			if (MATCH(cp->a2)) {
281				cp->inrange = 0;
282				lastaddr = 1;
283			}
284			r = 1;
285		} else if (MATCH(cp->a1)) {
286			/*
287			 * If the second address is a number less than or
288			 * equal to the line number first selected, only
289			 * one line shall be selected.
290			 *	-- POSIX 1003.2
291			 */
292			if (cp->a2->type == AT_LINE &&
293			    linenum >= cp->a2->u.l)
294				lastaddr = 1;
295			else
296				cp->inrange = 1;
297			r = 1;
298		} else
299			r = 0;
300	else
301		r = MATCH(cp->a1);
302	return (cp->nonsel ? ! r : r);
303}
304
305/*
306 * substitute --
307 *	Do substitutions in the pattern space.  Currently, we build a
308 *	copy of the new pattern space in the substitute space structure
309 *	and then swap them.
310 */
311static int
312substitute(cp)
313	struct s_command *cp;
314{
315	SPACE tspace;
316	regex_t *re;
317	size_t re_off, slen;
318	int lastempty, n;
319	char *s;
320
321	s = ps;
322	re = cp->u.s->re;
323	if (re == NULL) {
324		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
325			linenum = cp->u.s->linenum;
326			err(COMPILE, "\\%d not defined in the RE",
327			    cp->u.s->maxbref);
328		}
329	}
330	if (!regexec_e(re, s, 0, 0, psl))
331		return (0);
332
333  	SS.len = 0;				/* Clean substitute space. */
334  	slen = psl;
335  	n = cp->u.s->n;
336	lastempty = 1;
337
338  	switch (n) {
339  	case 0:					/* Global */
340  		do {
341			if (lastempty || match[0].rm_so != match[0].rm_eo) {
342				/* Locate start of replaced string. */
343				re_off = match[0].rm_so;
344				/* Copy leading retained string. */
345				cspace(&SS, s, re_off, APPEND);
346				/* Add in regular expression. */
347				regsub(&SS, s, cp->u.s->new);
348			}
349
350			/* Move past this match. */
351			if (match[0].rm_so != match[0].rm_eo) {
352				s += match[0].rm_eo;
353				slen -= match[0].rm_eo;
354				lastempty = 0;
355			} else {
356				if (match[0].rm_so == 0)
357					cspace(&SS, s, match[0].rm_so + 1,
358					    APPEND);
359				else
360					cspace(&SS, s + match[0].rm_so, 1,
361					    APPEND);
362				s += match[0].rm_so + 1;
363				slen -= match[0].rm_so + 1;
364				lastempty = 1;
365			}
366		} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
367		/* Copy trailing retained string. */
368		if (slen > 0)
369			cspace(&SS, s, slen, APPEND);
370  		break;
371	default:				/* Nth occurrence */
372		while (--n) {
373			s += match[0].rm_eo;
374			slen -= match[0].rm_eo;
375			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
376				return (0);
377		}
378		/* FALLTHROUGH */
379	case 1:					/* 1st occurrence */
380		/* Locate start of replaced string. */
381		re_off = match[0].rm_so + (s - ps);
382		/* Copy leading retained string. */
383		cspace(&SS, ps, re_off, APPEND);
384		/* Add in regular expression. */
385		regsub(&SS, s, cp->u.s->new);
386		/* Copy trailing retained string. */
387		s += match[0].rm_eo;
388		slen -= match[0].rm_eo;
389		cspace(&SS, s, slen, APPEND);
390		break;
391	}
392
393	/*
394	 * Swap the substitute space and the pattern space, and make sure
395	 * that any leftover pointers into stdio memory get lost.
396	 */
397	tspace = PS;
398	PS = SS;
399	SS = tspace;
400	SS.space = SS.back;
401
402	/* Handle the 'p' flag. */
403	if (cp->u.s->p)
404		OUT(ps)
405
406	/* Handle the 'w' flag. */
407	if (cp->u.s->wfile && !pd) {
408		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
409		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
410			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
411		if (write(cp->u.s->wfd, ps, psl) != psl)
412			err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
413	}
414	return (1);
415}
416
417/*
418 * Flush append requests.  Always called before reading a line,
419 * therefore it also resets the substitution done (sdone) flag.
420 */
421static void
422flush_appends()
423{
424	FILE *f;
425	int count, i;
426	char buf[8 * 1024];
427
428	for (i = 0; i < appendx; i++)
429		switch (appends[i].type) {
430		case AP_STRING:
431			fwrite(appends[i].s, sizeof(char), appends[i].len,
432			    stdout);
433			break;
434		case AP_FILE:
435			/*
436			 * Read files probably shouldn't be cached.  Since
437			 * it's not an error to read a non-existent file,
438			 * it's possible that another program is interacting
439			 * with the sed script through the file system.  It
440			 * would be truly bizarre, but possible.  It's probably
441			 * not that big a performance win, anyhow.
442			 */
443			if ((f = fopen(appends[i].s, "r")) == NULL)
444				break;
445			while (count = fread(buf, sizeof(char), sizeof(buf), f))
446				(void)fwrite(buf, sizeof(char), count, stdout);
447			(void)fclose(f);
448			break;
449		}
450	if (ferror(stdout))
451		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
452	appendx = sdone = 0;
453}
454
455static void
456lputs(s)
457	register char *s;
458{
459	register int count;
460	register char *escapes, *p;
461	struct winsize win;
462	static int termwidth = -1;
463
464	if (termwidth == -1)
465		if (p = getenv("COLUMNS"))
466			termwidth = atoi(p);
467		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
468		    win.ws_col > 0)
469			termwidth = win.ws_col;
470		else
471			termwidth = 60;
472
473	for (count = 0; *s; ++s) {
474		if (count >= termwidth) {
475			(void)printf("\\\n");
476			count = 0;
477		}
478		if (isascii(*s) && isprint(*s) && *s != '\\') {
479			(void)putchar(*s);
480			count++;
481		} else {
482			escapes = "\\\a\b\f\n\r\t\v";
483			(void)putchar('\\');
484			if (p = strchr(escapes, *s)) {
485				(void)putchar("\\abfnrtv"[p - escapes]);
486				count += 2;
487			} else {
488				(void)printf("%03o", *(u_char *)s);
489				count += 4;
490			}
491		}
492	}
493	(void)putchar('$');
494	(void)putchar('\n');
495	if (ferror(stdout))
496		err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
497}
498
499static inline int
500regexec_e(preg, string, eflags, nomatch, slen)
501	regex_t *preg;
502	const char *string;
503	int eflags, nomatch;
504	size_t slen;
505{
506	int eval;
507
508	if (preg == NULL) {
509		if (defpreg == NULL)
510			err(FATAL, "first RE may not be empty");
511	} else
512		defpreg = preg;
513
514	/* Set anchors, discounting trailing newline (if any). */
515	if (slen > 0 && string[slen - 1] == '\n')
516		slen--;
517	match[0].rm_so = 0;
518	match[0].rm_eo = slen;
519
520	eval = regexec(defpreg, string,
521	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
522	switch(eval) {
523	case 0:
524		return (1);
525	case REG_NOMATCH:
526		return (0);
527	}
528	err(FATAL, "RE error: %s", strregerror(eval, defpreg));
529	/* NOTREACHED */
530}
531
532/*
533 * regsub - perform substitutions after a regexp match
534 * Based on a routine by Henry Spencer
535 */
536static void
537regsub(sp, string, src)
538	SPACE *sp;
539	char *string, *src;
540{
541	register int len, no;
542	register char c, *dst;
543
544#define	NEEDSP(reqlen)							\
545	if (sp->len >= sp->blen - (reqlen) - 1) {			\
546		sp->blen += (reqlen) + 1024;				\
547		sp->space = sp->back = xrealloc(sp->back, sp->blen);	\
548		dst = sp->space + sp->len;				\
549	}
550
551	dst = sp->space + sp->len;
552	while ((c = *src++) != '\0') {
553		if (c == '&')
554			no = 0;
555		else if (c == '\\' && isdigit(*src))
556			no = *src++ - '0';
557		else
558			no = -1;
559		if (no < 0) {		/* Ordinary character. */
560 			if (c == '\\' && (*src == '\\' || *src == '&'))
561 				c = *src++;
562			NEEDSP(1);
563 			*dst++ = c;
564			++sp->len;
565 		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
566			len = match[no].rm_eo - match[no].rm_so;
567			NEEDSP(len);
568			memmove(dst, string + match[no].rm_so, len);
569			dst += len;
570			sp->len += len;
571		}
572	}
573	NEEDSP(1);
574	*dst = '\0';
575}
576
577/*
578 * aspace --
579 *	Append the source space to the destination space, allocating new
580 *	space as necessary.
581 */
582void
583cspace(sp, p, len, spflag)
584	SPACE *sp;
585	char *p;
586	size_t len;
587	enum e_spflag spflag;
588{
589	size_t tlen;
590
591	/* Make sure SPACE has enough memory and ramp up quickly. */
592	tlen = sp->len + len + 1;
593	if (tlen > sp->blen) {
594		sp->blen = tlen + 1024;
595		sp->space = sp->back = xrealloc(sp->back, sp->blen);
596	}
597
598	if (spflag == REPLACE)
599		sp->len = 0;
600
601	memmove(sp->space + sp->len, p, len);
602
603	sp->space[sp->len += len] = '\0';
604}
605
606/*
607 * Close all cached opened files and report any errors
608 */
609void
610cfclose(cp, end)
611	register struct s_command *cp, *end;
612{
613
614	for (; cp != end; cp = cp->next)
615		switch(cp->code) {
616		case 's':
617			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
618				err(FATAL,
619				    "%s: %s", cp->u.s->wfile, strerror(errno));
620			cp->u.s->wfd = -1;
621			break;
622		case 'w':
623			if (cp->u.fd != -1 && close(cp->u.fd))
624				err(FATAL, "%s: %s", cp->t, strerror(errno));
625			cp->u.fd = -1;
626			break;
627		case '{':
628			cfclose(cp->u.c, cp->next);
629			break;
630		}
631}
632