process.c revision 168211
1/*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992, 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/usr.bin/sed/process.c 168211 2007-04-01 13:25:03Z yar $");
36
37#ifndef lint
38static const char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
39#endif
40
41#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
44#include <sys/uio.h>
45
46#include <ctype.h>
47#include <err.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <limits.h>
51#include <regex.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
55#include <unistd.h>
56#include <wchar.h>
57#include <wctype.h>
58
59#include "defs.h"
60#include "extern.h"
61
62static SPACE HS, PS, SS, YS;
63#define	pd		PS.deleted
64#define	ps		PS.space
65#define	psl		PS.len
66#define	hs		HS.space
67#define	hsl		HS.len
68
69static __inline int	 applies(struct s_command *);
70static void		 do_tr(struct s_tr *);
71static void		 flush_appends(void);
72static void		 lputs(char *, size_t);
73static __inline int	 regexec_e(regex_t *, const char *, int, int, size_t);
74static void		 regsub(SPACE *, char *, char *);
75static int		 substitute(struct s_command *);
76
77struct s_appends *appends;	/* Array of pointers to strings to append. */
78static int appendx;		/* Index into appends array. */
79int appendnum;			/* Size of appends array. */
80
81static int lastaddr;		/* Set by applies if last address of a range. */
82static int sdone;		/* If any substitutes since last line input. */
83				/* Iov structure for 'w' commands. */
84static regex_t *defpreg;
85size_t maxnsub;
86regmatch_t *match;
87
88#define OUT(s) { fwrite(s, sizeof(u_char), psl, outfile); fputc('\n', outfile); }
89
90void
91process(void)
92{
93	struct s_command *cp;
94	SPACE tspace;
95	size_t oldpsl = 0;
96	char *p;
97
98	p = NULL;
99
100	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
101		pd = 0;
102top:
103		cp = prog;
104redirect:
105		while (cp != NULL) {
106			if (!applies(cp)) {
107				cp = cp->next;
108				continue;
109			}
110			switch (cp->code) {
111			case '{':
112				cp = cp->u.c;
113				goto redirect;
114			case 'a':
115				if (appendx >= appendnum)
116					if ((appends = realloc(appends,
117					    sizeof(struct s_appends) *
118					    (appendnum *= 2))) == NULL)
119						err(1, "realloc");
120				appends[appendx].type = AP_STRING;
121				appends[appendx].s = cp->t;
122				appends[appendx].len = strlen(cp->t);
123				appendx++;
124				break;
125			case 'b':
126				cp = cp->u.c;
127				goto redirect;
128			case 'c':
129				pd = 1;
130				psl = 0;
131				if (cp->a2 == NULL || lastaddr || lastline())
132					(void)fprintf(outfile, "%s", cp->t);
133				break;
134			case 'd':
135				pd = 1;
136				goto new;
137			case 'D':
138				if (pd)
139					goto new;
140				if (psl == 0 ||
141				    (p = memchr(ps, '\n', psl)) == NULL) {
142					pd = 1;
143					goto new;
144				} else {
145					psl -= (p + 1) - ps;
146					memmove(ps, p + 1, psl);
147					goto top;
148				}
149			case 'g':
150				cspace(&PS, hs, hsl, REPLACE);
151				break;
152			case 'G':
153				cspace(&PS, "\n", 1, 0);
154				cspace(&PS, hs, hsl, 0);
155				break;
156			case 'h':
157				cspace(&HS, ps, psl, REPLACE);
158				break;
159			case 'H':
160				cspace(&HS, "\n", 1, 0);
161				cspace(&HS, ps, psl, 0);
162				break;
163			case 'i':
164				(void)fprintf(outfile, "%s", cp->t);
165				break;
166			case 'l':
167				lputs(ps, psl);
168				break;
169			case 'n':
170				if (!nflag && !pd)
171					OUT(ps)
172				flush_appends();
173				if (!mf_fgets(&PS, REPLACE))
174					exit(0);
175				pd = 0;
176				break;
177			case 'N':
178				flush_appends();
179				cspace(&PS, "\n", 1, 0);
180				if (!mf_fgets(&PS, 0))
181					exit(0);
182				break;
183			case 'p':
184				if (pd)
185					break;
186				OUT(ps)
187				break;
188			case 'P':
189				if (pd)
190					break;
191				if ((p = memchr(ps, '\n', psl)) != NULL) {
192					oldpsl = psl;
193					psl = p - ps;
194				}
195				OUT(ps)
196				if (p != NULL)
197					psl = oldpsl;
198				break;
199			case 'q':
200				if (!nflag && !pd)
201					OUT(ps)
202				flush_appends();
203				exit(0);
204			case 'r':
205				if (appendx >= appendnum)
206					if ((appends = realloc(appends,
207					    sizeof(struct s_appends) *
208					    (appendnum *= 2))) == NULL)
209						err(1, "realloc");
210				appends[appendx].type = AP_FILE;
211				appends[appendx].s = cp->t;
212				appends[appendx].len = strlen(cp->t);
213				appendx++;
214				break;
215			case 's':
216				sdone |= substitute(cp);
217				break;
218			case 't':
219				if (sdone) {
220					sdone = 0;
221					cp = cp->u.c;
222					goto redirect;
223				}
224				break;
225			case 'w':
226				if (pd)
227					break;
228				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
229				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
230				    DEFFILEMODE)) == -1)
231					err(1, "%s", cp->t);
232				if (write(cp->u.fd, ps, psl) != psl ||
233				    write(cp->u.fd, "\n", 1) != 1)
234					err(1, "%s", cp->t);
235				break;
236			case 'x':
237				if (hs == NULL)
238					cspace(&HS, "", 0, REPLACE);
239				tspace = PS;
240				PS = HS;
241				HS = tspace;
242				break;
243			case 'y':
244				if (pd || psl == 0)
245					break;
246				do_tr(cp->u.y);
247				break;
248			case ':':
249			case '}':
250				break;
251			case '=':
252				(void)fprintf(outfile, "%lu\n", linenum);
253			}
254			cp = cp->next;
255		} /* for all cp */
256
257new:		if (!nflag && !pd)
258			OUT(ps)
259		flush_appends();
260	} /* for all lines */
261}
262
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).  An AT_RE address is matched against the
 * pattern space, an AT_LINE address against linenum, and any other
 * address type is treated as "last line".
 *
 * The expansion is fully parenthesized so that it can be combined with
 * other operators (e.g. !MATCH(a)) without changing its meaning.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
270
271/*
272 * Return TRUE if the command applies to the current line.  Sets the inrange
273 * flag to process ranges.  Interprets the non-select (``!'') flag.
274 */
275static __inline int
276applies(struct s_command *cp)
277{
278	int r;
279
280	lastaddr = 0;
281	if (cp->a1 == NULL && cp->a2 == NULL)
282		r = 1;
283	else if (cp->a2)
284		if (cp->inrange) {
285			if (MATCH(cp->a2)) {
286				cp->inrange = 0;
287				lastaddr = 1;
288			}
289			r = 1;
290		} else if (MATCH(cp->a1)) {
291			/*
292			 * If the second address is a number less than or
293			 * equal to the line number first selected, only
294			 * one line shall be selected.
295			 *	-- POSIX 1003.2
296			 */
297			if (cp->a2->type == AT_LINE &&
298			    linenum >= cp->a2->u.l)
299				lastaddr = 1;
300			else
301				cp->inrange = 1;
302			r = 1;
303		} else
304			r = 0;
305	else
306		r = MATCH(cp->a1);
307	return (cp->nonsel ? ! r : r);
308}
309
310/*
311 * substitute --
312 *	Do substitutions in the pattern space.  Currently, we build a
313 *	copy of the new pattern space in the substitute space structure
314 *	and then swap them.
315 */
316static int
317substitute(struct s_command *cp)
318{
319	SPACE tspace;
320	regex_t *re;
321	regoff_t re_off, slen;
322	int lastempty, n;
323	char *s;
324
325	s = ps;
326	re = cp->u.s->re;
327	if (re == NULL) {
328		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
329			linenum = cp->u.s->linenum;
330			errx(1, "%lu: %s: \\%d not defined in the RE",
331					linenum, fname, cp->u.s->maxbref);
332		}
333	}
334	if (!regexec_e(re, s, 0, 0, psl))
335		return (0);
336
337	SS.len = 0;				/* Clean substitute space. */
338	slen = psl;
339	n = cp->u.s->n;
340	lastempty = 1;
341
342	switch (n) {
343	case 0:					/* Global */
344		do {
345			if (lastempty || match[0].rm_so != match[0].rm_eo) {
346				/* Locate start of replaced string. */
347				re_off = match[0].rm_so;
348				/* Copy leading retained string. */
349				cspace(&SS, s, re_off, APPEND);
350				/* Add in regular expression. */
351				regsub(&SS, s, cp->u.s->new);
352			}
353
354			/* Move past this match. */
355			if (match[0].rm_so != match[0].rm_eo) {
356				s += match[0].rm_eo;
357				slen -= match[0].rm_eo;
358				lastempty = 0;
359			} else {
360				if (match[0].rm_so < slen)
361					cspace(&SS, s + match[0].rm_so, 1,
362					    APPEND);
363				s += match[0].rm_so + 1;
364				slen -= match[0].rm_so + 1;
365				lastempty = 1;
366			}
367		} while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
368		/* Copy trailing retained string. */
369		if (slen > 0)
370			cspace(&SS, s, slen, APPEND);
371		break;
372	default:				/* Nth occurrence */
373		while (--n) {
374			if (match[0].rm_eo == match[0].rm_so)
375				match[0].rm_eo = match[0].rm_so + 1;
376			s += match[0].rm_eo;
377			slen -= match[0].rm_eo;
378			if (slen < 0)
379				return (0);
380			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
381				return (0);
382		}
383		/* FALLTHROUGH */
384	case 1:					/* 1st occurrence */
385		/* Locate start of replaced string. */
386		re_off = match[0].rm_so + (s - ps);
387		/* Copy leading retained string. */
388		cspace(&SS, ps, re_off, APPEND);
389		/* Add in regular expression. */
390		regsub(&SS, s, cp->u.s->new);
391		/* Copy trailing retained string. */
392		s += match[0].rm_eo;
393		slen -= match[0].rm_eo;
394		cspace(&SS, s, slen, APPEND);
395		break;
396	}
397
398	/*
399	 * Swap the substitute space and the pattern space, and make sure
400	 * that any leftover pointers into stdio memory get lost.
401	 */
402	tspace = PS;
403	PS = SS;
404	SS = tspace;
405	SS.space = SS.back;
406
407	/* Handle the 'p' flag. */
408	if (cp->u.s->p)
409		OUT(ps)
410
411	/* Handle the 'w' flag. */
412	if (cp->u.s->wfile && !pd) {
413		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
414		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
415			err(1, "%s", cp->u.s->wfile);
416		if (write(cp->u.s->wfd, ps, psl) != psl ||
417		    write(cp->u.s->wfd, "\n", 1) != 1)
418			err(1, "%s", cp->u.s->wfile);
419	}
420	return (1);
421}
422
423/*
424 * do_tr --
425 *	Perform translation ('y' command) in the pattern space.
426 */
427static void
428do_tr(struct s_tr *y)
429{
430	SPACE tmp;
431	char c, *p;
432	size_t clen, left;
433	int i;
434
435	if (MB_CUR_MAX == 1) {
436		/*
437		 * Single-byte encoding: perform in-place translation
438		 * of the pattern space.
439		 */
440		for (p = ps; p < &ps[psl]; p++)
441			*p = y->bytetab[(u_char)*p];
442	} else {
443		/*
444		 * Multi-byte encoding: perform translation into the
445		 * translation space, then swap the translation and
446		 * pattern spaces.
447		 */
448		/* Clean translation space. */
449		YS.len = 0;
450		for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
451			if ((c = y->bytetab[(u_char)*p]) != '\0') {
452				cspace(&YS, &c, 1, APPEND);
453				clen = 1;
454				continue;
455			}
456			for (i = 0; i < y->nmultis; i++)
457				if (left >= y->multis[i].fromlen &&
458				    memcmp(p, y->multis[i].from,
459				    y->multis[i].fromlen) == 0)
460					break;
461			if (i < y->nmultis) {
462				cspace(&YS, y->multis[i].to,
463				    y->multis[i].tolen, APPEND);
464				clen = y->multis[i].fromlen;
465			} else {
466				cspace(&YS, p, 1, APPEND);
467				clen = 1;
468			}
469		}
470		/* Swap the translation space and the pattern space. */
471		tmp = PS;
472		PS = YS;
473		YS = tmp;
474		YS.space = YS.back;
475	}
476}
477
478/*
479 * Flush append requests.  Always called before reading a line,
480 * therefore it also resets the substitution done (sdone) flag.
481 */
482static void
483flush_appends(void)
484{
485	FILE *f;
486	int count, i;
487	char buf[8 * 1024];
488
489	for (i = 0; i < appendx; i++)
490		switch (appends[i].type) {
491		case AP_STRING:
492			fwrite(appends[i].s, sizeof(char), appends[i].len,
493			    outfile);
494			break;
495		case AP_FILE:
496			/*
497			 * Read files probably shouldn't be cached.  Since
498			 * it's not an error to read a non-existent file,
499			 * it's possible that another program is interacting
500			 * with the sed script through the filesystem.  It
501			 * would be truly bizarre, but possible.  It's probably
502			 * not that big a performance win, anyhow.
503			 */
504			if ((f = fopen(appends[i].s, "r")) == NULL)
505				break;
506			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
507				(void)fwrite(buf, sizeof(char), count, outfile);
508			(void)fclose(f);
509			break;
510		}
511	if (ferror(outfile))
512		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
513	appendx = sdone = 0;
514}
515
516static void
517lputs(char *s, size_t len)
518{
519	static const char escapes[] = "\\\a\b\f\r\t\v";
520	int c, col, width;
521	char *p;
522	struct winsize win;
523	static int termwidth = -1;
524	size_t clen, i;
525	wchar_t wc;
526	mbstate_t mbs;
527
528	if (outfile != stdout)
529		termwidth = 60;
530	if (termwidth == -1) {
531		if ((p = getenv("COLUMNS")) && *p != '\0')
532			termwidth = atoi(p);
533		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
534		    win.ws_col > 0)
535			termwidth = win.ws_col;
536		else
537			termwidth = 60;
538	}
539
540	memset(&mbs, 0, sizeof(mbs));
541	col = 0;
542	while (len != 0) {
543		clen = mbrtowc(&wc, s, len, &mbs);
544		if (clen == 0)
545			clen = 1;
546		if (clen == (size_t)-1 || clen == (size_t)-2) {
547			wc = (unsigned char)*s;
548			clen = 1;
549			memset(&mbs, 0, sizeof(mbs));
550		}
551		if (wc == '\n') {
552			if (col + 1 >= termwidth)
553				fprintf(outfile, "\\\n");
554			fputc('$', outfile);
555			fputc('\n', outfile);
556			col = 0;
557		} else if (iswprint(wc)) {
558			width = wcwidth(wc);
559			if (col + width >= termwidth) {
560				fprintf(outfile, "\\\n");
561				col = 0;
562			}
563			fwrite(s, 1, clen, outfile);
564			col += width;
565		} else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
566		    (p = strchr(escapes, c)) != NULL) {
567			if (col + 2 >= termwidth) {
568				fprintf(outfile, "\\\n");
569				col = 0;
570			}
571			fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
572			col += 2;
573		} else {
574			if (col + 4 * clen >= termwidth) {
575				fprintf(outfile, "\\\n");
576				col = 0;
577			}
578			for (i = 0; i < clen; i++)
579				fprintf(outfile, "\\%03o",
580				    (int)(unsigned char)s[i]);
581			col += 4 * clen;
582		}
583		s += clen;
584		len -= clen;
585	}
586	if (col + 1 >= termwidth)
587		fprintf(outfile, "\\\n");
588	(void)fputc('$', outfile);
589	(void)fputc('\n', outfile);
590	if (ferror(outfile))
591		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
592}
593
594static __inline int
595regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
596	size_t slen)
597{
598	int eval;
599
600	if (preg == NULL) {
601		if (defpreg == NULL)
602			errx(1, "first RE may not be empty");
603	} else
604		defpreg = preg;
605
606	/* Set anchors */
607	match[0].rm_so = 0;
608	match[0].rm_eo = slen;
609
610	eval = regexec(defpreg, string,
611	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
612	switch(eval) {
613	case 0:
614		return (1);
615	case REG_NOMATCH:
616		return (0);
617	}
618	errx(1, "RE error: %s", strregerror(eval, defpreg));
619	/* NOTREACHED */
620}
621
622/*
623 * regsub - perform substitutions after a regexp match
624 * Based on a routine by Henry Spencer
625 */
626static void
627regsub(SPACE *sp, char *string, char *src)
628{
629	int len, no;
630	char c, *dst;
631
632#define	NEEDSP(reqlen)							\
633	/* XXX What is the +1 for? */					\
634	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
635		sp->blen += (reqlen) + 1024;				\
636		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \
637		    == NULL)						\
638			err(1, "realloc");				\
639		dst = sp->space + sp->len;				\
640	}
641
642	dst = sp->space + sp->len;
643	while ((c = *src++) != '\0') {
644		if (c == '&')
645			no = 0;
646		else if (c == '\\' && isdigit((unsigned char)*src))
647			no = *src++ - '0';
648		else
649			no = -1;
650		if (no < 0) {		/* Ordinary character. */
651			if (c == '\\' && (*src == '\\' || *src == '&'))
652				c = *src++;
653			NEEDSP(1);
654			*dst++ = c;
655			++sp->len;
656		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
657			len = match[no].rm_eo - match[no].rm_so;
658			NEEDSP(len);
659			memmove(dst, string + match[no].rm_so, len);
660			dst += len;
661			sp->len += len;
662		}
663	}
664	NEEDSP(1);
665	*dst = '\0';
666}
667
668/*
669 * cspace --
670 *	Concatenate space: append the source space to the destination space,
671 *	allocating new space as necessary.
672 */
673void
674cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
675{
676	size_t tlen;
677
678	/* Make sure SPACE has enough memory and ramp up quickly. */
679	tlen = sp->len + len + 1;
680	if (tlen > sp->blen) {
681		sp->blen = tlen + 1024;
682		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) ==
683		    NULL)
684			err(1, "realloc");
685	}
686
687	if (spflag == REPLACE)
688		sp->len = 0;
689
690	memmove(sp->space + sp->len, p, len);
691
692	sp->space[sp->len += len] = '\0';
693}
694
695/*
696 * Close all cached opened files and report any errors
697 */
698void
699cfclose(struct s_command *cp, struct s_command *end)
700{
701
702	for (; cp != end; cp = cp->next)
703		switch(cp->code) {
704		case 's':
705			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
706				err(1, "%s", cp->u.s->wfile);
707			cp->u.s->wfd = -1;
708			break;
709		case 'w':
710			if (cp->u.fd != -1 && close(cp->u.fd))
711				err(1, "%s", cp->t);
712			cp->u.fd = -1;
713			break;
714		case '{':
715			cfclose(cp->u.c, cp->next);
716			break;
717		}
718}
719