process.c revision 300555
1/*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992, 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/usr.bin/sed/process.c 300555 2016-05-24 03:08:32Z pfg $");
36
37#ifndef lint
38static const char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
39#endif
40
41#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
44#include <sys/uio.h>
45
46#include <ctype.h>
47#include <err.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <limits.h>
51#include <regex.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
55#include <unistd.h>
56#include <wchar.h>
57#include <wctype.h>
58
59#include "defs.h"
60#include "extern.h"
61
62static SPACE HS, PS, SS, YS;
63#define	pd		PS.deleted
64#define	ps		PS.space
65#define	psl		PS.len
66#define	hs		HS.space
67#define	hsl		HS.len
68
69static inline int	 applies(struct s_command *);
70static void		 do_tr(struct s_tr *);
71static void		 flush_appends(void);
72static void		 lputs(char *, size_t);
73static int		 regexec_e(regex_t *, const char *, int, int, size_t);
74static void		 regsub(SPACE *, char *, char *);
75static int		 substitute(struct s_command *);
76
77struct s_appends *appends;	/* Array of pointers to strings to append. */
78static int appendx;		/* Index into appends array. */
79int appendnum;			/* Size of appends array. */
80
81static int lastaddr;		/* Set by applies if last address of a range. */
82static int sdone;		/* If any substitutes since last line input. */
83				/* Iov structure for 'w' commands. */
84static regex_t *defpreg;
85size_t maxnsub;
86regmatch_t *match;
87
88#define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
89
90void
91process(void)
92{
93	struct s_command *cp;
94	SPACE tspace;
95	size_t oldpsl = 0;
96	char *p;
97
98	p = NULL;
99
100	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
101		pd = 0;
102top:
103		cp = prog;
104redirect:
105		while (cp != NULL) {
106			if (!applies(cp)) {
107				cp = cp->next;
108				continue;
109			}
110			switch (cp->code) {
111			case '{':
112				cp = cp->u.c;
113				goto redirect;
114			case 'a':
115				if (appendx >= appendnum)
116					if ((appends = realloc(appends,
117					    sizeof(struct s_appends) *
118					    (appendnum *= 2))) == NULL)
119						err(1, "realloc");
120				appends[appendx].type = AP_STRING;
121				appends[appendx].s = cp->t;
122				appends[appendx].len = strlen(cp->t);
123				appendx++;
124				break;
125			case 'b':
126				cp = cp->u.c;
127				goto redirect;
128			case 'c':
129				pd = 1;
130				psl = 0;
131				if (cp->a2 == NULL || lastaddr || lastline())
132					(void)fprintf(outfile, "%s", cp->t);
133				break;
134			case 'd':
135				pd = 1;
136				goto new;
137			case 'D':
138				if (pd)
139					goto new;
140				if (psl == 0 ||
141				    (p = memchr(ps, '\n', psl)) == NULL) {
142					pd = 1;
143					goto new;
144				} else {
145					psl -= (p + 1) - ps;
146					memmove(ps, p + 1, psl);
147					goto top;
148				}
149			case 'g':
150				cspace(&PS, hs, hsl, REPLACE);
151				break;
152			case 'G':
153				cspace(&PS, "\n", 1, APPEND);
154				cspace(&PS, hs, hsl, APPEND);
155				break;
156			case 'h':
157				cspace(&HS, ps, psl, REPLACE);
158				break;
159			case 'H':
160				cspace(&HS, "\n", 1, APPEND);
161				cspace(&HS, ps, psl, APPEND);
162				break;
163			case 'i':
164				(void)fprintf(outfile, "%s", cp->t);
165				break;
166			case 'l':
167				lputs(ps, psl);
168				break;
169			case 'n':
170				if (!nflag && !pd)
171					OUT();
172				flush_appends();
173				if (!mf_fgets(&PS, REPLACE))
174					exit(0);
175				pd = 0;
176				break;
177			case 'N':
178				flush_appends();
179				cspace(&PS, "\n", 1, APPEND);
180				if (!mf_fgets(&PS, APPEND))
181					exit(0);
182				break;
183			case 'p':
184				if (pd)
185					break;
186				OUT();
187				break;
188			case 'P':
189				if (pd)
190					break;
191				if ((p = memchr(ps, '\n', psl)) != NULL) {
192					oldpsl = psl;
193					psl = p - ps;
194				}
195				OUT();
196				if (p != NULL)
197					psl = oldpsl;
198				break;
199			case 'q':
200				if (!nflag && !pd)
201					OUT();
202				flush_appends();
203				exit(0);
204			case 'r':
205				if (appendx >= appendnum)
206					if ((appends = realloc(appends,
207					    sizeof(struct s_appends) *
208					    (appendnum *= 2))) == NULL)
209						err(1, "realloc");
210				appends[appendx].type = AP_FILE;
211				appends[appendx].s = cp->t;
212				appends[appendx].len = strlen(cp->t);
213				appendx++;
214				break;
215			case 's':
216				sdone |= substitute(cp);
217				break;
218			case 't':
219				if (sdone) {
220					sdone = 0;
221					cp = cp->u.c;
222					goto redirect;
223				}
224				break;
225			case 'w':
226				if (pd)
227					break;
228				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
229				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
230				    DEFFILEMODE)) == -1)
231					err(1, "%s", cp->t);
232				if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
233				    write(cp->u.fd, "\n", 1) != 1)
234					err(1, "%s", cp->t);
235				break;
236			case 'x':
237				/*
238				 * If the hold space is null, make it empty
239				 * but not null.  Otherwise the pattern space
240				 * will become null after the swap, which is
241				 * an abnormal condition.
242				 */
243				if (hs == NULL)
244					cspace(&HS, "", 0, REPLACE);
245				tspace = PS;
246				PS = HS;
247				HS = tspace;
248				break;
249			case 'y':
250				if (pd || psl == 0)
251					break;
252				do_tr(cp->u.y);
253				break;
254			case ':':
255			case '}':
256				break;
257			case '=':
258				(void)fprintf(outfile, "%lu\n", linenum);
259			}
260			cp = cp->next;
261		} /* for all cp */
262
263new:		if (!nflag && !pd)
264			OUT();
265		flush_appends();
266	} /* for all lines */
267}
268
/*
 * Non-zero if address `a' selects the current program state: a line-number
 * address is compared with linenum, a regular-expression address is run
 * against the pattern space, and any other address type is answered by
 * lastline().
 */
#define	MATCH(a)							\
	((a)->type == AT_LINE ? linenum == (a)->u.l :			\
	    (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    lastline())
276
277/*
278 * Return TRUE if the command applies to the current line.  Sets the start
279 * line for process ranges.  Interprets the non-select (``!'') flag.
280 */
281static inline int
282applies(struct s_command *cp)
283{
284	int r;
285
286	lastaddr = 0;
287	if (cp->a1 == NULL && cp->a2 == NULL)
288		r = 1;
289	else if (cp->a2)
290		if (cp->startline > 0) {
291                        switch (cp->a2->type) {
292                        case AT_RELLINE:
293                                if (linenum - cp->startline <= cp->a2->u.l)
294                                        r = 1;
295                                else {
296				        cp->startline = 0;
297				        r = 0;
298                                }
299                                break;
300                        default:
301                                if (MATCH(cp->a2)) {
302                                        cp->startline = 0;
303                                        lastaddr = 1;
304                                        r = 1;
305                                } else if (cp->a2->type == AT_LINE &&
306                                            linenum > cp->a2->u.l) {
307                                        /*
308                                         * We missed the 2nd address due to a
309                                         * branch, so just close the range and
310                                         * return false.
311                                         */
312                                        cp->startline = 0;
313                                        r = 0;
314                                } else
315                                        r = 1;
316                        }
317		} else if (cp->a1 && MATCH(cp->a1)) {
318			/*
319			 * If the second address is a number less than or
320			 * equal to the line number first selected, only
321			 * one line shall be selected.
322			 *	-- POSIX 1003.2
323			 * Likewise if the relative second line address is zero.
324			 */
325			if ((cp->a2->type == AT_LINE &&
326			    linenum >= cp->a2->u.l) ||
327			    (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
328				lastaddr = 1;
329			else {
330				cp->startline = linenum;
331			}
332			r = 1;
333		} else
334			r = 0;
335	else
336		r = MATCH(cp->a1);
337	return (cp->nonsel ? ! r : r);
338}
339
340/*
341 * Reset the sed processor to its initial state.
342 */
343void
344resetstate(void)
345{
346	struct s_command *cp;
347
348	/*
349	 * Reset all in-range markers.
350	 */
351	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
352		if (cp->a2)
353			cp->startline = 0;
354
355	/*
356	 * Clear out the hold space.
357	 */
358	cspace(&HS, "", 0, REPLACE);
359}
360
361/*
362 * substitute --
363 *	Do substitutions in the pattern space.  Currently, we build a
364 *	copy of the new pattern space in the substitute space structure
365 *	and then swap them.
366 */
367static int
368substitute(struct s_command *cp)
369{
370	SPACE tspace;
371	regex_t *re;
372	regoff_t slen;
373	int lastempty, n;
374	char *s;
375
376	s = ps;
377	re = cp->u.s->re;
378	if (re == NULL) {
379		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
380			linenum = cp->u.s->linenum;
381			errx(1, "%lu: %s: \\%u not defined in the RE",
382					linenum, fname, cp->u.s->maxbref);
383		}
384	}
385	if (!regexec_e(re, s, 0, 0, psl))
386		return (0);
387
388	SS.len = 0;				/* Clean substitute space. */
389	slen = psl;
390	n = cp->u.s->n;
391	lastempty = 1;
392
393	do {
394		/* Copy the leading retained string. */
395		if (n <= 1 && match[0].rm_so)
396			cspace(&SS, s, match[0].rm_so, APPEND);
397
398		/* Skip zero-length matches right after other matches. */
399		if (lastempty || match[0].rm_so ||
400		    match[0].rm_so != match[0].rm_eo) {
401			if (n <= 1) {
402				/* Want this match: append replacement. */
403				regsub(&SS, s, cp->u.s->new);
404				if (n == 1)
405					n = -1;
406			} else {
407				/* Want a later match: append original. */
408				if (match[0].rm_eo)
409					cspace(&SS, s, match[0].rm_eo, APPEND);
410				n--;
411			}
412		}
413
414		/* Move past this match. */
415		s += match[0].rm_eo;
416		slen -= match[0].rm_eo;
417
418		/*
419		 * After a zero-length match, advance one byte,
420		 * and at the end of the line, terminate.
421		 */
422		if (match[0].rm_so == match[0].rm_eo) {
423			if (*s == '\0' || *s == '\n')
424				slen = -1;
425			else
426				slen--;
427			if (*s != '\0')
428			 	cspace(&SS, s++, 1, APPEND);
429			lastempty = 1;
430		} else
431			lastempty = 0;
432
433	} while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
434
435	/* Did not find the requested number of matches. */
436	if (n > 1)
437		return (0);
438
439	/* Copy the trailing retained string. */
440	if (slen > 0)
441		cspace(&SS, s, slen, APPEND);
442
443	/*
444	 * Swap the substitute space and the pattern space, and make sure
445	 * that any leftover pointers into stdio memory get lost.
446	 */
447	tspace = PS;
448	PS = SS;
449	SS = tspace;
450	SS.space = SS.back;
451
452	/* Handle the 'p' flag. */
453	if (cp->u.s->p)
454		OUT();
455
456	/* Handle the 'w' flag. */
457	if (cp->u.s->wfile && !pd) {
458		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
459		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
460			err(1, "%s", cp->u.s->wfile);
461		if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
462		    write(cp->u.s->wfd, "\n", 1) != 1)
463			err(1, "%s", cp->u.s->wfile);
464	}
465	return (1);
466}
467
468/*
469 * do_tr --
470 *	Perform translation ('y' command) in the pattern space.
471 */
472static void
473do_tr(struct s_tr *y)
474{
475	SPACE tmp;
476	char c, *p;
477	size_t clen, left;
478	int i;
479
480	if (MB_CUR_MAX == 1) {
481		/*
482		 * Single-byte encoding: perform in-place translation
483		 * of the pattern space.
484		 */
485		for (p = ps; p < &ps[psl]; p++)
486			*p = y->bytetab[(u_char)*p];
487	} else {
488		/*
489		 * Multi-byte encoding: perform translation into the
490		 * translation space, then swap the translation and
491		 * pattern spaces.
492		 */
493		/* Clean translation space. */
494		YS.len = 0;
495		for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
496			if ((c = y->bytetab[(u_char)*p]) != '\0') {
497				cspace(&YS, &c, 1, APPEND);
498				clen = 1;
499				continue;
500			}
501			for (i = 0; i < y->nmultis; i++)
502				if (left >= y->multis[i].fromlen &&
503				    memcmp(p, y->multis[i].from,
504				    y->multis[i].fromlen) == 0)
505					break;
506			if (i < y->nmultis) {
507				cspace(&YS, y->multis[i].to,
508				    y->multis[i].tolen, APPEND);
509				clen = y->multis[i].fromlen;
510			} else {
511				cspace(&YS, p, 1, APPEND);
512				clen = 1;
513			}
514		}
515		/* Swap the translation space and the pattern space. */
516		tmp = PS;
517		PS = YS;
518		YS = tmp;
519		YS.space = YS.back;
520	}
521}
522
523/*
524 * Flush append requests.  Always called before reading a line,
525 * therefore it also resets the substitution done (sdone) flag.
526 */
527static void
528flush_appends(void)
529{
530	FILE *f;
531	int count, i;
532	char buf[8 * 1024];
533
534	for (i = 0; i < appendx; i++)
535		switch (appends[i].type) {
536		case AP_STRING:
537			fwrite(appends[i].s, sizeof(char), appends[i].len,
538			    outfile);
539			break;
540		case AP_FILE:
541			/*
542			 * Read files probably shouldn't be cached.  Since
543			 * it's not an error to read a non-existent file,
544			 * it's possible that another program is interacting
545			 * with the sed script through the filesystem.  It
546			 * would be truly bizarre, but possible.  It's probably
547			 * not that big a performance win, anyhow.
548			 */
549			if ((f = fopen(appends[i].s, "r")) == NULL)
550				break;
551			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
552				(void)fwrite(buf, sizeof(char), count, outfile);
553			(void)fclose(f);
554			break;
555		}
556	if (ferror(outfile))
557		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
558	appendx = sdone = 0;
559}
560
561static void
562lputs(char *s, size_t len)
563{
564	static const char escapes[] = "\\\a\b\f\r\t\v";
565	int c, col, width;
566	const char *p;
567	struct winsize win;
568	static int termwidth = -1;
569	size_t clen, i;
570	wchar_t wc;
571	mbstate_t mbs;
572
573	if (outfile != stdout)
574		termwidth = 60;
575	if (termwidth == -1) {
576		if ((p = getenv("COLUMNS")) && *p != '\0')
577			termwidth = atoi(p);
578		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
579		    win.ws_col > 0)
580			termwidth = win.ws_col;
581		else
582			termwidth = 60;
583	}
584	if (termwidth <= 0)
585		termwidth = 1;
586
587	memset(&mbs, 0, sizeof(mbs));
588	col = 0;
589	while (len != 0) {
590		clen = mbrtowc(&wc, s, len, &mbs);
591		if (clen == 0)
592			clen = 1;
593		if (clen == (size_t)-1 || clen == (size_t)-2) {
594			wc = (unsigned char)*s;
595			clen = 1;
596			memset(&mbs, 0, sizeof(mbs));
597		}
598		if (wc == '\n') {
599			if (col + 1 >= termwidth)
600				fprintf(outfile, "\\\n");
601			fputc('$', outfile);
602			fputc('\n', outfile);
603			col = 0;
604		} else if (iswprint(wc)) {
605			width = wcwidth(wc);
606			if (col + width >= termwidth) {
607				fprintf(outfile, "\\\n");
608				col = 0;
609			}
610			fwrite(s, 1, clen, outfile);
611			col += width;
612		} else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
613		    (p = strchr(escapes, c)) != NULL) {
614			if (col + 2 >= termwidth) {
615				fprintf(outfile, "\\\n");
616				col = 0;
617			}
618			fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
619			col += 2;
620		} else {
621			if (col + 4 * clen >= (unsigned)termwidth) {
622				fprintf(outfile, "\\\n");
623				col = 0;
624			}
625			for (i = 0; i < clen; i++)
626				fprintf(outfile, "\\%03o",
627				    (int)(unsigned char)s[i]);
628			col += 4 * clen;
629		}
630		s += clen;
631		len -= clen;
632	}
633	if (col + 1 >= termwidth)
634		fprintf(outfile, "\\\n");
635	(void)fputc('$', outfile);
636	(void)fputc('\n', outfile);
637	if (ferror(outfile))
638		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
639}
640
641static int
642regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
643	size_t slen)
644{
645	int eval;
646
647	if (preg == NULL) {
648		if (defpreg == NULL)
649			errx(1, "first RE may not be empty");
650	} else
651		defpreg = preg;
652
653	/* Set anchors */
654	match[0].rm_so = 0;
655	match[0].rm_eo = slen;
656
657	eval = regexec(defpreg, string,
658	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
659	switch(eval) {
660	case 0:
661		return (1);
662	case REG_NOMATCH:
663		return (0);
664	}
665	errx(1, "RE error: %s", strregerror(eval, defpreg));
666	/* NOTREACHED */
667}
668
669/*
670 * regsub - perform substitutions after a regexp match
671 * Based on a routine by Henry Spencer
672 */
673static void
674regsub(SPACE *sp, char *string, char *src)
675{
676	int len, no;
677	char c, *dst;
678
679#define	NEEDSP(reqlen)							\
680	/* XXX What is the +1 for? */					\
681	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
682		sp->blen += (reqlen) + 1024;				\
683		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \
684		    == NULL)						\
685			err(1, "realloc");				\
686		dst = sp->space + sp->len;				\
687	}
688
689	dst = sp->space + sp->len;
690	while ((c = *src++) != '\0') {
691		if (c == '&')
692			no = 0;
693		else if (c == '\\' && isdigit((unsigned char)*src))
694			no = *src++ - '0';
695		else
696			no = -1;
697		if (no < 0) {		/* Ordinary character. */
698			if (c == '\\' && (*src == '\\' || *src == '&'))
699				c = *src++;
700			NEEDSP(1);
701			*dst++ = c;
702			++sp->len;
703		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
704			len = match[no].rm_eo - match[no].rm_so;
705			NEEDSP(len);
706			memmove(dst, string + match[no].rm_so, len);
707			dst += len;
708			sp->len += len;
709		}
710	}
711	NEEDSP(1);
712	*dst = '\0';
713}
714
715/*
716 * cspace --
717 *	Concatenate space: append the source space to the destination space,
718 *	allocating new space as necessary.
719 */
720void
721cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
722{
723	size_t tlen;
724
725	/* Make sure SPACE has enough memory and ramp up quickly. */
726	tlen = sp->len + len + 1;
727	if (tlen > sp->blen) {
728		sp->blen = tlen + 1024;
729		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) ==
730		    NULL)
731			err(1, "realloc");
732	}
733
734	if (spflag == REPLACE)
735		sp->len = 0;
736
737	memmove(sp->space + sp->len, p, len);
738
739	sp->space[sp->len += len] = '\0';
740}
741
742/*
743 * Close all cached opened files and report any errors
744 */
745void
746cfclose(struct s_command *cp, struct s_command *end)
747{
748
749	for (; cp != end; cp = cp->next)
750		switch(cp->code) {
751		case 's':
752			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
753				err(1, "%s", cp->u.s->wfile);
754			cp->u.s->wfd = -1;
755			break;
756		case 'w':
757			if (cp->u.fd != -1 && close(cp->u.fd))
758				err(1, "%s", cp->t);
759			cp->u.fd = -1;
760			break;
761		case '{':
762			cfclose(cp->u.c, cp->next);
763			break;
764		}
765}
766