119304Speter/*- 219304Speter * Copyright (c) 1992, 1993, 1994 319304Speter * The Regents of the University of California. All rights reserved. 419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996 519304Speter * Keith Bostic. All rights reserved. 619304Speter * 719304Speter * See the LICENSE file for redistribution information. 819304Speter */ 919304Speter 1019304Speter#include "config.h" 1119304Speter 1219304Speter#ifndef lint 13254225Speterstatic const char sccsid[] = "$Id: ex_subst.c,v 10.53 2011/12/21 20:40:35 zy Exp $"; 1419304Speter#endif /* not lint */ 1519304Speter 1619304Speter#include <sys/types.h> 1719304Speter#include <sys/queue.h> 1819304Speter#include <sys/time.h> 1919304Speter 2019304Speter#include <bitstring.h> 2119304Speter#include <ctype.h> 2219304Speter#include <errno.h> 2319304Speter#include <limits.h> 2419304Speter#include <stdio.h> 2519304Speter#include <stdlib.h> 2619304Speter#include <string.h> 2719304Speter#include <unistd.h> 2819304Speter 2919304Speter#include "../common/common.h" 3019304Speter#include "../vi/vi.h" 3119304Speter 3219304Speter#define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */ 3319304Speter#define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */ 3419304Speter 35254225Speterstatic int re_conv __P((SCR *, CHAR_T **, size_t *, int *)); 36254225Speterstatic int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *)); 3719304Speterstatic int re_sub __P((SCR *, 38254225Speter CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10])); 39254225Speterstatic int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *)); 40254225Speterstatic int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int)); 4119304Speter 4219304Speter/* 4319304Speter * ex_s -- 4419304Speter * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]] 4519304Speter * 4619304Speter * Substitute on lines matching a pattern. 4719304Speter * 4819304Speter * PUBLIC: int ex_s __P((SCR *, EXCMD *)); 4919304Speter */ 5019304Speterint 51254225Speterex_s(SCR *sp, EXCMD *cmdp) 5219304Speter{ 5319304Speter regex_t *re; 5419304Speter size_t blen, len; 5519304Speter u_int flags; 5619304Speter int delim; 57254225Speter CHAR_T *bp, *p, *ptrn, *rep, *t; 5819304Speter 5919304Speter /* 6019304Speter * Skip leading white space. 6119304Speter * 6219304Speter * !!! 6319304Speter * Historic vi allowed any non-alphanumeric to serve as the 6419304Speter * substitution command delimiter. 6519304Speter * 6619304Speter * !!! 6719304Speter * If the arguments are empty, it's the same as &, i.e. we 6819304Speter * repeat the last substitution. 6919304Speter */ 7019304Speter if (cmdp->argc == 0) 7119304Speter goto subagain; 7219304Speter for (p = cmdp->argv[0]->bp, 7319304Speter len = cmdp->argv[0]->len; len > 0; --len, ++p) { 74254225Speter if (!cmdskip(*p)) 7519304Speter break; 7619304Speter } 7719304Speter if (len == 0) 7819304Spetersubagain: return (ex_subagain(sp, cmdp)); 7919304Speter 8019304Speter delim = *p++; 81254225Speter if (!isascii(delim) || isalnum(delim) || delim == '\\') 8219304Speter return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR)); 8319304Speter 8419304Speter /* 8519304Speter * !!! 8619304Speter * The full-blown substitute command reset the remembered 8719304Speter * state of the 'c' and 'g' suffices. 8819304Speter */ 8919304Speter sp->c_suffix = sp->g_suffix = 0; 9019304Speter 9119304Speter /* 9219304Speter * Get the pattern string, toss escaping characters. 9319304Speter * 9419304Speter * !!! 9519304Speter * Historic vi accepted any of the following forms: 9619304Speter * 9719304Speter * :s/abc/def/ change "abc" to "def" 9819304Speter * :s/abc/def change "abc" to "def" 9919304Speter * :s/abc/ delete "abc" 10019304Speter * :s/abc delete "abc" 10119304Speter * 10219304Speter * QUOTING NOTE: 10319304Speter * 10419304Speter * Only toss an escaping character if it escapes a delimiter. 10519304Speter * This means that "s/A/\\\\f" replaces "A" with "\\f". It 10619304Speter * would be nice to be more regular, i.e. for each layer of 10719304Speter * escaping a single escaping character is removed, but that's 10819304Speter * not how the historic vi worked. 10919304Speter */ 11019304Speter for (ptrn = t = p;;) { 11119304Speter if (p[0] == '\0' || p[0] == delim) { 11219304Speter if (p[0] == delim) 11319304Speter ++p; 11419304Speter /* 11519304Speter * !!! 11619304Speter * Nul terminate the pattern string -- it's passed 11719304Speter * to regcomp which doesn't understand anything else. 11819304Speter */ 11919304Speter *t = '\0'; 12019304Speter break; 12119304Speter } 12219304Speter if (p[0] == '\\') 12319304Speter if (p[1] == delim) 12419304Speter ++p; 12519304Speter else if (p[1] == '\\') 12619304Speter *t++ = *p++; 12719304Speter *t++ = *p++; 12819304Speter } 12919304Speter 13019304Speter /* 13119304Speter * If the pattern string is empty, use the last RE (not just the 13219304Speter * last substitution RE). 13319304Speter */ 13419304Speter if (*ptrn == '\0') { 13519304Speter if (sp->re == NULL) { 13619304Speter ex_emsg(sp, NULL, EXM_NOPREVRE); 13719304Speter return (1); 13819304Speter } 13919304Speter 14019304Speter /* Re-compile the RE if necessary. */ 141254225Speter if (!F_ISSET(sp, SC_RE_SEARCH) && 142254225Speter re_compile(sp, sp->re, sp->re_len, 143254225Speter NULL, NULL, &sp->re_c, RE_C_SEARCH)) 14419304Speter return (1); 14519304Speter flags = 0; 14619304Speter } else { 14719304Speter /* 14819304Speter * !!! 14919304Speter * Compile the RE. Historic practice is that substitutes set 15019304Speter * the search direction as well as both substitute and search 15119304Speter * RE's. We compile the RE twice, as we don't want to bother 15219304Speter * ref counting the pattern string and (opaque) structure. 15319304Speter */ 154254225Speter if (re_compile(sp, ptrn, t - ptrn, &sp->re, 155254225Speter &sp->re_len, &sp->re_c, RE_C_SEARCH)) 15619304Speter return (1); 157254225Speter if (re_compile(sp, ptrn, t - ptrn, &sp->subre, 158254225Speter &sp->subre_len, &sp->subre_c, RE_C_SUBST)) 15919304Speter return (1); 16019304Speter 16119304Speter flags = SUB_FIRST; 16219304Speter sp->searchdir = FORWARD; 16319304Speter } 16419304Speter re = &sp->re_c; 16519304Speter 16619304Speter /* 16719304Speter * Get the replacement string. 16819304Speter * 16919304Speter * The special character & (\& if O_MAGIC not set) matches the 17019304Speter * entire RE. No handling of & is required here, it's done by 17119304Speter * re_sub(). 17219304Speter * 17319304Speter * The special character ~ (\~ if O_MAGIC not set) inserts the 17419304Speter * previous replacement string into this replacement string. 17519304Speter * Count ~'s to figure out how much space we need. We could 17619304Speter * special case nonexistent last patterns or whether or not 17719304Speter * O_MAGIC is set, but it's probably not worth the effort. 17819304Speter * 17919304Speter * QUOTING NOTE: 18019304Speter * 18119304Speter * Only toss an escaping character if it escapes a delimiter or 18219304Speter * if O_MAGIC is set and it escapes a tilde. 18319304Speter * 18419304Speter * !!! 18519304Speter * If the entire replacement pattern is "%", then use the last 18619304Speter * replacement pattern. This semantic was added to vi in System 18719304Speter * V and then percolated elsewhere, presumably around the time 18819304Speter * that it was added to their version of ed(1). 18919304Speter */ 19019304Speter if (p[0] == '\0' || p[0] == delim) { 19119304Speter if (p[0] == delim) 19219304Speter ++p; 19319304Speter if (sp->repl != NULL) 19419304Speter free(sp->repl); 19519304Speter sp->repl = NULL; 19619304Speter sp->repl_len = 0; 19719304Speter } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim)) 19819304Speter p += p[1] == delim ? 2 : 1; 19919304Speter else { 20019304Speter for (rep = p, len = 0; 20119304Speter p[0] != '\0' && p[0] != delim; ++p, ++len) 20219304Speter if (p[0] == '~') 20319304Speter len += sp->repl_len; 204254225Speter GET_SPACE_RETW(sp, bp, blen, len); 20519304Speter for (t = bp, len = 0, p = rep;;) { 20619304Speter if (p[0] == '\0' || p[0] == delim) { 20719304Speter if (p[0] == delim) 20819304Speter ++p; 20919304Speter break; 21019304Speter } 21119304Speter if (p[0] == '\\') { 21219304Speter if (p[1] == delim) 21319304Speter ++p; 21419304Speter else if (p[1] == '\\') { 21519304Speter *t++ = *p++; 21619304Speter ++len; 21719304Speter } else if (p[1] == '~') { 21819304Speter ++p; 21919304Speter if (!O_ISSET(sp, O_MAGIC)) 22019304Speter goto tilde; 22119304Speter } 22219304Speter } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) { 22319304Spetertilde: ++p; 224254225Speter MEMCPY(t, sp->repl, sp->repl_len); 22519304Speter t += sp->repl_len; 22619304Speter len += sp->repl_len; 22719304Speter continue; 22819304Speter } 22919304Speter *t++ = *p++; 23019304Speter ++len; 23119304Speter } 23219304Speter if ((sp->repl_len = len) != 0) { 23319304Speter if (sp->repl != NULL) 23419304Speter free(sp->repl); 235254225Speter MALLOC(sp, sp->repl, CHAR_T *, len * sizeof(CHAR_T)); 236254225Speter if (sp->repl == NULL) { 237254225Speter FREE_SPACEW(sp, bp, blen); 23819304Speter return (1); 23919304Speter } 240254225Speter MEMCPY(sp->repl, bp, len); 24119304Speter } 242254225Speter FREE_SPACEW(sp, bp, blen); 24319304Speter } 24419304Speter return (s(sp, cmdp, p, re, flags)); 24519304Speter} 24619304Speter 24719304Speter/* 24819304Speter * ex_subagain -- 24919304Speter * [line [,line]] & [cgr] [count] [#lp]] 25019304Speter * 25119304Speter * Substitute using the last substitute RE and replacement pattern. 25219304Speter * 25319304Speter * PUBLIC: int ex_subagain __P((SCR *, EXCMD *)); 25419304Speter */ 25519304Speterint 256254225Speterex_subagain(SCR *sp, EXCMD *cmdp) 25719304Speter{ 25819304Speter if (sp->subre == NULL) { 25919304Speter ex_emsg(sp, NULL, EXM_NOPREVRE); 26019304Speter return (1); 26119304Speter } 262254225Speter if (!F_ISSET(sp, SC_RE_SUBST) && 263254225Speter re_compile(sp, sp->subre, sp->subre_len, 264254225Speter NULL, NULL, &sp->subre_c, RE_C_SUBST)) 26519304Speter return (1); 26619304Speter return (s(sp, 26719304Speter cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0)); 26819304Speter} 26919304Speter 27019304Speter/* 27119304Speter * ex_subtilde -- 27219304Speter * [line [,line]] ~ [cgr] [count] [#lp]] 27319304Speter * 27419304Speter * Substitute using the last RE and last substitute replacement pattern. 27519304Speter * 27619304Speter * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *)); 27719304Speter */ 27819304Speterint 279254225Speterex_subtilde(SCR *sp, EXCMD *cmdp) 28019304Speter{ 28119304Speter if (sp->re == NULL) { 28219304Speter ex_emsg(sp, NULL, EXM_NOPREVRE); 28319304Speter return (1); 28419304Speter } 285254225Speter if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re, 286254225Speter sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 28719304Speter return (1); 28819304Speter return (s(sp, 28919304Speter cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0)); 29019304Speter} 29119304Speter 29219304Speter/* 29319304Speter * s -- 29419304Speter * Do the substitution. This stuff is *really* tricky. There are lots of 29519304Speter * special cases, and general nastiness. Don't mess with it unless you're 29619304Speter * pretty confident. 29719304Speter * 29819304Speter * The nasty part of the substitution is what happens when the replacement 29919304Speter * string contains newlines. It's a bit tricky -- consider the information 30019304Speter * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is 30119304Speter * to build a set of newline offsets which we use to break the line up later, 30219304Speter * when the replacement is done. Don't change it unless you're *damned* 30319304Speter * confident. 30419304Speter */ 30519304Speter#define NEEDNEWLINE(sp) { \ 30619304Speter if (sp->newl_len == sp->newl_cnt) { \ 30719304Speter sp->newl_len += 25; \ 30819304Speter REALLOC(sp, sp->newl, size_t *, \ 30919304Speter sp->newl_len * sizeof(size_t)); \ 31019304Speter if (sp->newl == NULL) { \ 31119304Speter sp->newl_len = 0; \ 31219304Speter return (1); \ 31319304Speter } \ 31419304Speter } \ 31519304Speter} 31619304Speter 31719304Speter#define BUILD(sp, l, len) { \ 31819304Speter if (lbclen + (len) > lblen) { \ 319254225Speter lblen = p2roundup(MAX(lbclen + (len), 256)); \ 320254225Speter REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \ 32119304Speter if (lb == NULL) { \ 32219304Speter lbclen = 0; \ 32319304Speter return (1); \ 32419304Speter } \ 32519304Speter } \ 326254225Speter MEMCPY(lb + lbclen, l, len); \ 32719304Speter lbclen += len; \ 32819304Speter} 32919304Speter 33019304Speter#define NEEDSP(sp, len, pnt) { \ 33119304Speter if (lbclen + (len) > lblen) { \ 332254225Speter lblen = p2roundup(MAX(lbclen + (len), 256)); \ 333254225Speter REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \ 33419304Speter if (lb == NULL) { \ 33519304Speter lbclen = 0; \ 33619304Speter return (1); \ 33719304Speter } \ 33819304Speter pnt = lb + lbclen; \ 33919304Speter } \ 34019304Speter} 34119304Speter 34219304Speterstatic int 343254225Speters(SCR *sp, EXCMD *cmdp, CHAR_T *s, regex_t *re, u_int flags) 34419304Speter{ 34519304Speter EVENT ev; 34619304Speter MARK from, to; 347254225Speter TEXTH tiq[] = {{ 0 }}; 34819304Speter recno_t elno, lno, slno; 349254225Speter u_long ul; 35019304Speter regmatch_t match[10]; 35119304Speter size_t blen, cnt, last, lbclen, lblen, len, llen; 35219304Speter size_t offset, saved_offset, scno; 35319304Speter int cflag, lflag, nflag, pflag, rflag; 35419304Speter int didsub, do_eol_match, eflags, empty_ok, eval; 35519304Speter int linechanged, matched, quit, rval; 356254225Speter CHAR_T *bp, *lb; 357254225Speter enum nresult nret; 35819304Speter 35919304Speter NEEDFILE(sp, cmdp); 36019304Speter 36119304Speter slno = sp->lno; 36219304Speter scno = sp->cno; 36319304Speter 36419304Speter /* 36519304Speter * !!! 36619304Speter * Historically, the 'g' and 'c' suffices were always toggled as flags, 36719304Speter * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was 36819304Speter * not set, they were initialized to 0 for all substitute commands. If 36919304Speter * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user 37019304Speter * specified substitute/replacement patterns (see ex_s()). 37119304Speter */ 37219304Speter if (!O_ISSET(sp, O_EDCOMPATIBLE)) 37319304Speter sp->c_suffix = sp->g_suffix = 0; 37419304Speter 37519304Speter /* 37619304Speter * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but 37719304Speter * it only displayed the last change. I'd disallow them, but they are 37819304Speter * useful in combination with the [v]global commands. In the current 37919304Speter * model the problem is combining them with the 'c' flag -- the screen 38019304Speter * would have to flip back and forth between the confirm screen and the 38119304Speter * ex print screen, which would be pretty awful. We do display all 38219304Speter * changes, though, for what that's worth. 38319304Speter * 38419304Speter * !!! 38519304Speter * Historic vi was fairly strict about the order of "options", the 38619304Speter * count, and "flags". I'm somewhat fuzzy on the difference between 38719304Speter * options and flags, anyway, so this is a simpler approach, and we 38819304Speter * just take it them in whatever order the user gives them. (The ex 38919304Speter * usage statement doesn't reflect this.) 39019304Speter */ 39119304Speter cflag = lflag = nflag = pflag = rflag = 0; 39219304Speter if (s == NULL) 39319304Speter goto noargs; 39419304Speter for (lno = OOBLNO; *s != '\0'; ++s) 39519304Speter switch (*s) { 39619304Speter case ' ': 39719304Speter case '\t': 39819304Speter continue; 39919304Speter case '+': 40019304Speter ++cmdp->flagoff; 40119304Speter break; 40219304Speter case '-': 40319304Speter --cmdp->flagoff; 40419304Speter break; 40519304Speter case '0': case '1': case '2': case '3': case '4': 40619304Speter case '5': case '6': case '7': case '8': case '9': 40719304Speter if (lno != OOBLNO) 40819304Speter goto usage; 40919304Speter errno = 0; 410254225Speter nret = nget_uslong(&ul, s, &s, 10); 411254225Speter lno = ul; 41219304Speter if (*s == '\0') /* Loop increment correction. */ 41319304Speter --s; 414254225Speter if (nret != NUM_OK) { 415254225Speter if (nret == NUM_OVER) 41619304Speter msgq(sp, M_ERR, "153|Count overflow"); 417254225Speter else if (nret == NUM_UNDER) 41819304Speter msgq(sp, M_ERR, "154|Count underflow"); 41919304Speter else 42019304Speter msgq(sp, M_SYSERR, NULL); 42119304Speter return (1); 42219304Speter } 42319304Speter /* 42419304Speter * In historic vi, the count was inclusive from the 42519304Speter * second address. 42619304Speter */ 42719304Speter cmdp->addr1.lno = cmdp->addr2.lno; 42819304Speter cmdp->addr2.lno += lno - 1; 42919304Speter if (!db_exist(sp, cmdp->addr2.lno) && 43019304Speter db_last(sp, &cmdp->addr2.lno)) 43119304Speter return (1); 43219304Speter break; 43319304Speter case '#': 43419304Speter nflag = 1; 43519304Speter break; 43619304Speter case 'c': 43719304Speter sp->c_suffix = !sp->c_suffix; 43819304Speter 43919304Speter /* Ex text structure initialization. */ 440254225Speter if (F_ISSET(sp, SC_EX)) 441254225Speter TAILQ_INIT(tiq); 44219304Speter break; 44319304Speter case 'g': 44419304Speter sp->g_suffix = !sp->g_suffix; 44519304Speter break; 44619304Speter case 'l': 44719304Speter lflag = 1; 44819304Speter break; 44919304Speter case 'p': 45019304Speter pflag = 1; 45119304Speter break; 45219304Speter case 'r': 45319304Speter if (LF_ISSET(SUB_FIRST)) { 45419304Speter msgq(sp, M_ERR, 45519304Speter "155|Regular expression specified; r flag meaningless"); 45619304Speter return (1); 45719304Speter } 45819304Speter if (!F_ISSET(sp, SC_RE_SEARCH)) { 45919304Speter ex_emsg(sp, NULL, EXM_NOPREVRE); 46019304Speter return (1); 46119304Speter } 46219304Speter rflag = 1; 46319304Speter re = &sp->re_c; 46419304Speter break; 46519304Speter default: 46619304Speter goto usage; 46719304Speter } 46819304Speter 469254225Speter if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) { 47019304Speterusage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE); 47119304Speter return (1); 47219304Speter } 47319304Speter 47419304Speternoargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) { 47519304Speter msgq(sp, M_ERR, 47619304Speter"156|The #, l and p flags may not be combined with the c flag in vi mode"); 47719304Speter return (1); 47819304Speter } 47919304Speter 48019304Speter /* 48119304Speter * bp: if interactive, line cache 48219304Speter * blen: if interactive, line cache length 48319304Speter * lb: build buffer pointer. 48419304Speter * lbclen: current length of built buffer. 48519304Speter * lblen; length of build buffer. 48619304Speter */ 48719304Speter bp = lb = NULL; 48819304Speter blen = lbclen = lblen = 0; 48919304Speter 49019304Speter /* For each line... */ 491254225Speter lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno; 492254225Speter for (matched = quit = 0, 49319304Speter elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) { 49419304Speter 49519304Speter /* Someone's unhappy, time to stop. */ 49619304Speter if (INTERRUPTED(sp)) 49719304Speter break; 49819304Speter 49919304Speter /* Get the line. */ 50019304Speter if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 50119304Speter goto err; 50219304Speter 50319304Speter /* 50419304Speter * Make a local copy if doing confirmation -- when calling 50519304Speter * the confirm routine we're likely to lose the cached copy. 50619304Speter */ 50719304Speter if (sp->c_suffix) { 50819304Speter if (bp == NULL) { 509254225Speter GET_SPACE_RETW(sp, bp, blen, llen); 51019304Speter } else 511254225Speter ADD_SPACE_RETW(sp, bp, blen, llen); 512254225Speter MEMCPY(bp, s, llen); 51319304Speter s = bp; 51419304Speter } 51519304Speter 51619304Speter /* Start searching from the beginning. */ 51719304Speter offset = 0; 51819304Speter len = llen; 51919304Speter 52019304Speter /* Reset the build buffer offset. */ 52119304Speter lbclen = 0; 52219304Speter 52319304Speter /* Reset empty match flag. */ 52419304Speter empty_ok = 1; 52519304Speter 52619304Speter /* 52719304Speter * We don't want to have to do a setline if the line didn't 52819304Speter * change -- keep track of whether or not this line changed. 52919304Speter * If doing confirmations, don't want to keep setting the 53019304Speter * line if change is refused -- keep track of substitutions. 53119304Speter */ 53219304Speter didsub = linechanged = 0; 53319304Speter 53419304Speter /* New line, do an EOL match. */ 53519304Speter do_eol_match = 1; 53619304Speter 53719304Speter /* It's not nul terminated, but we pretend it is. */ 53819304Speter eflags = REG_STARTEND; 53919304Speter 54019304Speter /* 54119304Speter * The search area is from s + offset to the EOL. 54219304Speter * 54319304Speter * Generally, match[0].rm_so is the offset of the start 54419304Speter * of the match from the start of the search, and offset 54519304Speter * is the offset of the start of the last search. 54619304Speter */ 54719304Speternextmatch: match[0].rm_so = 0; 54819304Speter match[0].rm_eo = len; 54919304Speter 55019304Speter /* Get the next match. */ 551254225Speter eval = regexec(re, s + offset, 10, match, eflags); 55219304Speter 55319304Speter /* 55419304Speter * There wasn't a match or if there was an error, deal with 55519304Speter * it. If there was a previous match in this line, resolve 55619304Speter * the changes into the database. Otherwise, just move on. 55719304Speter */ 55819304Speter if (eval == REG_NOMATCH) 55919304Speter goto endmatch; 56019304Speter if (eval != 0) { 56119304Speter re_error(sp, eval, re); 56219304Speter goto err; 56319304Speter } 56419304Speter matched = 1; 56519304Speter 56619304Speter /* Only the first search can match an anchored expression. */ 56719304Speter eflags |= REG_NOTBOL; 56819304Speter 56919304Speter /* 57019304Speter * !!! 57119304Speter * It's possible to match 0-length strings -- for example, the 57219304Speter * command s;a*;X;, when matched against the string "aabb" will 57319304Speter * result in "XbXbX", i.e. the matches are "aa", the space 57419304Speter * between the b's and the space between the b's and the end of 57519304Speter * the string. There is a similar space between the beginning 57619304Speter * of the string and the a's. The rule that we use (because vi 57719304Speter * historically used it) is that any 0-length match, occurring 57819304Speter * immediately after a match, is ignored. Otherwise, the above 57919304Speter * example would have resulted in "XXbXbX". Another example is 58019304Speter * incorrectly using " *" to replace groups of spaces with one 58119304Speter * space. 58219304Speter * 58319304Speter * The way we do this is that if we just had a successful match, 58419304Speter * the starting offset does not skip characters, and the match 58519304Speter * is empty, ignore the match and move forward. If there's no 58619304Speter * more characters in the string, we were attempting to match 58719304Speter * after the last character, so quit. 58819304Speter */ 58919304Speter if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) { 59019304Speter empty_ok = 1; 59119304Speter if (len == 0) 59219304Speter goto endmatch; 59319304Speter BUILD(sp, s + offset, 1) 59419304Speter ++offset; 59519304Speter --len; 59619304Speter goto nextmatch; 59719304Speter } 59819304Speter 59919304Speter /* Confirm change. */ 60019304Speter if (sp->c_suffix) { 60119304Speter /* 60219304Speter * Set the cursor position for confirmation. Note, 60319304Speter * if we matched on a '$', the cursor may be past 60419304Speter * the end of line. 60519304Speter */ 60619304Speter from.lno = to.lno = lno; 60719304Speter from.cno = match[0].rm_so + offset; 60819304Speter to.cno = match[0].rm_eo + offset; 60919304Speter /* 61019304Speter * Both ex and vi have to correct for a change before 61119304Speter * the first character in the line. 61219304Speter */ 61319304Speter if (llen == 0) 61419304Speter from.cno = to.cno = 0; 61519304Speter if (F_ISSET(sp, SC_VI)) { 61619304Speter /* 61719304Speter * Only vi has to correct for a change after 61819304Speter * the last character in the line. 61919304Speter * 62019304Speter * XXX 62119304Speter * It would be nice to change the vi code so 62219304Speter * that we could display a cursor past EOL. 62319304Speter */ 62419304Speter if (to.cno >= llen) 62519304Speter to.cno = llen - 1; 62619304Speter if (from.cno >= llen) 62719304Speter from.cno = llen - 1; 62819304Speter 62919304Speter sp->lno = from.lno; 63019304Speter sp->cno = from.cno; 63119304Speter if (vs_refresh(sp, 1)) 63219304Speter goto err; 63319304Speter 63419304Speter vs_update(sp, msg_cat(sp, 63519304Speter "169|Confirm change? [n]", NULL), NULL); 63619304Speter 63719304Speter if (v_event_get(sp, &ev, 0, 0)) 63819304Speter goto err; 63919304Speter switch (ev.e_event) { 64019304Speter case E_CHARACTER: 64119304Speter break; 64219304Speter case E_EOF: 64319304Speter case E_ERR: 64419304Speter case E_INTERRUPT: 64519304Speter goto lquit; 64619304Speter default: 64719304Speter v_event_err(sp, &ev); 64819304Speter goto lquit; 64919304Speter } 65019304Speter } else { 65119304Speter if (ex_print(sp, cmdp, &from, &to, 0) || 65219304Speter ex_scprint(sp, &from, &to)) 65319304Speter goto lquit; 654254225Speter if (ex_txt(sp, tiq, 0, TXT_CR)) 65519304Speter goto err; 656254225Speter ev.e_c = TAILQ_FIRST(tiq)->lb[0]; 65719304Speter } 65819304Speter 65919304Speter switch (ev.e_c) { 66019304Speter case CH_YES: 66119304Speter break; 66219304Speter default: 66319304Speter case CH_NO: 66419304Speter didsub = 0; 66519304Speter BUILD(sp, s +offset, match[0].rm_eo); 66619304Speter goto skip; 66719304Speter case CH_QUIT: 66819304Speter /* Set the quit/interrupted flags. */ 66919304Speterlquit: quit = 1; 67019304Speter F_SET(sp->gp, G_INTERRUPTED); 67119304Speter 67219304Speter /* 67319304Speter * Resolve any changes, then return to (and 67419304Speter * exit from) the main loop. 67519304Speter */ 67619304Speter goto endmatch; 67719304Speter } 67819304Speter } 67919304Speter 68019304Speter /* 68119304Speter * Set the cursor to the last position changed, converting 68219304Speter * from 1-based to 0-based. 68319304Speter */ 68419304Speter sp->lno = lno; 68519304Speter sp->cno = match[0].rm_so; 68619304Speter 68719304Speter /* Copy the bytes before the match into the build buffer. */ 68819304Speter BUILD(sp, s + offset, match[0].rm_so); 68919304Speter 69019304Speter /* Substitute the matching bytes. */ 69119304Speter didsub = 1; 69219304Speter if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match)) 69319304Speter goto err; 69419304Speter 69519304Speter /* Set the change flag so we know this line was modified. */ 69619304Speter linechanged = 1; 69719304Speter 69819304Speter /* Move past the matched bytes. */ 69919304Speterskip: offset += match[0].rm_eo; 70019304Speter len -= match[0].rm_eo; 70119304Speter 70219304Speter /* A match cannot be followed by an empty pattern. */ 70319304Speter empty_ok = 0; 70419304Speter 70519304Speter /* 70619304Speter * If doing a global change with confirmation, we have to 70719304Speter * update the screen. The basic idea is to store the line 70819304Speter * so the screen update routines can find it, and restart. 70919304Speter */ 71019304Speter if (didsub && sp->c_suffix && sp->g_suffix) { 71119304Speter /* 71219304Speter * The new search offset will be the end of the 71319304Speter * modified line. 71419304Speter */ 71519304Speter saved_offset = lbclen; 71619304Speter 71719304Speter /* Copy the rest of the line. */ 71819304Speter if (len) 71919304Speter BUILD(sp, s + offset, len) 72019304Speter 72119304Speter /* Set the new offset. */ 72219304Speter offset = saved_offset; 72319304Speter 72419304Speter /* Store inserted lines, adjusting the build buffer. */ 72519304Speter last = 0; 72619304Speter if (sp->newl_cnt) { 72719304Speter for (cnt = 0; 72819304Speter cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 72919304Speter if (db_insert(sp, lno, 73019304Speter lb + last, sp->newl[cnt] - last)) 73119304Speter goto err; 73219304Speter last = sp->newl[cnt] + 1; 73319304Speter ++sp->rptlines[L_ADDED]; 73419304Speter } 73519304Speter lbclen -= last; 73619304Speter offset -= last; 73719304Speter sp->newl_cnt = 0; 73819304Speter } 73919304Speter 74019304Speter /* Store and retrieve the line. */ 74119304Speter if (db_set(sp, lno, lb + last, lbclen)) 74219304Speter goto err; 74319304Speter if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 74419304Speter goto err; 745254225Speter ADD_SPACE_RETW(sp, bp, blen, llen) 746254225Speter MEMCPY(bp, s, llen); 74719304Speter s = bp; 74819304Speter len = llen - offset; 74919304Speter 75019304Speter /* Restart the build. */ 75119304Speter lbclen = 0; 75219304Speter BUILD(sp, s, offset); 75319304Speter 75419304Speter /* 75519304Speter * If we haven't already done the after-the-string 75619304Speter * match, do one. Set REG_NOTEOL so the '$' pattern 75719304Speter * only matches once. 75819304Speter */ 75919304Speter if (!do_eol_match) 76019304Speter goto endmatch; 76119304Speter if (offset == len) { 76219304Speter do_eol_match = 0; 76319304Speter eflags |= REG_NOTEOL; 76419304Speter } 76519304Speter goto nextmatch; 76619304Speter } 76719304Speter 76819304Speter /* 76919304Speter * If it's a global: 77019304Speter * 77119304Speter * If at the end of the string, do a test for the after 77219304Speter * the string match. Set REG_NOTEOL so the '$' pattern 77319304Speter * only matches once. 77419304Speter */ 77519304Speter if (sp->g_suffix && do_eol_match) { 77619304Speter if (len == 0) { 77719304Speter do_eol_match = 0; 77819304Speter eflags |= REG_NOTEOL; 77919304Speter } 78019304Speter goto nextmatch; 78119304Speter } 78219304Speter 78319304Speterendmatch: if (!linechanged) 78419304Speter continue; 78519304Speter 78619304Speter /* Copy any remaining bytes into the build buffer. */ 78719304Speter if (len) 78819304Speter BUILD(sp, s + offset, len) 78919304Speter 79019304Speter /* Store inserted lines, adjusting the build buffer. */ 79119304Speter last = 0; 79219304Speter if (sp->newl_cnt) { 79319304Speter for (cnt = 0; 79419304Speter cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 79519304Speter if (db_insert(sp, 79619304Speter lno, lb + last, sp->newl[cnt] - last)) 79719304Speter goto err; 79819304Speter last = sp->newl[cnt] + 1; 79919304Speter ++sp->rptlines[L_ADDED]; 80019304Speter } 80119304Speter lbclen -= last; 80219304Speter sp->newl_cnt = 0; 80319304Speter } 80419304Speter 80519304Speter /* Store the changed line. */ 80619304Speter if (db_set(sp, lno, lb + last, lbclen)) 80719304Speter goto err; 80819304Speter 80919304Speter /* Update changed line counter. */ 81019304Speter if (sp->rptlchange != lno) { 81119304Speter sp->rptlchange = lno; 81219304Speter ++sp->rptlines[L_CHANGED]; 81319304Speter } 81419304Speter 81519304Speter /* 81619304Speter * !!! 81719304Speter * Display as necessary. Historic practice is to only 81819304Speter * display the last line of a line split into multiple 81919304Speter * lines. 82019304Speter */ 82119304Speter if (lflag || nflag || pflag) { 82219304Speter from.lno = to.lno = lno; 82319304Speter from.cno = to.cno = 0; 82419304Speter if (lflag) 82519304Speter (void)ex_print(sp, cmdp, &from, &to, E_C_LIST); 82619304Speter if (nflag) 82719304Speter (void)ex_print(sp, cmdp, &from, &to, E_C_HASH); 82819304Speter if (pflag) 82919304Speter (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT); 83019304Speter } 83119304Speter } 83219304Speter 83319304Speter /* 83419304Speter * !!! 83519304Speter * Historically, vi attempted to leave the cursor at the same place if 83619304Speter * the substitution was done at the current cursor position. Otherwise 83719304Speter * it moved it to the first non-blank of the last line changed. There 83819304Speter * were some problems: for example, :s/$/foo/ with the cursor on the 83919304Speter * last character of the line left the cursor on the last character, or 84019304Speter * the & command with multiple occurrences of the matching string in the 84119304Speter * line usually left the cursor in a fairly random position. 84219304Speter * 84319304Speter * We try to do the same thing, with the exception that if the user is 84419304Speter * doing substitution with confirmation, we move to the last line about 84519304Speter * which the user was consulted, as opposed to the last line that they 84619304Speter * actually changed. This prevents a screen flash if the user doesn't 84719304Speter * change many of the possible lines. 84819304Speter */ 84919304Speter if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) { 85019304Speter sp->cno = 0; 85119304Speter (void)nonblank(sp, sp->lno, &sp->cno); 85219304Speter } 85319304Speter 85419304Speter /* 85519304Speter * If not in a global command, and nothing matched, say so. 85619304Speter * Else, if none of the lines displayed, put something up. 85719304Speter */ 85819304Speter rval = 0; 85919304Speter if (!matched) { 86019304Speter if (!F_ISSET(sp, SC_EX_GLOBAL)) { 86119304Speter msgq(sp, M_ERR, "157|No match found"); 86219304Speter goto err; 86319304Speter } 86419304Speter } else if (!lflag && !nflag && !pflag) 86519304Speter F_SET(cmdp, E_AUTOPRINT); 86619304Speter 86719304Speter if (0) { 86819304Spetererr: rval = 1; 86919304Speter } 87019304Speter 87119304Speter if (bp != NULL) 872254225Speter FREE_SPACEW(sp, bp, blen); 87319304Speter if (lb != NULL) 87419304Speter free(lb); 87519304Speter return (rval); 87619304Speter} 87719304Speter 87819304Speter/* 87919304Speter * re_compile -- 88019304Speter * Compile the RE. 88119304Speter * 88219304Speter * PUBLIC: int re_compile __P((SCR *, 883254225Speter * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int)); 88419304Speter */ 88519304Speterint 886254225Speterre_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags) 88719304Speter{ 88819304Speter size_t len; 88919304Speter int reflags, replaced, rval; 890254225Speter CHAR_T *p; 89119304Speter 89219304Speter /* Set RE flags. */ 89319304Speter reflags = 0; 89419304Speter if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) { 89519304Speter if (O_ISSET(sp, O_EXTENDED)) 89619304Speter reflags |= REG_EXTENDED; 89719304Speter if (O_ISSET(sp, O_IGNORECASE)) 89819304Speter reflags |= REG_ICASE; 89919304Speter if (O_ISSET(sp, O_ICLOWER)) { 90019304Speter for (p = ptrn, len = plen; len > 0; ++p, --len) 901254225Speter if (ISUPPER(*p)) 90219304Speter break; 90319304Speter if (len == 0) 90419304Speter reflags |= REG_ICASE; 90519304Speter } 90619304Speter } 90719304Speter 90819304Speter /* If we're replacing a saved value, clear the old one. */ 90919304Speter if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) { 91019304Speter regfree(&sp->re_c); 91119304Speter F_CLR(sp, SC_RE_SEARCH); 91219304Speter } 91319304Speter if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) { 91419304Speter regfree(&sp->subre_c); 91519304Speter F_CLR(sp, SC_RE_SUBST); 91619304Speter } 91719304Speter 91819304Speter /* 91919304Speter * If we're saving the string, it's a pattern we haven't seen before, 92019304Speter * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for 92119304Speter * later recompilation. Free any previously saved value. 92219304Speter */ 92319304Speter if (ptrnp != NULL) { 924254225Speter replaced = 0; 92519304Speter if (LF_ISSET(RE_C_CSCOPE)) { 92619304Speter if (re_cscope_conv(sp, &ptrn, &plen, &replaced)) 92719304Speter return (1); 92819304Speter /* 92919304Speter * XXX 93019304Speter * Currently, the match-any-<blank> expression used in 93119304Speter * re_cscope_conv() requires extended RE's. This may 93219304Speter * not be right or safe. 93319304Speter */ 93419304Speter reflags |= REG_EXTENDED; 93519304Speter } else if (LF_ISSET(RE_C_TAG)) { 93619304Speter if (re_tag_conv(sp, &ptrn, &plen, &replaced)) 93719304Speter return (1); 93819304Speter } else 93919304Speter if (re_conv(sp, &ptrn, &plen, &replaced)) 94019304Speter return (1); 94119304Speter 94219304Speter /* Discard previous pattern. */ 94319304Speter if (*ptrnp != NULL) { 94419304Speter free(*ptrnp); 94519304Speter *ptrnp = NULL; 94619304Speter } 94719304Speter if (lenp != NULL) 94819304Speter *lenp = plen; 94919304Speter 95019304Speter /* 95119304Speter * Copy the string into allocated memory. 95219304Speter * 95319304Speter * XXX 95419304Speter * Regcomp isn't 8-bit clean, so the pattern is nul-terminated 95519304Speter * for now. There's just no other solution. 95619304Speter */ 957254225Speter MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T)); 95819304Speter if (*ptrnp != NULL) { 959254225Speter MEMCPY(*ptrnp, ptrn, plen); 96019304Speter (*ptrnp)[plen] = '\0'; 96119304Speter } 96219304Speter 96319304Speter /* Free up conversion-routine-allocated memory. */ 96419304Speter if (replaced) 965254225Speter FREE_SPACEW(sp, ptrn, 0); 96619304Speter 96719304Speter if (*ptrnp == NULL) 96819304Speter return (1); 96919304Speter 97019304Speter ptrn = *ptrnp; 97119304Speter } 97219304Speter 97319304Speter /* 97419304Speter * XXX 97519304Speter * Regcomp isn't 8-bit clean, so we just lost if the pattern 97619304Speter * contained a nul. Bummer! 97719304Speter */ 97819304Speter if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) { 97919304Speter if (!LF_ISSET(RE_C_SILENT)) 98019304Speter re_error(sp, rval, rep); 98119304Speter return (1); 98219304Speter } 98319304Speter 98419304Speter if (LF_ISSET(RE_C_SEARCH)) 98519304Speter F_SET(sp, SC_RE_SEARCH); 98619304Speter if (LF_ISSET(RE_C_SUBST)) 98719304Speter F_SET(sp, SC_RE_SUBST); 98819304Speter 98919304Speter return (0); 99019304Speter} 99119304Speter 99219304Speter/* 99319304Speter * re_conv -- 99419304Speter * Convert vi's regular expressions into something that the 99519304Speter * the POSIX 1003.2 RE functions can handle. 99619304Speter * 99719304Speter * There are three conversions we make to make vi's RE's (specifically 99819304Speter * the global, search, and substitute patterns) work with POSIX RE's. 99919304Speter * 100019304Speter * 1: If O_MAGIC is not set, strip backslashes from the magic character 100119304Speter * set (.[*~) that have them, and add them to the ones that don't. 100219304Speter * 2: If O_MAGIC is not set, the string "\~" is replaced with the text 100319304Speter * from the last substitute command's replacement string. If O_MAGIC 100419304Speter * is set, it's the string "~". 100519304Speter * 3: The pattern \<ptrn\> does "word" searches, convert it to use the 100619304Speter * new RE escapes. 100719304Speter * 100819304Speter * !!!/XXX 100919304Speter * This doesn't exactly match the historic behavior of vi because we do 101019304Speter * the ~ substitution before calling the RE engine, so magic characters 101119304Speter * in the replacement string will be expanded by the RE engine, and they 101219304Speter * weren't historically. It's a bug. 101319304Speter */ 101419304Speterstatic int 1015254225Speterre_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp) 101619304Speter{ 101719304Speter size_t blen, len, needlen; 101819304Speter int magic; 1019254225Speter CHAR_T *bp, *p, *t; 102019304Speter 102119304Speter /* 102219304Speter * First pass through, we figure out how much space we'll need. 102319304Speter * We do it in two passes, on the grounds that most of the time 102419304Speter * the user is doing a search and won't have magic characters. 102519304Speter * That way we can skip most of the memory allocation and copies. 102619304Speter */ 102719304Speter magic = 0; 102819304Speter for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len) 102919304Speter switch (*p) { 103019304Speter case '\\': 103119304Speter if (len > 1) { 103219304Speter --len; 103319304Speter switch (*++p) { 103419304Speter case '<': 103519304Speter magic = 1; 1036254225Speter needlen += RE_WSTART_LEN + 1; 103719304Speter break; 103819304Speter case '>': 103919304Speter magic = 1; 1040254225Speter needlen += RE_WSTOP_LEN + 1; 104119304Speter break; 104219304Speter case '~': 104319304Speter if (!O_ISSET(sp, O_MAGIC)) { 104419304Speter magic = 1; 104519304Speter needlen += sp->repl_len; 104619304Speter } 104719304Speter break; 104819304Speter case '.': 104919304Speter case '[': 105019304Speter case '*': 105119304Speter if (!O_ISSET(sp, O_MAGIC)) { 105219304Speter magic = 1; 105319304Speter needlen += 1; 105419304Speter } 105519304Speter break; 105619304Speter default: 105719304Speter needlen += 2; 105819304Speter } 105919304Speter } else 106019304Speter needlen += 1; 106119304Speter break; 106219304Speter case '~': 106319304Speter if (O_ISSET(sp, O_MAGIC)) { 106419304Speter magic = 1; 106519304Speter needlen += sp->repl_len; 106619304Speter } 106719304Speter break; 106819304Speter case '.': 106919304Speter case '[': 107019304Speter case '*': 107119304Speter if (!O_ISSET(sp, O_MAGIC)) { 107219304Speter magic = 1; 107319304Speter needlen += 2; 107419304Speter } 107519304Speter break; 107619304Speter default: 107719304Speter needlen += 1; 107819304Speter break; 107919304Speter } 108019304Speter 108119304Speter if (!magic) { 108219304Speter *replacedp = 0; 108319304Speter return (0); 108419304Speter } 108519304Speter 108619304Speter /* Get enough memory to hold the final pattern. */ 108719304Speter *replacedp = 1; 1088254225Speter GET_SPACE_RETW(sp, bp, blen, needlen); 108919304Speter 109019304Speter for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len) 109119304Speter switch (*p) { 109219304Speter case '\\': 109319304Speter if (len > 1) { 109419304Speter --len; 109519304Speter switch (*++p) { 109619304Speter case '<': 1097254225Speter MEMCPY(t, 1098254225Speter RE_WSTART, RE_WSTART_LEN); 1099254225Speter t += RE_WSTART_LEN; 110019304Speter break; 110119304Speter case '>': 1102254225Speter MEMCPY(t, 1103254225Speter RE_WSTOP, RE_WSTOP_LEN); 1104254225Speter t += RE_WSTOP_LEN; 110519304Speter break; 110619304Speter case '~': 110719304Speter if (O_ISSET(sp, O_MAGIC)) 110819304Speter *t++ = '~'; 110919304Speter else { 1110254225Speter MEMCPY(t, 111119304Speter sp->repl, sp->repl_len); 111219304Speter t += sp->repl_len; 111319304Speter } 111419304Speter break; 111519304Speter case '.': 111619304Speter case '[': 111719304Speter case '*': 111819304Speter if (O_ISSET(sp, O_MAGIC)) 111919304Speter *t++ = '\\'; 112019304Speter *t++ = *p; 112119304Speter break; 112219304Speter default: 112319304Speter *t++ = '\\'; 112419304Speter *t++ = *p; 112519304Speter } 112619304Speter } else 112719304Speter *t++ = '\\'; 112819304Speter break; 112919304Speter case '~': 113019304Speter if (O_ISSET(sp, O_MAGIC)) { 1131254225Speter MEMCPY(t, sp->repl, sp->repl_len); 113219304Speter t += sp->repl_len; 113319304Speter } else 113419304Speter *t++ = '~'; 113519304Speter break; 113619304Speter case '.': 113719304Speter case '[': 113819304Speter case '*': 113919304Speter if (!O_ISSET(sp, O_MAGIC)) 114019304Speter *t++ = '\\'; 114119304Speter *t++ = *p; 114219304Speter break; 114319304Speter default: 114419304Speter *t++ = *p; 114519304Speter break; 114619304Speter } 114719304Speter 114819304Speter *ptrnp = bp; 114919304Speter *plenp = t - bp; 115019304Speter return (0); 115119304Speter} 115219304Speter 115319304Speter/* 115419304Speter * re_tag_conv -- 115519304Speter * Convert a tags search path into something that the POSIX 115619304Speter * 1003.2 RE functions can handle. 115719304Speter */ 115819304Speterstatic int 1159254225Speterre_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp) 116019304Speter{ 116119304Speter size_t blen, len; 116219304Speter int lastdollar; 1163254225Speter CHAR_T *bp, *p, *t; 116419304Speter 116519304Speter len = *plenp; 116619304Speter 116719304Speter /* Max memory usage is 2 times the length of the string. */ 116819304Speter *replacedp = 1; 1169254225Speter GET_SPACE_RETW(sp, bp, blen, len * 2); 117019304Speter 117119304Speter p = *ptrnp; 117219304Speter t = bp; 117319304Speter 117419304Speter /* If the last character is a '/' or '?', we just strip it. */ 117519304Speter if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?')) 117619304Speter --len; 117719304Speter 117819304Speter /* If the next-to-last or last character is a '$', it's magic. */ 117919304Speter if (len > 0 && p[len - 1] == '$') { 118019304Speter --len; 118119304Speter lastdollar = 1; 118219304Speter } else 118319304Speter lastdollar = 0; 118419304Speter 118519304Speter /* If the first character is a '/' or '?', we just strip it. */ 118619304Speter if (len > 0 && (p[0] == '/' || p[0] == '?')) { 118719304Speter ++p; 118819304Speter --len; 118919304Speter } 119019304Speter 119119304Speter /* If the first or second character is a '^', it's magic. */ 119219304Speter if (p[0] == '^') { 119319304Speter *t++ = *p++; 119419304Speter --len; 119519304Speter } 119619304Speter 119719304Speter /* 119819304Speter * Escape every other magic character we can find, meanwhile stripping 119919304Speter * the backslashes ctags inserts when escaping the search delimiter 120019304Speter * characters. 120119304Speter */ 120219304Speter for (; len > 0; --len) { 120319304Speter if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) { 120419304Speter ++p; 120519304Speter --len; 1206254225Speter } else if (STRCHR(L("^.[]$*"), p[0])) 120719304Speter *t++ = '\\'; 120819304Speter *t++ = *p++; 120919304Speter } 121019304Speter if (lastdollar) 121119304Speter *t++ = '$'; 121219304Speter 121319304Speter *ptrnp = bp; 121419304Speter *plenp = t - bp; 121519304Speter return (0); 121619304Speter} 121719304Speter 121819304Speter/* 121919304Speter * re_cscope_conv -- 122019304Speter * Convert a cscope search path into something that the POSIX 122119304Speter * 1003.2 RE functions can handle. 122219304Speter */ 122319304Speterstatic int 1224254225Speterre_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp) 122519304Speter{ 122619304Speter size_t blen, len, nspaces; 1227254225Speter CHAR_T *bp, *t; 1228254225Speter CHAR_T *p; 1229254225Speter CHAR_T *wp; 1230254225Speter size_t wlen; 123119304Speter 123219304Speter /* 123319304Speter * Each space in the source line printed by cscope represents an 123419304Speter * arbitrary sequence of spaces, tabs, and comments. 123519304Speter */ 123619304Speter#define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*" 1237254225Speter#define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1 1238254225Speter CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen); 123919304Speter for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len) 124019304Speter if (*p == ' ') 124119304Speter ++nspaces; 124219304Speter 124319304Speter /* 124419304Speter * Allocate plenty of space: 124519304Speter * the string, plus potential escaping characters; 124619304Speter * nspaces + 2 copies of CSCOPE_RE_SPACE; 124719304Speter * ^, $, nul terminator characters. 124819304Speter */ 124919304Speter *replacedp = 1; 125019304Speter len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3; 1251254225Speter GET_SPACE_RETW(sp, bp, blen, len); 125219304Speter 125319304Speter p = *ptrnp; 125419304Speter t = bp; 125519304Speter 125619304Speter *t++ = '^'; 1257254225Speter MEMCPY(t, wp, wlen); 1258254225Speter t += wlen; 125919304Speter 126019304Speter for (len = *plenp; len > 0; ++p, --len) 126119304Speter if (*p == ' ') { 1262254225Speter MEMCPY(t, wp, wlen); 1263254225Speter t += wlen; 126419304Speter } else { 1265254225Speter if (STRCHR(L("\\^.[]$*+?()|{}"), *p)) 126619304Speter *t++ = '\\'; 126719304Speter *t++ = *p; 126819304Speter } 126919304Speter 1270254225Speter MEMCPY(t, wp, wlen); 1271254225Speter t += wlen; 127219304Speter *t++ = '$'; 127319304Speter 127419304Speter *ptrnp = bp; 127519304Speter *plenp = t - bp; 127619304Speter return (0); 127719304Speter} 127819304Speter 127919304Speter/* 128019304Speter * re_error -- 128119304Speter * Report a regular expression error. 128219304Speter * 128319304Speter * PUBLIC: void re_error __P((SCR *, int, regex_t *)); 128419304Speter */ 128519304Spetervoid 1286254225Speterre_error(SCR *sp, int errcode, regex_t *preg) 128719304Speter{ 128819304Speter size_t s; 128919304Speter char *oe; 129019304Speter 129119304Speter s = regerror(errcode, preg, "", 0); 1292254225Speter MALLOC(sp, oe, char *, s); 1293254225Speter if (oe != NULL) { 129419304Speter (void)regerror(errcode, preg, oe, s); 129519304Speter msgq(sp, M_ERR, "RE error: %s", oe); 129619304Speter free(oe); 129719304Speter } 129819304Speter} 129919304Speter 130019304Speter/* 130119304Speter * re_sub -- 130219304Speter * Do the substitution for a regular expression. 130319304Speter */ 130419304Speterstatic int 1305254225Speterre_sub( 1306254225Speter SCR *sp, 1307254225Speter CHAR_T *ip, /* Input line. */ 1308254225Speter CHAR_T **lbp, 1309254225Speter size_t *lbclenp, 1310254225Speter size_t *lblenp, 1311254225Speter regmatch_t match[10]) 131219304Speter{ 131319304Speter enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv; 131419304Speter size_t lbclen, lblen; /* Local copies. */ 131519304Speter size_t mlen; /* Match length. */ 131619304Speter size_t rpl; /* Remaining replacement length. */ 1317254225Speter CHAR_T *rp; /* Replacement pointer. */ 131819304Speter int ch; 131919304Speter int no; /* Match replacement offset. */ 1320254225Speter CHAR_T *p, *t; /* Buffer pointers. */ 1321254225Speter CHAR_T *lb; /* Local copies. */ 132219304Speter 132319304Speter lb = *lbp; /* Get local copies. */ 132419304Speter lbclen = *lbclenp; 132519304Speter lblen = *lblenp; 132619304Speter 132719304Speter /* 132819304Speter * QUOTING NOTE: 132919304Speter * 133019304Speter * There are some special sequences that vi provides in the 133119304Speter * replacement patterns. 133219304Speter * & string the RE matched (\& if nomagic set) 133319304Speter * \# n-th regular subexpression 133419304Speter * \E end \U, \L conversion 133519304Speter * \e end \U, \L conversion 133619304Speter * \l convert the next character to lower-case 133719304Speter * \L convert to lower-case, until \E, \e, or end of replacement 133819304Speter * \u convert the next character to upper-case 133919304Speter * \U convert to upper-case, until \E, \e, or end of replacement 134019304Speter * 134119304Speter * Otherwise, since this is the lowest level of replacement, discard 134219304Speter * all escaping characters. This (hopefully) matches historic practice. 134319304Speter */ 134419304Speter#define OUTCH(ch, nltrans) { \ 1345254225Speter ARG_CHAR_T __ch = (ch); \ 1346254225Speter e_key_t __value = KEY_VAL(sp, __ch); \ 134719304Speter if (nltrans && (__value == K_CR || __value == K_NL)) { \ 134819304Speter NEEDNEWLINE(sp); \ 134919304Speter sp->newl[sp->newl_cnt++] = lbclen; \ 135019304Speter } else if (conv != C_NOTSET) { \ 135119304Speter switch (conv) { \ 135219304Speter case C_ONELOWER: \ 135319304Speter conv = C_NOTSET; \ 135419304Speter /* FALLTHROUGH */ \ 135519304Speter case C_LOWER: \ 1356254225Speter if (ISUPPER(__ch)) \ 1357254225Speter __ch = TOLOWER(__ch); \ 135819304Speter break; \ 135919304Speter case C_ONEUPPER: \ 136019304Speter conv = C_NOTSET; \ 136119304Speter /* FALLTHROUGH */ \ 136219304Speter case C_UPPER: \ 1363254225Speter if (ISLOWER(__ch)) \ 1364254225Speter __ch = TOUPPER(__ch); \ 136519304Speter break; \ 136619304Speter default: \ 136719304Speter abort(); \ 136819304Speter } \ 136919304Speter } \ 137019304Speter NEEDSP(sp, 1, p); \ 137119304Speter *p++ = __ch; \ 137219304Speter ++lbclen; \ 137319304Speter} 137419304Speter conv = C_NOTSET; 137519304Speter for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) { 137619304Speter switch (ch = *rp++) { 137719304Speter case '&': 137819304Speter if (O_ISSET(sp, O_MAGIC)) { 137919304Speter no = 0; 138019304Speter goto subzero; 138119304Speter } 138219304Speter break; 138319304Speter case '\\': 138419304Speter if (rpl == 0) 138519304Speter break; 138619304Speter --rpl; 138719304Speter switch (ch = *rp) { 138819304Speter case '&': 138919304Speter ++rp; 139019304Speter if (!O_ISSET(sp, O_MAGIC)) { 139119304Speter no = 0; 139219304Speter goto subzero; 139319304Speter } 139419304Speter break; 139519304Speter case '0': case '1': case '2': case '3': case '4': 139619304Speter case '5': case '6': case '7': case '8': case '9': 139719304Speter no = *rp++ - '0'; 139819304Spetersubzero: if (match[no].rm_so == -1 || 139919304Speter match[no].rm_eo == -1) 140019304Speter break; 140119304Speter mlen = match[no].rm_eo - match[no].rm_so; 140219304Speter for (t = ip + match[no].rm_so; mlen--; ++t) 140319304Speter OUTCH(*t, 0); 140419304Speter continue; 140519304Speter case 'e': 140619304Speter case 'E': 140719304Speter ++rp; 140819304Speter conv = C_NOTSET; 140919304Speter continue; 141019304Speter case 'l': 141119304Speter ++rp; 141219304Speter conv = C_ONELOWER; 141319304Speter continue; 141419304Speter case 'L': 141519304Speter ++rp; 141619304Speter conv = C_LOWER; 141719304Speter continue; 141819304Speter case 'u': 141919304Speter ++rp; 142019304Speter conv = C_ONEUPPER; 142119304Speter continue; 142219304Speter case 'U': 142319304Speter ++rp; 142419304Speter conv = C_UPPER; 142519304Speter continue; 1426254225Speter case '\r': 1427254225Speter OUTCH(ch, 0); 1428254225Speter continue; 142919304Speter default: 143019304Speter ++rp; 143119304Speter break; 143219304Speter } 143319304Speter } 143419304Speter OUTCH(ch, 1); 143519304Speter } 143619304Speter 143719304Speter *lbp = lb; /* Update caller's information. */ 143819304Speter *lbclenp = lbclen; 143919304Speter *lblenp = lblen; 144019304Speter return (0); 144119304Speter} 1442