cook.c revision 290001
1/**
2 * \file cook.c
3 *
4 *  This file contains the routines that deal with processing quoted strings
5 *  into an internal format.
6 *
7 * @addtogroup autoopts
8 * @{
9 */
10/*
11 *  This file is part of AutoOpts, a companion to AutoGen.
12 *  AutoOpts is free software.
13 *  AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved
14 *
15 *  AutoOpts is available under any one of two licenses.  The license
16 *  in use must be one of these two and the choice is under the control
17 *  of the user of the license.
18 *
19 *   The GNU Lesser General Public License, version 3 or later
20 *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
21 *
22 *   The Modified Berkeley Software Distribution License
23 *      See the file "COPYING.mbsd"
24 *
25 *  These files have the following sha256 sums:
26 *
27 *  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
28 *  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
29 *  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
30 */
31
32/* = = = START-STATIC-FORWARD = = = */
/* Prototype only: contiguous_quote() is defined further down in this file. */
static bool contiguous_quote(char ** pps, char * pq, int * lnct_p);
35/* = = = END-STATIC-FORWARD = = = */
36
37/*=export_func  ao_string_cook_escape_char
38 * private:
39 *
40 * what:  escape-process a string fragment
41 * arg:   + char const * + pzScan  + points to character after the escape +
42 * arg:   + char *       + pRes    + Where to put the result byte +
43 * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
44 *
45 * ret-type: unsigned int
46 * ret-desc: The number of bytes consumed processing the escaped character.
47 *
48 * doc:
49 *
50 *  This function converts "t" into "\t" and all your other favorite
51 *  escapes, including numeric ones:  hex and octal, too.
52 *  The returned result tells the caller how far to advance the
53 *  scan pointer (passed in).  The default is to just pass through the
54 *  escaped character and advance the scan by one.
55 *
56 *  Some applications need to keep an escaped newline, others need to
57 *  suppress it.  This is accomplished by supplying a '\n' replacement
58 *  character that is different from \n, if need be.  For example, use
59 *  0x7F and never emit a 0x7F.
60 *
61 * err:  Zero is returned if the escape is followed by the end of the string (NUL).
62=*/
63unsigned int
64ao_string_cook_escape_char(char const * pzIn, char * pRes, uint_t nl)
65{
66    unsigned int res = 1;
67
68    switch (*pRes = *pzIn++) {
69    case NUL:         /* NUL - end of input string */
70        return 0;
71    case '\r':
72        if (*pzIn != NL)
73            return 1;
74        res++;
75        /* FALLTHROUGH */
76    case NL:        /* NL  - emit newline        */
77        *pRes = (char)nl;
78        return res;
79
80    case 'a': *pRes = '\a'; break;
81    case 'b': *pRes = '\b'; break;
82    case 'f': *pRes = '\f'; break;
83    case 'n': *pRes = NL;   break;
84    case 'r': *pRes = '\r'; break;
85    case 't': *pRes = '\t'; break;
86    case 'v': *pRes = '\v'; break;
87
88    case 'x':
89    case 'X':         /* HEX Escape       */
90        if (IS_HEX_DIGIT_CHAR(*pzIn))  {
91            char z[4];
92            unsigned int ct = 0;
93
94            do  {
95                z[ct] = pzIn[ct];
96                if (++ct >= 2)
97                    break;
98            } while (IS_HEX_DIGIT_CHAR(pzIn[ct]));
99            z[ct] = NUL;
100            *pRes = (char)strtoul(z, NULL, 16);
101            return ct + 1;
102        }
103        break;
104
105    case '0': case '1': case '2': case '3':
106    case '4': case '5': case '6': case '7':
107    {
108        /*
109         *  IF the character copied was an octal digit,
110         *  THEN set the output character to an octal value.
111         *  The 3 octal digit result might exceed 0xFF, so check it.
112         */
113        char z[4];
114        unsigned long val;
115        unsigned int  ct = 0;
116
117        z[ct++] = *--pzIn;
118        while (IS_OCT_DIGIT_CHAR(pzIn[ct])) {
119            z[ct] = pzIn[ct];
120            if (++ct >= 3)
121                break;
122        }
123
124        z[ct] = NUL;
125        val = strtoul(z, NULL, 8);
126        if (val > 0xFF)
127            val = 0xFF;
128        *pRes = (char)val;
129        return ct;
130    }
131
132    default: /* quoted character is result character */;
133    }
134
135    return res;
136}
137
138
139/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
140 *
141 *  A quoted string has been found.
142 *  Find the end of it and compress any escape sequences.
143 */
144static bool
145contiguous_quote(char ** pps, char * pq, int * lnct_p)
146{
147    char * ps = *pps + 1;
148
149    for (;;) {
150        while (IS_WHITESPACE_CHAR(*ps))
151            if (*(ps++) == NL)
152                (*lnct_p)++;
153
154        /*
155         *  IF the next character is a quote character,
156         *  THEN we will concatenate the strings.
157         */
158        switch (*ps) {
159        case '"':
160        case '\'':
161            *pq  = *(ps++);  /* assign new quote character and return */
162            *pps = ps;
163            return true;
164
165        case '/':
166            /*
167             *  Allow for a comment embedded in the concatenated string.
168             */
169            switch (ps[1]) {
170            default:
171                *pps = NULL;
172                return false;
173
174            case '/':
175                /*
176                 *  Skip to end of line
177                 */
178                ps = strchr(ps, NL);
179                if (ps == NULL) {
180                    *pps = NULL;
181                    return false;
182                }
183                break;
184
185            case '*':
186            {
187                char * p = strstr( ps+2, "*/" );
188                /*
189                 *  Skip to terminating star slash
190                 */
191                if (p == NULL) {
192                    *pps = NULL;
193                    return false;
194                }
195
196                while (ps < p) {
197                    if (*(ps++) == NL)
198                        (*lnct_p)++;
199                }
200
201                ps = p + 2;
202            }
203            }
204            continue;
205
206        default:
207            /*
208             *  The next non-whitespace character is not a quote.
209             *  The series of quoted strings has come to an end.
210             */
211            *pps = ps;
212            return false;
213        }
214    }
215}
216
217/*=export_func  ao_string_cook
218 * private:
219 *
220 * what:  concatenate and escape-process strings
221 * arg:   + char * + pzScan  + The *MODIFIABLE* input buffer +
222 * arg:   + int *  + lnct_p  + The (possibly NULL) pointer to a line count +
223 *
224 * ret-type: char *
225 * ret-desc: The address of the text following the processed strings.
226 *           The return value is NULL if the strings are ill-formed.
227 *
228 * doc:
229 *
230 *  A series of one or more quoted strings are concatenated together.
231 *  If they are quoted with double quotes (@code{"}), then backslash
232 *  escapes are processed per the C programming language.  If they are
233 *  single quote strings, then the backslashes are honored only when they
234 *  precede another backslash or a single quote character.
235 *
236 * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
237=*/
238char *
239ao_string_cook(char * pzScan, int * lnct_p)
240{
241    int   l = 0;
242    char  q = *pzScan;
243
244    /*
245     *  It is a quoted string.  Process the escape sequence characters
246     *  (in the set "abfnrtv") and make sure we find a closing quote.
247     */
248    char * pzD = pzScan++;
249    char * pzS = pzScan;
250
251    if (lnct_p == NULL)
252        lnct_p = &l;
253
254    for (;;) {
255        /*
256         *  IF the next character is the quote character, THEN we may end the
257         *  string.  We end it unless the next non-blank character *after* the
258         *  string happens to also be a quote.  If it is, then we will change
259         *  our quote character to the new quote character and continue
260         *  condensing text.
261         */
262        while (*pzS == q) {
263            *pzD = NUL; /* This is probably the end of the line */
264            if (! contiguous_quote(&pzS, &q, lnct_p))
265                return pzS;
266        }
267
268        /*
269         *  We are inside a quoted string.  Copy text.
270         */
271        switch (*(pzD++) = *(pzS++)) {
272        case NUL:
273            return NULL;
274
275        case NL:
276            (*lnct_p)++;
277            break;
278
279        case '\\':
280            /*
281             *  IF we are escaping a new line,
282             *  THEN drop both the escape and the newline from
283             *       the result string.
284             */
285            if (*pzS == NL) {
286                pzS++;
287                pzD--;
288                (*lnct_p)++;
289            }
290
291            /*
292             *  ELSE IF the quote character is '"' or '`',
293             *  THEN we do the full escape character processing
294             */
295            else if (q != '\'') {
296                unsigned int ct;
297                ct = ao_string_cook_escape_char(pzS, pzD-1, (uint_t)NL);
298                if (ct == 0)
299                    return NULL;
300
301                pzS += ct;
302            }     /* if (q != '\'')                  */
303
304            /*
305             *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
306             *  The latter only to easily hide preprocessing directives.
307             */
308            else switch (*pzS) {
309            case '\\':
310            case '\'':
311            case '#':
312                pzD[-1] = *pzS++;
313            }
314        }     /* switch (*(pzD++) = *(pzS++))    */
315    }         /* for (;;)                        */
316}
317
318/** @}
319 *
320 * Local Variables:
321 * mode: C
322 * c-file-style: "stroustrup"
323 * indent-tabs-mode: nil
324 * End:
325 * end of autoopts/cook.c */
326