1/*
2 *  $Id: 3da9a5fc88c904673b3b95d0c9667b2bcbccfc80 $
3 *  Time-stamp:      "2007-11-16 22:49:11 bkorb"
4 *
5 *  This file contains the routines that deal with processing quoted strings
6 *  into an internal format.
7 *
8 *  This file is part of AutoOpts, a companion to AutoGen.
9 *  AutoOpts is free software.
10 *  AutoOpts is copyright (c) 1992-2009 by Bruce Korb - all rights reserved
11 *
12 *  AutoOpts is available under any one of two licenses.  The license
13 *  in use must be one of these two and the choice is under the control
14 *  of the user of the license.
15 *
16 *   The GNU Lesser General Public License, version 3 or later
17 *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
18 *
19 *   The Modified Berkeley Software Distribution License
20 *      See the file "COPYING.mbsd"
21 *
22 *  These files have the following md5sums:
23 *
24 *  43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3
25 *  06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3
26 *  66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd
27 */
28
29/* = = = START-STATIC-FORWARD = = = */
30/* static forward declarations maintained by mk-fwd */
31/* = = = END-STATIC-FORWARD = = = */
32
33/*=export_func  ao_string_cook_escape_char
34 * private:
35 *
36 * what:  escape-process a string fragment
37 * arg:   + char const*  + pzScan  + points to character after the escape +
38 * arg:   + char*        + pRes    + Where to put the result byte +
39 * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
40 *
41 * ret-type: unsigned int
42 * ret-desc: The number of bytes consumed processing the escaped character.
43 *
44 * doc:
45 *
46 *  This function converts "t" into "\t" and all your other favorite
 *  escapes, including numeric ones:  hex and octal, too.
48 *  The returned result tells the caller how far to advance the
49 *  scan pointer (passed in).  The default is to just pass through the
50 *  escaped character and advance the scan by one.
51 *
52 *  Some applications need to keep an escaped newline, others need to
53 *  suppress it.  This is accomplished by supplying a '\n' replacement
54 *  character that is different from \n, if need be.  For example, use
55 *  0x7F and never emit a 0x7F.
56 *
 * err:  Zero is returned if the string is mal-formed, i.e. the escape
 *       is immediately followed by the terminating NUL byte.
58=*/
59unsigned int
60ao_string_cook_escape_char( char const* pzIn, char* pRes, u_int nl )
61{
62    unsigned int  res = 1;
63
64    switch (*pRes = *pzIn++) {
65    case NUL:         /* NUL - end of input string */
66        return 0;
67    case '\r':
68        if (*pzIn != '\n')
69            return 1;
70        res++;
71        /* FALLTHROUGH */
72    case '\n':        /* NL  - emit newline        */
73        *pRes = (char)nl;
74        return res;
75
76    case 'a': *pRes = '\a'; break;
77    case 'b': *pRes = '\b'; break;
78    case 'f': *pRes = '\f'; break;
79    case 'n': *pRes = '\n'; break;
80    case 'r': *pRes = '\r'; break;
81    case 't': *pRes = '\t'; break;
82    case 'v': *pRes = '\v'; break;
83
84    case 'x':
85    case 'X':         /* HEX Escape       */
86        if (IS_HEX_DIGIT_CHAR(*pzIn))  {
87            char z[4], *pz = z;
88
89            do *(pz++) = *(pzIn++);
90            while (IS_HEX_DIGIT_CHAR(*pzIn) && (pz < z + 2));
91            *pz = NUL;
92            *pRes = (unsigned char)strtoul(z, NULL, 16);
93            res += pz - z;
94        }
95        break;
96
97    case '0': case '1': case '2': case '3':
98    case '4': case '5': case '6': case '7':
99    {
100        /*
101         *  IF the character copied was an octal digit,
102         *  THEN set the output character to an octal value
103         */
104        char z[4], *pz = z + 1;
105        unsigned long val;
106        z[0] = *pRes;
107
108        while (IS_OCT_DIGIT_CHAR(*pzIn) && (pz < z + 3))
109            *(pz++) = *(pzIn++);
110        *pz = NUL;
111        val = strtoul(z, NULL, 8);
112        if (val > 0xFF)
113            val = 0xFF;
114        *pRes = (unsigned char)val;
115        res = pz - z;
116        break;
117    }
118
119    default: ;
120    }
121
122    return res;
123}
124
125
126/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
127 *
128 *  A quoted string has been found.
129 *  Find the end of it and compress any escape sequences.
130 */
131/*=export_func  ao_string_cook
132 * private:
133 *
134 * what:  concatenate and escape-process strings
135 * arg:   + char* + pzScan     + The *MODIFIABLE* input buffer +
136 * arg:   + int*  + pLineCt    + The (possibly NULL) pointer to a line count +
137 *
138 * ret-type: char*
139 * ret-desc: The address of the text following the processed strings.
140 *           The return value is NULL if the strings are ill-formed.
141 *
142 * doc:
143 *
144 *  A series of one or more quoted strings are concatenated together.
145 *  If they are quoted with double quotes (@code{"}), then backslash
146 *  escapes are processed per the C programming language.  If they are
147 *  single quote strings, then the backslashes are honored only when they
148 *  precede another backslash or a single quote character.
149 *
150 * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
151=*/
152char*
153ao_string_cook( char* pzScan, int* pLineCt )
154{
155    int   l = 0;
156    char  q = *pzScan;
157
158    /*
159     *  It is a quoted string.  Process the escape sequence characters
160     *  (in the set "abfnrtv") and make sure we find a closing quote.
161     */
162    char* pzD = pzScan++;
163    char* pzS = pzScan;
164
165    if (pLineCt == NULL)
166        pLineCt = &l;
167
168    for (;;) {
169        /*
170         *  IF the next character is the quote character, THEN we may end the
171         *  string.  We end it unless the next non-blank character *after* the
172         *  string happens to also be a quote.  If it is, then we will change
173         *  our quote character to the new quote character and continue
174         *  condensing text.
175         */
176        while (*pzS == q) {
177            *pzD = NUL; /* This is probably the end of the line */
178            pzS++;
179
180        scan_for_quote:
181            while (IS_WHITESPACE_CHAR(*pzS))
182                if (*(pzS++) == '\n')
183                    (*pLineCt)++;
184
185            /*
186             *  IF the next character is a quote character,
187             *  THEN we will concatenate the strings.
188             */
189            switch (*pzS) {
190            case '"':
191            case '\'':
192                break;
193
194            case '/':
195                /*
196                 *  Allow for a comment embedded in the concatenated string.
197                 */
198                switch (pzS[1]) {
199                default:  return NULL;
200                case '/':
201                    /*
202                     *  Skip to end of line
203                     */
204                    pzS = strchr( pzS, '\n' );
205                    if (pzS == NULL)
206                        return NULL;
207                    (*pLineCt)++;
208                    break;
209
210                case '*':
211                {
212                    char* p = strstr( pzS+2, "*/" );
213                    /*
214                     *  Skip to terminating star slash
215                     */
216                    if (p == NULL)
217                        return NULL;
218                    while (pzS < p) {
219                        if (*(pzS++) == '\n')
220                            (*pLineCt)++;
221                    }
222
223                    pzS = p + 2;
224                }
225                }
226                goto scan_for_quote;
227
228            default:
229                /*
230                 *  The next non-whitespace character is not a quote.
231                 *  The series of quoted strings has come to an end.
232                 */
233                return pzS;
234            }
235
236            q = *(pzS++);  /* assign new quote character and advance scan */
237        }
238
239        /*
240         *  We are inside a quoted string.  Copy text.
241         */
242        switch (*(pzD++) = *(pzS++)) {
243        case NUL:
244            return NULL;
245
246        case '\n':
247            (*pLineCt)++;
248            break;
249
250        case '\\':
251            /*
252             *  IF we are escaping a new line,
253             *  THEN drop both the escape and the newline from
254             *       the result string.
255             */
256            if (*pzS == '\n') {
257                pzS++;
258                pzD--;
259                (*pLineCt)++;
260            }
261
262            /*
263             *  ELSE IF the quote character is '"' or '`',
264             *  THEN we do the full escape character processing
265             */
266            else if (q != '\'') {
267                int ct = ao_string_cook_escape_char( pzS, pzD-1, (u_int)'\n' );
268                if (ct == 0)
269                    return NULL;
270
271                pzS += ct;
272            }     /* if (q != '\'')                  */
273
274            /*
275             *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
276             *  The latter only to easily hide preprocessing directives.
277             */
278            else switch (*pzS) {
279            case '\\':
280            case '\'':
281            case '#':
282                pzD[-1] = *pzS++;
283            }
284        }     /* switch (*(pzD++) = *(pzS++))    */
285    }         /* for (;;)                        */
286}
287/*
288 * Local Variables:
289 * mode: C
290 * c-file-style: "stroustrup"
291 * indent-tabs-mode: nil
292 * End:
293 * end of autoopts/cook.c */
294