1181834Sroberto
2181834Sroberto/*
3181834Sroberto *  $Id: cook.c,v 4.10 2007/02/04 17:44:12 bkorb Exp $
4181834Sroberto *  Time-stamp:      "2006-09-24 15:21:02 bkorb"
5181834Sroberto *
6181834Sroberto *  This file contains the routines that deal with processing quoted strings
7181834Sroberto *  into an internal format.
8181834Sroberto */
9181834Sroberto
10181834Sroberto/*
11181834Sroberto *  Automated Options copyright 1992-2007 Bruce Korb
12181834Sroberto *
13181834Sroberto *  Automated Options is free software.
14181834Sroberto *  You may redistribute it and/or modify it under the terms of the
15181834Sroberto *  GNU General Public License, as published by the Free Software
16181834Sroberto *  Foundation; either version 2, or (at your option) any later version.
17181834Sroberto *
18181834Sroberto *  Automated Options is distributed in the hope that it will be useful,
19181834Sroberto *  but WITHOUT ANY WARRANTY; without even the implied warranty of
20181834Sroberto *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21181834Sroberto *  GNU General Public License for more details.
22181834Sroberto *
23181834Sroberto *  You should have received a copy of the GNU General Public License
24181834Sroberto *  along with Automated Options.  See the file "COPYING".  If not,
25181834Sroberto *  write to:  The Free Software Foundation, Inc.,
26181834Sroberto *             51 Franklin Street, Fifth Floor,
27181834Sroberto *             Boston, MA  02110-1301, USA.
28181834Sroberto *
29181834Sroberto * As a special exception, Bruce Korb gives permission for additional
30181834Sroberto * uses of the text contained in his release of AutoOpts.
31181834Sroberto *
32181834Sroberto * The exception is that, if you link the AutoOpts library with other
33181834Sroberto * files to produce an executable, this does not by itself cause the
34181834Sroberto * resulting executable to be covered by the GNU General Public License.
35181834Sroberto * Your use of that executable is in no way restricted on account of
36181834Sroberto * linking the AutoOpts library code into it.
37181834Sroberto *
38181834Sroberto * This exception does not however invalidate any other reasons why
39181834Sroberto * the executable file might be covered by the GNU General Public License.
40181834Sroberto *
41181834Sroberto * This exception applies only to the code released by Bruce Korb under
42181834Sroberto * the name AutoOpts.  If you copy code from other sources under the
43181834Sroberto * General Public License into a copy of AutoOpts, as the General Public
44181834Sroberto * License permits, the exception does not apply to the code that you add
45181834Sroberto * in this way.  To avoid misleading anyone as to the status of such
46181834Sroberto * modified files, you must delete this exception notice from them.
47181834Sroberto *
48181834Sroberto * If you write modifications of your own for AutoOpts, it is your choice
49181834Sroberto * whether to permit this exception to apply to your modifications.
50181834Sroberto * If you do not wish that, delete this exception notice.
51181834Sroberto */
52181834Sroberto
53181834Sroberto/* = = = START-STATIC-FORWARD = = = */
54181834Sroberto/* static forward declarations maintained by :mkfwd */
55181834Sroberto/* = = = END-STATIC-FORWARD = = = */
56181834Sroberto
/*=export_func  ao_string_cook_escape_char
 * private:
 *
 * what:  escape-process a string fragment
 * arg:   + char const*  + pzScan  + points to character after the escape +
 * arg:   + char*        + pRes    + Where to put the result byte +
 * arg:   + unsigned int + nl_ch   + replacement char if scanned char is \n +
 *
 * ret-type: unsigned int
 * ret-desc: The number of bytes consumed processing the escaped character.
 *
 * doc:
 *
 *  This function converts "t" into "\t" and all your other favorite
 *  escapes, including numeric ones:  hex and octal, too.
 *  The returned result tells the caller how far to advance the
 *  scan pointer (passed in).  The default is to just pass through the
 *  escaped character and advance the scan by one.
 *
 *  Some applications need to keep an escaped newline, others need to
 *  suppress it.  This is accomplished by supplying a '\n' replacement
 *  character that is different from \n, if need be.  For example, use
 *  0x7F and never emit a 0x7F.
 *
 * err:  Zero is returned if the escape is immediately followed by the
 *       NUL byte that ends the input string.
=*/
/*
 *  Map one hexadecimal digit character to its numeric value (0..15).
 *  Any other byte, including bytes with the high bit set, yields -1.
 *  Plain range comparisons are used instead of <ctype.h> so that a
 *  negative "char" can never reach a character classification function.
 */
static int
ao_cook_hex_val( char ch )
{
    if ((ch >= '0') && (ch <= '9'))
        return ch - '0';
    if ((ch >= 'A') && (ch <= 'F'))
        return 10 + (ch - 'A');
    if ((ch >= 'a') && (ch <= 'f'))
        return 10 + (ch - 'a');
    return -1;
}

/*
 *  Decode the escape sequence whose first character (the one right after
 *  the backslash) is at "pzIn" and store the resulting byte in "*pRes".
 *
 *  pzIn  points to the character after the escape character
 *  pRes  where to put the result byte
 *  nl    byte to emit when the escaped character is a newline (LF or CR-LF)
 *
 *  Returns the number of input bytes consumed, or zero when "pzIn" points
 *  at the NUL byte ending the string.  Unrecognized escapes pass the
 *  escaped character through unchanged and consume one byte.
 */
unsigned int
ao_string_cook_escape_char( char const* pzIn, char* pRes, unsigned int nl )
{
    unsigned int  res = 1;

    switch (*pRes = *pzIn++) {
    case '\0':        /* NUL - end of input string */
        return 0;

    case '\r':
        /*
         *  A lone carriage return is passed through as is.
         *  A CR-LF pair is treated as a single escaped newline.
         */
        if (*pzIn != '\n')
            return 1;
        res++;
        /* FALLTHROUGH */
    case '\n':        /* NL  - emit the caller's newline replacement */
        *pRes = (char)nl;
        return res;

    case 'a': *pRes = '\a'; break;
    case 'b': *pRes = '\b'; break;
    case 'f': *pRes = '\f'; break;
    case 'n': *pRes = '\n'; break;
    case 'r': *pRes = '\r'; break;
    case 't': *pRes = '\t'; break;
    case 'v': *pRes = '\v'; break;

    case 'x':         /* HEX Escape: one or two hex digits */
    {
        int hi = ao_cook_hex_val( pzIn[0] );
        int lo;

        /*
         *  No hex digit follows:  the 'x' itself is the result
         *  and only that one byte has been consumed.
         */
        if (hi < 0)
            break;

        /*
         *  pzIn[0] is a hex digit and therefore not NUL,
         *  so looking at pzIn[1] stays inside the string.
         */
        lo = ao_cook_hex_val( pzIn[1] );
        if (lo < 0) {
            *pRes = (char)hi;
            res   = 2;
        } else {
            *pRes = (char)((hi << 4) | lo);
            res   = 3;
        }
        break;
    }

    default:
        /*
         *  IF the character copied was an octal digit,
         *  THEN set the output character to an octal value.
         *  Up to two more octal digits are consumed, but the third one
         *  only if the result still fits in a single byte.
         */
        if ((*pRes >= '0') && (*pRes <= '7')) {
            unsigned int  val = (unsigned int)(*pRes - '0');

            while ((res < 3) && (*pzIn >= '0') && (*pzIn <= '7')) {
                unsigned int nxt = (val << 3) + (unsigned int)(*pzIn - '0');
                if (nxt > 0xFF)
                    break;
                val = nxt;
                pzIn++;
                res++;
            }

            *pRes = (char)val;
        }
        break;
    }

    return res;
}
185181834Sroberto
186181834Sroberto
187181834Sroberto/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
188181834Sroberto *
189181834Sroberto *  A quoted string has been found.
190181834Sroberto *  Find the end of it and compress any escape sequences.
191181834Sroberto */
/*=export_func  ao_string_cook
 * private:
 *
 * what:  concatenate and escape-process strings
 * arg:   + char* + pzScan     + The *MODIFIABLE* input buffer +
 * arg:   + int*  + pLineCt    + The (possibly NULL) pointer to a line count +
 *
 * ret-type: char*
 * ret-desc: The address of the text following the processed strings.
 *           The return value is NULL if the strings are ill-formed.
 *
 * doc:
 *
 *  A series of one or more quoted strings are concatenated together.
 *  If they are quoted with double quotes (@code{"}), then backslash
 *  escapes are processed per the C programming language.  If they are
 *  single quote strings, then the backslashes are honored only when they
 *  precede another backslash, a single quote character or a hash mark
 *  (@code{#}).  With either kind of quote, a backslash immediately
 *  followed by a newline is removed from the result along with the newline.
 *
 * err:  @code{NULL} is returned if the string(s) is/are mal-formed.
=*/
/*
 *  Skip the white space and the C or C++ style comments that may separate
 *  one quoted string from the next.  Every newline passed over is added
 *  to "*pLineCt" exactly once.
 *
 *  Returns the address of the first character that is neither white space
 *  nor part of a comment.  Returns NULL if a '/' does not start a comment
 *  or if a comment is not terminated.
 */
static char*
ao_cook_skip_filler( char* pzS, int* pLineCt )
{
    for (;;) {
        /*
         *  <ctype.h> functions need an "unsigned char" value;
         *  a negative plain "char" would be undefined behavior.
         */
        while (isspace( (unsigned char)*pzS ))
            if (*(pzS++) == '\n')
                (*pLineCt)++;

        if (*pzS != '/')
            return pzS;

        switch (pzS[1]) {
        case '/':
            /*
             *  Skip to end of line.  The scan is left *on* the newline so
             *  that the white space loop above counts it.  Counting it
             *  here as well would count the line twice.
             */
            pzS = strchr( pzS, '\n' );
            if (pzS == NULL)
                return NULL;
            break;

        case '*':
        {
            /*
             *  Skip to terminating star slash, counting lines on the way
             */
            char* pzEnd = strstr( pzS+2, "*/" );
            if (pzEnd == NULL)
                return NULL;

            for (; pzS < pzEnd; pzS++)
                if (*pzS == '\n')
                    (*pLineCt)++;

            pzS = pzEnd + 2;
            break;
        }

        default:
            /*
             *  A slash that does not introduce a comment
             */
            return NULL;
        }
    }
}

/*
 *  Cook, in place, the series of quoted strings that starts at "pzScan".
 *
 *  pzScan   the *MODIFIABLE* buffer; "*pzScan" is the opening quote
 *           character and the cooked, NUL terminated text is written
 *           starting at that same address
 *  pLineCt  incremented for every newline scanned; may be NULL
 *
 *  Returns the address of the text following the processed strings,
 *  or NULL if the strings are ill-formed.
 */
char*
ao_string_cook( char* pzScan, int* pLineCt )
{
    int   unused_ct = 0;
    char  q;
    char* pzD;     /* where the next cooked byte goes  */
    char* pzS;     /* where the next raw byte is read  */

    /*
     *  An empty buffer has no opening quote.  Scanning would begin
     *  beyond the terminating NUL, so call it ill-formed.
     */
    if (*pzScan == '\0')
        return NULL;

    q   = *pzScan;
    pzD = pzScan;
    pzS = pzScan + 1;

    if (pLineCt == NULL)
        pLineCt = &unused_ct;

    for (;;) {
        /*
         *  IF the next character is the quote character, THEN we may end the
         *  string.  We end it unless the next character *after* the string,
         *  ignoring white space and comments, happens to also be a quote.
         *  If it is, then we will change our quote character to the new
         *  quote character and continue condensing text.
         */
        while (*pzS == q) {
            *pzD = '\0'; /* This is probably the end of the line */

            pzS = ao_cook_skip_filler( pzS + 1, pLineCt );
            if (pzS == NULL)
                return NULL;

            /*
             *  The next significant character is not a quote.
             *  The series of quoted strings has come to an end.
             */
            if ((*pzS != '"') && (*pzS != '\''))
                return pzS;

            q = *(pzS++);  /* assign new quote character and advance scan */
        }

        /*
         *  We are inside a quoted string.  Copy text.
         */
        switch (*(pzD++) = *(pzS++)) {
        case '\0':
            /*
             *  The input ended before the closing quote was found.
             */
            return NULL;

        case '\n':
            (*pLineCt)++;
            break;

        case '\\':
            /*
             *  IF we are escaping a new line,
             *  THEN drop both the escape and the newline from
             *       the result string.
             */
            if (*pzS == '\n') {
                pzS++;
                pzD--;
                (*pLineCt)++;
            }

            /*
             *  ELSE IF the quote character is not a single quote,
             *  THEN we do the full escape character processing.
             *  The result byte overwrites the backslash just copied.
             */
            else if (q != '\'') {
                unsigned int ct =
                    ao_string_cook_escape_char( pzS, pzD-1,
                                                (unsigned int)'\n' );
                if (ct == 0)
                    return NULL;

                pzS += ct;
            }

            /*
             *  OTHERWISE, we only process "\\", "\'" and "\#" sequences.
             *  The latter only to easily hide preprocessing directives.
             */
            else switch (*pzS) {
            case '\\':
            case '\'':
            case '#':
                pzD[-1] = *pzS++;
                break;

            default:
                break;
            }
            break;

        default:
            break;
        }     /* switch (*(pzD++) = *(pzS++))    */
    }         /* for (;;)                        */
}
348181834Sroberto/*
349181834Sroberto * Local Variables:
350181834Sroberto * mode: C
351181834Sroberto * c-file-style: "stroustrup"
352181834Sroberto * indent-tabs-mode: nil
353181834Sroberto * End:
354181834Sroberto * end of autoopts/cook.c */
355