1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "apr.h"
18#include "apr_strings.h"
19
20#define APR_WANT_STDIO          /* for sprintf() */
21#define APR_WANT_STRFUNC
22#include "apr_want.h"
23
24#include "apr_xml.h"
25
26#include "apu_config.h"
27
28#if defined(HAVE_XMLPARSE_XMLPARSE_H)
29#include <xmlparse/xmlparse.h>
30#elif defined(HAVE_XMLTOK_XMLPARSE_H)
31#include <xmltok/xmlparse.h>
32#elif defined(HAVE_XML_XMLPARSE_H)
33#include <xml/xmlparse.h>
34#else
35#include <expat.h>
36#endif
37
/* line terminator appended to the text built by apr_xml_empty_elem() */
#define DEBUG_CR "\r\n"

/*
 * Keywords compared against names delivered by the XML parser.  They are
 * spelled as explicit byte values (the ASCII codes of the characters shown
 * in the comments) instead of character literals, so their contents do not
 * depend on the compiler's execution character set -- presumably for the
 * APR_CHARSET_EBCDIC builds handled at the end of this file (TODO confirm).
 */

/* "xmlns" */
static const char APR_KW_xmlns[] = {
    0x78,       /* x */
    0x6D,       /* m */
    0x6C,       /* l */
    0x6E,       /* n */
    0x73,       /* s */
    '\0'
};

/* "xml:lang" (note: despite the identifier, this is not an xmlns keyword) */
static const char APR_KW_xmlns_lang[] = {
    0x78,       /* x */
    0x6D,       /* m */
    0x6C,       /* l */
    0x3A,       /* : */
    0x6C,       /* l */
    0x61,       /* a */
    0x6E,       /* n */
    0x67,       /* g */
    '\0'
};

/* "DAV:" -- the namespace URI pre-registered by apr_xml_parser_create() */
static const char APR_KW_DAV[] = {
    0x44,       /* D */
    0x41,       /* A */
    0x56,       /* V */
    0x3A,       /* : */
    '\0'
};
43
/*
 * Errors related to namespace processing.  These are stored in
 * apr_xml_parser.error and reported by apr_xml_parser_geterror().
 */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
#define APR_XML_NS_ERROR_INVALID_DECL (-1001)

/*
 * Test for a name that begins with [Xx][Mm][Ll] (compared as ASCII byte
 * values: 0x58/0x78 = X/x, 0x4D/0x6D = M/m, 0x4C/0x6C = L/l).
 *
 * `name` must point at a NUL-terminated string; the && chain stops at the
 * first mismatch, so the terminator of a short string is never passed.
 * The argument is parenthesized so that any pointer expression (for
 * example `p + 1`) can be passed safely; note that it may be evaluated
 * more than once, so it must not have side effects.
 */
#define APR_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 0x58 || (name)[0] == 0x78) && \
      ((name)[1] == 0x4D || (name)[1] == 0x6D) && \
      ((name)[2] == 0x4C || (name)[2] == 0x6C) )
53
54
55/* the real (internal) definition of the parser context */
56struct apr_xml_parser {
57    apr_xml_doc *doc;		/* the doc we're parsing */
58    apr_pool_t *p;		/* the pool we allocate from */
59    apr_xml_elem *cur_elem;	/* current element */
60
61    int error;			/* an error has occurred */
62#define APR_XML_ERROR_EXPAT             1
63#define APR_XML_ERROR_PARSE_DONE        2
64/* also: public APR_XML_NS_ERROR_* values (if any) */
65
66    XML_Parser xp;              /* the actual (Expat) XML parser */
67    enum XML_Error xp_err;      /* stored Expat error code */
68};
69
/*
 * One namespace declaration that is in scope on an element.  Each element
 * keeps a singly linked list of these; find_prefix() searches the lists of
 * the current element and then of each ancestor.
 */
typedef struct apr_xml_ns_scope apr_xml_ns_scope;

struct apr_xml_ns_scope {
    /* prefix bound by the declaration ("" for the default namespace) */
    const char *prefix;

    /* index into the document's namespace table */
    int ns;

    /* non-zero when the declared namespace URI is the empty string */
    int emptyURI;

    /* next declaration made on the same element */
    apr_xml_ns_scope *next;
};
77
78
79/* return namespace table index for a given prefix */
80static int find_prefix(apr_xml_parser *parser, const char *prefix)
81{
82    apr_xml_elem *elem = parser->cur_elem;
83
84    /*
85    ** Walk up the tree, looking for a namespace scope that defines this
86    ** prefix.
87    */
88    for (; elem; elem = elem->parent) {
89	apr_xml_ns_scope *ns_scope = elem->ns_scope;
90
91	for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
92	    if (strcmp(prefix, ns_scope->prefix) == 0) {
93		if (ns_scope->emptyURI) {
94		    /*
95		    ** It is possible to set the default namespace to an
96		    ** empty URI string; this resets the default namespace
97		    ** to mean "no namespace." We just found the prefix
98		    ** refers to an empty URI, so return "no namespace."
99		    */
100		    return APR_XML_NS_NONE;
101		}
102
103		return ns_scope->ns;
104	    }
105	}
106    }
107
108    /*
109     * If the prefix is empty (""), this means that a prefix was not
110     * specified in the element/attribute. The search that was performed
111     * just above did not locate a default namespace URI (which is stored
112     * into ns_scope with an empty prefix). This means the element/attribute
113     * has "no namespace". We have a reserved value for this.
114     */
115    if (*prefix == '\0') {
116	return APR_XML_NS_NONE;
117    }
118
119    /* not found */
120    return APR_XML_NS_ERROR_UNKNOWN_PREFIX;
121}
122
123static void start_handler(void *userdata, const char *name, const char **attrs)
124{
125    apr_xml_parser *parser = userdata;
126    apr_xml_elem *elem;
127    apr_xml_attr *attr;
128    apr_xml_attr *prev;
129    char *colon;
130    const char *quoted;
131    char *elem_name;
132
133    /* punt once we find an error */
134    if (parser->error)
135	return;
136
137    elem = apr_pcalloc(parser->p, sizeof(*elem));
138
139    /* prep the element */
140    elem->name = elem_name = apr_pstrdup(parser->p, name);
141
142    /* fill in the attributes (note: ends up in reverse order) */
143    while (*attrs) {
144	attr = apr_palloc(parser->p, sizeof(*attr));
145	attr->name = apr_pstrdup(parser->p, *attrs++);
146	attr->value = apr_pstrdup(parser->p, *attrs++);
147	attr->next = elem->attr;
148	elem->attr = attr;
149    }
150
151    /* hook the element into the tree */
152    if (parser->cur_elem == NULL) {
153	/* no current element; this also becomes the root */
154	parser->cur_elem = parser->doc->root = elem;
155    }
156    else {
157	/* this element appeared within the current elem */
158	elem->parent = parser->cur_elem;
159
160	/* set up the child/sibling links */
161	if (elem->parent->last_child == NULL) {
162	    /* no first child either */
163	    elem->parent->first_child = elem->parent->last_child = elem;
164	}
165	else {
166	    /* hook onto the end of the parent's children */
167	    elem->parent->last_child->next = elem;
168	    elem->parent->last_child = elem;
169	}
170
171	/* this element is now the current element */
172	parser->cur_elem = elem;
173    }
174
175    /* scan the attributes for namespace declarations */
176    for (prev = NULL, attr = elem->attr;
177	 attr;
178	 attr = attr->next) {
179	if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) {
180	    const char *prefix = &attr->name[5];
181	    apr_xml_ns_scope *ns_scope;
182
183	    /* test for xmlns:foo= form and xmlns= form */
184	    if (*prefix == 0x3A) {
185                /* a namespace prefix declaration must have a
186                   non-empty value. */
187                if (attr->value[0] == '\0') {
188                    parser->error = APR_XML_NS_ERROR_INVALID_DECL;
189                    return;
190                }
191		++prefix;
192            }
193	    else if (*prefix != '\0') {
194		/* advance "prev" since "attr" is still present */
195		prev = attr;
196		continue;
197	    }
198
199	    /* quote the URI before we ever start working with it */
200	    quoted = apr_xml_quote_string(parser->p, attr->value, 1);
201
202	    /* build and insert the new scope */
203	    ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
204	    ns_scope->prefix = prefix;
205	    ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
206	    ns_scope->emptyURI = *quoted == '\0';
207	    ns_scope->next = elem->ns_scope;
208	    elem->ns_scope = ns_scope;
209
210	    /* remove this attribute from the element */
211	    if (prev == NULL)
212		elem->attr = attr->next;
213	    else
214		prev->next = attr->next;
215
216	    /* Note: prev will not be advanced since we just removed "attr" */
217	}
218	else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) {
219	    /* save away the language (in quoted form) */
220	    elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);
221
222	    /* remove this attribute from the element */
223	    if (prev == NULL)
224		elem->attr = attr->next;
225	    else
226		prev->next = attr->next;
227
228	    /* Note: prev will not be advanced since we just removed "attr" */
229	}
230	else {
231	    /* advance "prev" since "attr" is still present */
232	    prev = attr;
233	}
234    }
235
236    /*
237    ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
238    ** language from the parent element (if present).
239    **
240    ** NOTE: elem_size() *depends* upon this pointer equality.
241    */
242    if (elem->lang == NULL && elem->parent != NULL)
243	elem->lang = elem->parent->lang;
244
245    /* adjust the element's namespace */
246    colon = strchr(elem_name, 0x3A);
247    if (colon == NULL) {
248	/*
249	 * The element is using the default namespace, which will always
250	 * be found. Either it will be "no namespace", or a default
251	 * namespace URI has been specified at some point.
252	 */
253	elem->ns = find_prefix(parser, "");
254    }
255    else if (APR_XML_NS_IS_RESERVED(elem->name)) {
256	elem->ns = APR_XML_NS_NONE;
257    }
258    else {
259	*colon = '\0';
260	elem->ns = find_prefix(parser, elem->name);
261	elem->name = colon + 1;
262
263	if (APR_XML_NS_IS_ERROR(elem->ns)) {
264	    parser->error = elem->ns;
265	    return;
266	}
267    }
268
269    /* adjust all remaining attributes' namespaces */
270    for (attr = elem->attr; attr; attr = attr->next) {
271        /*
272         * apr_xml_attr defines this as "const" but we dup'd it, so we
273         * know that we can change it. a bit hacky, but the existing
274         * structure def is best.
275         */
276        char *attr_name = (char *)attr->name;
277
278	colon = strchr(attr_name, 0x3A);
279	if (colon == NULL) {
280	    /*
281	     * Attributes do NOT use the default namespace. Therefore,
282	     * we place them into the "no namespace" category.
283	     */
284	    attr->ns = APR_XML_NS_NONE;
285	}
286	else if (APR_XML_NS_IS_RESERVED(attr->name)) {
287	    attr->ns = APR_XML_NS_NONE;
288	}
289	else {
290	    *colon = '\0';
291	    attr->ns = find_prefix(parser, attr->name);
292	    attr->name = colon + 1;
293
294	    if (APR_XML_NS_IS_ERROR(attr->ns)) {
295		parser->error = attr->ns;
296		return;
297	    }
298	}
299    }
300}
301
302static void end_handler(void *userdata, const char *name)
303{
304    apr_xml_parser *parser = userdata;
305
306    /* punt once we find an error */
307    if (parser->error)
308	return;
309
310    /* pop up one level */
311    parser->cur_elem = parser->cur_elem->parent;
312}
313
314static void cdata_handler(void *userdata, const char *data, int len)
315{
316    apr_xml_parser *parser = userdata;
317    apr_xml_elem *elem;
318    apr_text_header *hdr;
319    const char *s;
320
321    /* punt once we find an error */
322    if (parser->error)
323	return;
324
325    elem = parser->cur_elem;
326    s = apr_pstrndup(parser->p, data, len);
327
328    if (elem->last_child == NULL) {
329	/* no children yet. this cdata follows the start tag */
330	hdr = &elem->first_cdata;
331    }
332    else {
333	/* child elements exist. this cdata follows the last child. */
334	hdr = &elem->last_child->following_cdata;
335    }
336
337    apr_text_append(parser->p, hdr, s);
338}
339
340static apr_status_t cleanup_parser(void *ctx)
341{
342    apr_xml_parser *parser = ctx;
343
344    XML_ParserFree(parser->xp);
345    parser->xp = NULL;
346
347    return APR_SUCCESS;
348}
349
350#if XML_MAJOR_VERSION > 1
351/* Stop the parser if an entity declaration is hit. */
352static void entity_declaration(void *userData, const XML_Char *entityName,
353                               int is_parameter_entity, const XML_Char *value,
354                               int value_length, const XML_Char *base,
355                               const XML_Char *systemId, const XML_Char *publicId,
356                               const XML_Char *notationName)
357{
358    apr_xml_parser *parser = userData;
359
360    XML_StopParser(parser->xp, XML_FALSE);
361}
362#else
363/* A noop default_handler. */
364static void default_handler(void *userData, const XML_Char *s, int len)
365{
366}
367#endif
368
369APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
370{
371    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
372
373    parser->p = pool;
374    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
375
376    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
377
378    /* ### is there a way to avoid hard-coding this? */
379    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
380
381    parser->xp = XML_ParserCreate(NULL);
382    if (parser->xp == NULL) {
383        (*apr_pool_abort_get(pool))(APR_ENOMEM);
384        return NULL;
385    }
386
387    apr_pool_cleanup_register(pool, parser, cleanup_parser,
388                              apr_pool_cleanup_null);
389
390    XML_SetUserData(parser->xp, parser);
391    XML_SetElementHandler(parser->xp, start_handler, end_handler);
392    XML_SetCharacterDataHandler(parser->xp, cdata_handler);
393
394    /* Prevent the "billion laughs" attack against expat by disabling
395     * internal entity expansion.  With 2.x, forcibly stop the parser
396     * if an entity is declared - this is safer and a more obvious
397     * failure mode.  With older versions, installing a noop
398     * DefaultHandler means that internal entities will be expanded as
399     * the empty string, which is also sufficient to prevent the
400     * attack. */
401#if XML_MAJOR_VERSION > 1
402    XML_SetEntityDeclHandler(parser->xp, entity_declaration);
403#else
404    XML_SetDefaultHandler(parser->xp, default_handler);
405#endif
406
407    return parser;
408}
409
410static apr_status_t do_parse(apr_xml_parser *parser,
411                             const char *data, apr_size_t len,
412                             int is_final)
413{
414    if (parser->xp == NULL) {
415        parser->error = APR_XML_ERROR_PARSE_DONE;
416    }
417    else {
418        int rv = XML_Parse(parser->xp, data, (int)len, is_final);
419
420        if (rv == 0) {
421            parser->error = APR_XML_ERROR_EXPAT;
422            parser->xp_err = XML_GetErrorCode(parser->xp);
423        }
424    }
425
426    /* ### better error code? */
427    return parser->error ? APR_EGENERAL : APR_SUCCESS;
428}
429
430APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
431                                              const char *data,
432                                              apr_size_t len)
433{
434    return do_parse(parser, data, len, 0 /* is_final */);
435}
436
437APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
438                                              apr_xml_doc **pdoc)
439{
440    char end;
441    apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
442
443    /* get rid of the parser */
444    (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
445
446    if (status)
447        return status;
448
449    if (pdoc != NULL)
450        *pdoc = parser->doc;
451    return APR_SUCCESS;
452}
453
454APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
455                                            char *errbuf,
456                                            apr_size_t errbufsize)
457{
458    int error = parser->error;
459    const char *msg;
460
461    /* clear our record of an error */
462    parser->error = 0;
463
464    switch (error) {
465    case 0:
466        msg = "No error.";
467        break;
468
469    case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
470        msg = "An undefined namespace prefix was used.";
471        break;
472
473    case APR_XML_NS_ERROR_INVALID_DECL:
474        msg = "A namespace prefix was defined with an empty URI.";
475        break;
476
477    case APR_XML_ERROR_EXPAT:
478        (void) apr_snprintf(errbuf, errbufsize,
479                            "XML parser error code: %s (%d)",
480                            XML_ErrorString(parser->xp_err), parser->xp_err);
481        return errbuf;
482
483    case APR_XML_ERROR_PARSE_DONE:
484        msg = "The parser is not active.";
485        break;
486
487    default:
488        msg = "There was an unknown error within the XML body.";
489        break;
490    }
491
492    (void) apr_cpystrn(errbuf, msg, errbufsize);
493    return errbuf;
494}
495
496APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
497                                             apr_xml_parser **parser,
498                                             apr_xml_doc **ppdoc,
499                                             apr_file_t *xmlfd,
500                                             apr_size_t buffer_length)
501{
502    apr_status_t rv;
503    char *buffer;
504    apr_size_t length;
505
506    *parser = apr_xml_parser_create(p);
507    if (*parser == NULL) {
508        /* FIXME: returning an error code would be nice,
509         * but we dont get one ;( */
510        return APR_EGENERAL;
511    }
512    buffer = apr_palloc(p, buffer_length);
513    length = buffer_length;
514
515    rv = apr_file_read(xmlfd, buffer, &length);
516
517    while (rv == APR_SUCCESS) {
518        rv = apr_xml_parser_feed(*parser, buffer, length);
519        if (rv != APR_SUCCESS) {
520            return rv;
521        }
522
523        length = buffer_length;
524        rv = apr_file_read(xmlfd, buffer, &length);
525    }
526    if (rv != APR_EOF) {
527        return rv;
528    }
529    rv = apr_xml_parser_done(*parser, ppdoc);
530    *parser = NULL;
531    return rv;
532}
533
534APU_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr,
535                                  const char *text)
536{
537    apr_text *t = apr_palloc(p, sizeof(*t));
538
539    t->text = text;
540    t->next = NULL;
541
542    if (hdr->first == NULL) {
543	/* no text elements yet */
544	hdr->first = hdr->last = t;
545    }
546    else {
547	/* append to the last text element */
548	hdr->last->next = t;
549	hdr->last = t;
550    }
551}
552
553
554/* ---------------------------------------------------------------
555**
556** XML UTILITY FUNCTIONS
557*/
558
559/*
560** apr_xml_quote_string: quote an XML string
561**
562** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
563** If quotes is true, then replace '"' with '&quot;'.
564**
565** quotes is typically set to true for XML strings that will occur within
566** double quotes -- attribute values.
567*/
568APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
569                                               int quotes)
570{
571    const char *scan;
572    apr_size_t len = 0;
573    apr_size_t extra = 0;
574    char *qstr;
575    char *qscan;
576    char c;
577
578    for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
579	if (c == '<' || c == '>')
580	    extra += 3;		/* &lt; or &gt; */
581	else if (c == '&')
582	    extra += 4;		/* &amp; */
583	else if (quotes && c == '"')
584	    extra += 5;		/* &quot; */
585    }
586
587    /* nothing to do? */
588    if (extra == 0)
589	return s;
590
591    qstr = apr_palloc(p, len + extra + 1);
592    for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
593	if (c == '<') {
594	    *qscan++ = '&';
595	    *qscan++ = 'l';
596	    *qscan++ = 't';
597	    *qscan++ = ';';
598	}
599	else if (c == '>') {
600	    *qscan++ = '&';
601	    *qscan++ = 'g';
602	    *qscan++ = 't';
603	    *qscan++ = ';';
604	}
605	else if (c == '&') {
606	    *qscan++ = '&';
607	    *qscan++ = 'a';
608	    *qscan++ = 'm';
609	    *qscan++ = 'p';
610	    *qscan++ = ';';
611	}
612	else if (quotes && c == '"') {
613	    *qscan++ = '&';
614	    *qscan++ = 'q';
615	    *qscan++ = 'u';
616	    *qscan++ = 'o';
617	    *qscan++ = 't';
618	    *qscan++ = ';';
619	}
620	else {
621	    *qscan++ = c;
622	}
623    }
624
625    *qscan = '\0';
626    return qstr;
627}
628
/*
 * Number of decimal digits needed to print the namespace index `ns`
 * (1..10).  Values below 10 -- including negative ones -- yield 1.
 * The argument is evaluated several times; pass a side-effect-free
 * expression.
 */
#define APR_XML_NS_LEN(ns) \
    ((ns) >= 1000000000 ? 10 : \
     (ns) >= 100000000  ? 9  : \
     (ns) >= 10000000   ? 8  : \
     (ns) >= 1000000    ? 7  : \
     (ns) >= 100000     ? 6  : \
     (ns) >= 10000      ? 5  : \
     (ns) >= 1000       ? 4  : \
     (ns) >= 100        ? 3  : \
     (ns) >= 10         ? 2  : 1)
634
635static apr_size_t text_size(const apr_text *t)
636{
637    apr_size_t size = 0;
638
639    for (; t; t = t->next)
640	size += strlen(t->text);
641    return size;
642}
643
644static apr_size_t elem_size(const apr_xml_elem *elem, int style,
645                            apr_array_header_t *namespaces, int *ns_map)
646{
647    apr_size_t size;
648
649    if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
650	const apr_xml_attr *attr;
651
652	size = 0;
653
654	if (style == APR_XML_X2T_FULL_NS_LANG) {
655	    int i;
656
657	    /*
658	    ** The outer element will contain xmlns:ns%d="%s" attributes
659	    ** and an xml:lang attribute, if applicable.
660	    */
661
662	    for (i = namespaces->nelts; i--;) {
663		/* compute size of: ' xmlns:ns%d="%s"' */
664		size += (9 + APR_XML_NS_LEN(i) + 2 +
665			 strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1);
666	    }
667
668	    if (elem->lang != NULL) {
669		/* compute size of: ' xml:lang="%s"' */
670		size += 11 + strlen(elem->lang) + 1;
671	    }
672	}
673
674	if (elem->ns == APR_XML_NS_NONE) {
675	    /* compute size of: <%s> */
676	    size += 1 + strlen(elem->name) + 1;
677	}
678	else {
679	    int ns = ns_map ? ns_map[elem->ns] : elem->ns;
680
681	    /* compute size of: <ns%d:%s> */
682	    size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
683	}
684
685	if (APR_XML_ELEM_IS_EMPTY(elem)) {
686	    /* insert a closing "/" */
687	    size += 1;
688	}
689	else {
690	    /*
691	     * two of above plus "/":
692	     *     <ns%d:%s> ... </ns%d:%s>
693	     * OR  <%s> ... </%s>
694	     */
695	    size = 2 * size + 1;
696	}
697
698	for (attr = elem->attr; attr; attr = attr->next) {
699	    if (attr->ns == APR_XML_NS_NONE) {
700		/* compute size of: ' %s="%s"' */
701		size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
702	    }
703	    else {
704		/* compute size of: ' ns%d:%s="%s"' */
705                int ns = ns_map ? ns_map[attr->ns] : attr->ns;
706                size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
707	    }
708	}
709
710	/*
711	** If the element has an xml:lang value that is *different* from
712	** its parent, then add the thing in: ' xml:lang="%s"'.
713	**
714	** NOTE: we take advantage of the pointer equality established by
715	** the parsing for "inheriting" the xml:lang values from parents.
716	*/
717	if (elem->lang != NULL &&
718	    (elem->parent == NULL || elem->lang != elem->parent->lang)) {
719	    size += 11 + strlen(elem->lang) + 1;
720	}
721    }
722    else if (style == APR_XML_X2T_LANG_INNER) {
723	/*
724	 * This style prepends the xml:lang value plus a null terminator.
725	 * If a lang value is not present, then we insert a null term.
726	 */
727	size = elem->lang ? strlen(elem->lang) + 1 : 1;
728    }
729    else
730	size = 0;
731
732    size += text_size(elem->first_cdata.first);
733
734    for (elem = elem->first_child; elem; elem = elem->next) {
735	/* the size of the child element plus the CDATA that follows it */
736	size += (elem_size(elem, APR_XML_X2T_FULL, NULL, ns_map) +
737		 text_size(elem->following_cdata.first));
738    }
739
740    return size;
741}
742
743static char *write_text(char *s, const apr_text *t)
744{
745    for (; t; t = t->next) {
746	apr_size_t len = strlen(t->text);
747	memcpy(s, t->text, len);
748	s += len;
749    }
750    return s;
751}
752
753static char *write_elem(char *s, const apr_xml_elem *elem, int style,
754			apr_array_header_t *namespaces, int *ns_map)
755{
756    const apr_xml_elem *child;
757    apr_size_t len;
758    int ns;
759
760    if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
761	int empty = APR_XML_ELEM_IS_EMPTY(elem);
762	const apr_xml_attr *attr;
763
764	if (elem->ns == APR_XML_NS_NONE) {
765	    len = sprintf(s, "<%s", elem->name);
766	}
767	else {
768	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
769	    len = sprintf(s, "<ns%d:%s", ns, elem->name);
770	}
771	s += len;
772
773	for (attr = elem->attr; attr; attr = attr->next) {
774	    if (attr->ns == APR_XML_NS_NONE)
775		len = sprintf(s, " %s=\"%s\"", attr->name, attr->value);
776            else {
777                ns = ns_map ? ns_map[attr->ns] : attr->ns;
778                len = sprintf(s, " ns%d:%s=\"%s\"", ns, attr->name, attr->value);
779            }
780	    s += len;
781	}
782
783	/* add the xml:lang value if necessary */
784	if (elem->lang != NULL &&
785	    (style == APR_XML_X2T_FULL_NS_LANG ||
786	     elem->parent == NULL ||
787	     elem->lang != elem->parent->lang)) {
788	    len = sprintf(s, " xml:lang=\"%s\"", elem->lang);
789	    s += len;
790	}
791
792	/* add namespace definitions, if required */
793	if (style == APR_XML_X2T_FULL_NS_LANG) {
794	    int i;
795
796	    for (i = namespaces->nelts; i--;) {
797		len = sprintf(s, " xmlns:ns%d=\"%s\"", i,
798			      APR_XML_GET_URI_ITEM(namespaces, i));
799		s += len;
800	    }
801	}
802
803	/* no more to do. close it up and go. */
804	if (empty) {
805	    *s++ = '/';
806	    *s++ = '>';
807	    return s;
808	}
809
810	/* just close it */
811	*s++ = '>';
812    }
813    else if (style == APR_XML_X2T_LANG_INNER) {
814	/* prepend the xml:lang value */
815	if (elem->lang != NULL) {
816	    len = strlen(elem->lang);
817	    memcpy(s, elem->lang, len);
818	    s += len;
819	}
820	*s++ = '\0';
821    }
822
823    s = write_text(s, elem->first_cdata.first);
824
825    for (child = elem->first_child; child; child = child->next) {
826	s = write_elem(s, child, APR_XML_X2T_FULL, NULL, ns_map);
827	s = write_text(s, child->following_cdata.first);
828    }
829
830    if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
831	if (elem->ns == APR_XML_NS_NONE) {
832	    len = sprintf(s, "</%s>", elem->name);
833	}
834	else {
835	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
836	    len = sprintf(s, "</ns%d:%s>", ns, elem->name);
837	}
838	s += len;
839    }
840
841    return s;
842}
843
844APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem)
845{
846    apr_text *scan_txt;
847    apr_xml_attr *scan_attr;
848    apr_xml_elem *scan_elem;
849
850    /* convert the element's text */
851    for (scan_txt = elem->first_cdata.first;
852	 scan_txt != NULL;
853	 scan_txt = scan_txt->next) {
854	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
855    }
856    for (scan_txt = elem->following_cdata.first;
857	 scan_txt != NULL;
858	 scan_txt = scan_txt->next) {
859	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
860    }
861
862    /* convert the attribute values */
863    for (scan_attr = elem->attr;
864	 scan_attr != NULL;
865	 scan_attr = scan_attr->next) {
866	scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1);
867    }
868
869    /* convert the child elements */
870    for (scan_elem = elem->first_child;
871	 scan_elem != NULL;
872	 scan_elem = scan_elem->next) {
873	apr_xml_quote_elem(p, scan_elem);
874    }
875}
876
877/* convert an element to a text string */
878APU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem,
879                                  int style, apr_array_header_t *namespaces,
880                                  int *ns_map, const char **pbuf,
881                                  apr_size_t *psize)
882{
883    /* get the exact size, plus a null terminator */
884    apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
885    char *s = apr_palloc(p, size);
886
887    (void) write_elem(s, elem, style, namespaces, ns_map);
888    s[size - 1] = '\0';
889
890    *pbuf = s;
891    if (psize)
892	*psize = size;
893}
894
895APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p,
896                                             const apr_xml_elem *elem)
897{
898    if (elem->ns == APR_XML_NS_NONE) {
899	/*
900	 * The prefix (xml...) is already within the prop name, or
901	 * the element simply has no prefix.
902	 */
903	return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
904    }
905
906    return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
907}
908
909/* return the URI's (existing) index, or insert it and return a new index */
910APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
911                                    const char *uri)
912{
913    int i;
914    const char **pelt;
915
916    /* never insert an empty URI; this index is always APR_XML_NS_NONE */
917    if (*uri == '\0')
918        return APR_XML_NS_NONE;
919
920    for (i = uri_array->nelts; i--;) {
921	if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0)
922	    return i;
923    }
924
925    pelt = apr_array_push(uri_array);
926    *pelt = uri;		/* assume uri is const or in a pool */
927    return uri_array->nelts - 1;
928}
929
930/* convert the element to EBCDIC */
931#if APR_CHARSET_EBCDIC
932static apr_status_t apr_xml_parser_convert_elem(apr_xml_elem *e,
933                                                apr_xlate_t *convset)
934{
935    apr_xml_attr *a;
936    apr_xml_elem *ec;
937    apr_text *t;
938    apr_size_t inbytes_left, outbytes_left;
939    apr_status_t status;
940
941    inbytes_left = outbytes_left = strlen(e->name);
942    status = apr_xlate_conv_buffer(convset, e->name,  &inbytes_left, (char *) e->name, &outbytes_left);
943    if (status) {
944        return status;
945    }
946
947    for (t = e->first_cdata.first; t != NULL; t = t->next) {
948        inbytes_left = outbytes_left = strlen(t->text);
949        status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
950        if (status) {
951            return status;
952        }
953    }
954
955    for (t = e->following_cdata.first;  t != NULL; t = t->next) {
956        inbytes_left = outbytes_left = strlen(t->text);
957        status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
958        if (status) {
959            return status;
960        }
961    }
962
963    for (a = e->attr; a != NULL; a = a->next) {
964        inbytes_left = outbytes_left = strlen(a->name);
965        status = apr_xlate_conv_buffer(convset, a->name, &inbytes_left, (char *) a->name, &outbytes_left);
966        if (status) {
967            return status;
968        }
969        inbytes_left = outbytes_left = strlen(a->value);
970        status = apr_xlate_conv_buffer(convset, a->value, &inbytes_left, (char *) a->value, &outbytes_left);
971        if (status) {
972            return status;
973        }
974    }
975
976    for (ec = e->first_child; ec != NULL; ec = ec->next) {
977        status = apr_xml_parser_convert_elem(ec, convset);
978        if (status) {
979            return status;
980        }
981    }
982    return APR_SUCCESS;
983}
984
985/* convert the whole document to EBCDIC */
986APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool,
987                                                     apr_xml_doc *pdoc,
988                                                     apr_xlate_t *convset)
989{
990    apr_status_t status;
991    /* Don't convert the namespaces: they are constant! */
992    if (pdoc->namespaces != NULL) {
993        int i;
994        apr_array_header_t *namespaces;
995        namespaces = apr_array_make(pool, pdoc->namespaces->nelts, sizeof(const char *));
996        if (namespaces == NULL)
997            return APR_ENOMEM;
998        for (i = 0; i < pdoc->namespaces->nelts; i++) {
999            apr_size_t inbytes_left, outbytes_left;
1000            char *ptr = (char *) APR_XML_GET_URI_ITEM(pdoc->namespaces, i);
1001            ptr = apr_pstrdup(pool, ptr);
1002            if ( ptr == NULL)
1003                return APR_ENOMEM;
1004            inbytes_left = outbytes_left = strlen(ptr);
1005            status = apr_xlate_conv_buffer(convset, ptr, &inbytes_left, ptr, &outbytes_left);
1006            if (status) {
1007                return status;
1008            }
1009            apr_xml_insert_uri(namespaces, ptr);
1010        }
1011        pdoc->namespaces = namespaces;
1012    }
1013    return apr_xml_parser_convert_elem(pdoc->root, convset);
1014}
1015#endif
1016