1251876Speter/* Licensed to the Apache Software Foundation (ASF) under one or more
2251876Speter * contributor license agreements.  See the NOTICE file distributed with
3251876Speter * this work for additional information regarding copyright ownership.
4251876Speter * The ASF licenses this file to You under the Apache License, Version 2.0
5251876Speter * (the "License"); you may not use this file except in compliance with
6251876Speter * the License.  You may obtain a copy of the License at
7251876Speter *
8251876Speter *     http://www.apache.org/licenses/LICENSE-2.0
9251876Speter *
10251876Speter * Unless required by applicable law or agreed to in writing, software
11251876Speter * distributed under the License is distributed on an "AS IS" BASIS,
12251876Speter * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13251876Speter * See the License for the specific language governing permissions and
14251876Speter * limitations under the License.
15251876Speter */
16251876Speter
17251876Speter#include "apr.h"
18251876Speter#include "apr_strings.h"
19251876Speter
20251876Speter#define APR_WANT_STDIO          /* for sprintf() */
21251876Speter#define APR_WANT_STRFUNC
22251876Speter#include "apr_want.h"
23251876Speter
24251876Speter#include "apr_xml.h"
25251876Speter
26251876Speter#include "apu_config.h"
27251876Speter
28251876Speter#if defined(HAVE_XMLPARSE_XMLPARSE_H)
29251876Speter#include <xmlparse/xmlparse.h>
30251876Speter#elif defined(HAVE_XMLTOK_XMLPARSE_H)
31251876Speter#include <xmltok/xmlparse.h>
32251876Speter#elif defined(HAVE_XML_XMLPARSE_H)
33251876Speter#include <xml/xmlparse.h>
34251876Speter#else
35251876Speter#include <expat.h>
36251876Speter#endif
37251876Speter
/* carriage return + line feed */
#define DEBUG_CR "\r\n"

/*
 * Keyword strings, spelled out as explicit byte values instead of string
 * literals. Note that APR_KW_xmlns_lang holds "xml:lang", not "xmlns:lang".
 */
static const char APR_KW_xmlns[] = {
    0x78, 0x6D, 0x6C, 0x6E, 0x73,       /* x m l n s */
    '\0'
};
static const char APR_KW_xmlns_lang[] = {
    0x78, 0x6D, 0x6C, 0x3A,             /* x m l : */
    0x6C, 0x61, 0x6E, 0x67,             /* l a n g */
    '\0'
};
static const char APR_KW_DAV[] = {
    0x44, 0x41, 0x56, 0x3A,             /* D A V : */
    '\0'
};
43251876Speter
/* errors related to namespace processing */
#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
#define APR_XML_NS_ERROR_INVALID_DECL (-1001)

/*
 * Test for a name that begins with [Xx][Mm][Ll] (compared against the
 * ASCII byte values of those letters).
 *
 * The && chain stops at the first byte that does not match, so a NUL
 * terminator within the first three bytes ends the test without reading
 * past it.
 *
 * The argument is parenthesized so that expressions such as `p + 1`
 * expand correctly. It is evaluated more than once, so it must not have
 * side effects.
 */
#define APR_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 0x58 || (name)[0] == 0x78) && \
      ((name)[1] == 0x4D || (name)[1] == 0x6D) && \
      ((name)[2] == 0x4C || (name)[2] == 0x6C) )
53251876Speter
54251876Speter
55251876Speter/* the real (internal) definition of the parser context */
56251876Speterstruct apr_xml_parser {
57251876Speter    apr_xml_doc *doc;		/* the doc we're parsing */
58251876Speter    apr_pool_t *p;		/* the pool we allocate from */
59251876Speter    apr_xml_elem *cur_elem;	/* current element */
60251876Speter
61251876Speter    int error;			/* an error has occurred */
62251876Speter#define APR_XML_ERROR_EXPAT             1
63251876Speter#define APR_XML_ERROR_PARSE_DONE        2
64251876Speter/* also: public APR_XML_NS_ERROR_* values (if any) */
65251876Speter
66251876Speter    XML_Parser xp;              /* the actual (Expat) XML parser */
67251876Speter    enum XML_Error xp_err;      /* stored Expat error code */
68251876Speter};
69251876Speter
/*
 * One namespace declaration attached to an element. The declarations of
 * an element form a singly linked list through `next`.
 */
typedef struct apr_xml_ns_scope apr_xml_ns_scope;

struct apr_xml_ns_scope {
    /* prefix used for this namespace */
    const char *prefix;

    /* index into the namespace table */
    int ns;

    /* non-zero when the namespace URI is the empty string */
    int emptyURI;

    /* next scoped namespace */
    apr_xml_ns_scope *next;
};
77251876Speter
78251876Speter
79251876Speter/* return namespace table index for a given prefix */
80251876Speterstatic int find_prefix(apr_xml_parser *parser, const char *prefix)
81251876Speter{
82251876Speter    apr_xml_elem *elem = parser->cur_elem;
83251876Speter
84251876Speter    /*
85251876Speter    ** Walk up the tree, looking for a namespace scope that defines this
86251876Speter    ** prefix.
87251876Speter    */
88251876Speter    for (; elem; elem = elem->parent) {
89251876Speter	apr_xml_ns_scope *ns_scope = elem->ns_scope;
90251876Speter
91251876Speter	for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
92251876Speter	    if (strcmp(prefix, ns_scope->prefix) == 0) {
93251876Speter		if (ns_scope->emptyURI) {
94251876Speter		    /*
95251876Speter		    ** It is possible to set the default namespace to an
96251876Speter		    ** empty URI string; this resets the default namespace
97251876Speter		    ** to mean "no namespace." We just found the prefix
98251876Speter		    ** refers to an empty URI, so return "no namespace."
99251876Speter		    */
100251876Speter		    return APR_XML_NS_NONE;
101251876Speter		}
102251876Speter
103251876Speter		return ns_scope->ns;
104251876Speter	    }
105251876Speter	}
106251876Speter    }
107251876Speter
108251876Speter    /*
109251876Speter     * If the prefix is empty (""), this means that a prefix was not
110251876Speter     * specified in the element/attribute. The search that was performed
111251876Speter     * just above did not locate a default namespace URI (which is stored
112251876Speter     * into ns_scope with an empty prefix). This means the element/attribute
113251876Speter     * has "no namespace". We have a reserved value for this.
114251876Speter     */
115251876Speter    if (*prefix == '\0') {
116251876Speter	return APR_XML_NS_NONE;
117251876Speter    }
118251876Speter
119251876Speter    /* not found */
120251876Speter    return APR_XML_NS_ERROR_UNKNOWN_PREFIX;
121251876Speter}
122251876Speter
/*
 * Element-start callback (installed with XML_SetElementHandler).
 *
 * userdata is the apr_xml_parser; name is the element name as written in
 * the document (possibly "prefix:local"); attrs is a NULL-terminated array
 * of alternating attribute names and values.
 *
 * Steps, in order:
 *   1. allocate the element and copy its name and attributes into the pool
 *   2. link the element into the tree and make it the current element
 *   3. move xmlns / xmlns:foo attributes into the element's namespace
 *      scope list and the xml:lang attribute into elem->lang, unlinking
 *      them from the attribute list
 *   4. inherit the parent's language when none was given
 *   5. split "prefix:local" element and attribute names in place and
 *      resolve each prefix with find_prefix()
 *
 * On a namespace error, parser->error is set and the function returns
 * immediately; the element has already been linked into the tree by then.
 * Once parser->error is set, later calls return without doing anything.
 */
static void start_handler(void *userdata, const char *name, const char **attrs)
{
    apr_xml_parser *parser = userdata;
    apr_xml_elem *elem;
    apr_xml_attr *attr;
    apr_xml_attr *prev;
    char *colon;
    const char *quoted;
    char *elem_name;

    /* punt once we find an error */
    if (parser->error)
	return;

    elem = apr_pcalloc(parser->p, sizeof(*elem));

    /* prep the element (elem_name is the writable alias used to split it) */
    elem->name = elem_name = apr_pstrdup(parser->p, name);

    /* fill in the attributes (note: ends up in reverse order) */
    while (*attrs) {
	attr = apr_palloc(parser->p, sizeof(*attr));
	attr->name = apr_pstrdup(parser->p, *attrs++);
	attr->value = apr_pstrdup(parser->p, *attrs++);
	attr->next = elem->attr;
	elem->attr = attr;
    }

    /* hook the element into the tree */
    if (parser->cur_elem == NULL) {
	/* no current element; this also becomes the root */
	parser->cur_elem = parser->doc->root = elem;
    }
    else {
	/* this element appeared within the current elem */
	elem->parent = parser->cur_elem;

	/* set up the child/sibling links */
	if (elem->parent->last_child == NULL) {
	    /* no first child either */
	    elem->parent->first_child = elem->parent->last_child = elem;
	}
	else {
	    /* hook onto the end of the parent's children */
	    elem->parent->last_child->next = elem;
	    elem->parent->last_child = elem;
	}

	/* this element is now the current element */
	parser->cur_elem = elem;
    }

    /*
     * scan the attributes for namespace declarations
     *
     * "prev" trails "attr" and always names the last attribute that was
     * kept, so that a removed attribute can be unlinked from the list.
     */
    for (prev = NULL, attr = elem->attr;
	 attr;
	 attr = attr->next) {
	if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) {
	    /* points just past "xmlns", into the attribute's own name */
	    const char *prefix = &attr->name[5];
	    apr_xml_ns_scope *ns_scope;

	    /* test for xmlns:foo= form and xmlns= form (0x3A is ':') */
	    if (*prefix == 0x3A) {
                /* a namespace prefix declaration must have a
                   non-empty value. */
                if (attr->value[0] == '\0') {
                    parser->error = APR_XML_NS_ERROR_INVALID_DECL;
                    return;
                }
		++prefix;
            }
	    else if (*prefix != '\0') {
		/*
		 * The name merely starts with "xmlns" (neither "xmlns" nor
		 * "xmlns:..."), so it is an ordinary attribute.
		 */
		/* advance "prev" since "attr" is still present */
		prev = attr;
		continue;
	    }

	    /* quote the URI before we ever start working with it */
	    quoted = apr_xml_quote_string(parser->p, attr->value, 1);

	    /*
	     * build and insert the new scope; for the xmlns= form the
	     * prefix is the empty string
	     */
	    ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
	    ns_scope->prefix = prefix;
	    ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
	    ns_scope->emptyURI = *quoted == '\0';
	    ns_scope->next = elem->ns_scope;
	    elem->ns_scope = ns_scope;

	    /* remove this attribute from the element */
	    if (prev == NULL)
		elem->attr = attr->next;
	    else
		prev->next = attr->next;

	    /* Note: prev will not be advanced since we just removed "attr" */
	}
	else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) {
	    /* APR_KW_xmlns_lang spells "xml:lang" */
	    /* save away the language (in quoted form) */
	    elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);

	    /* remove this attribute from the element */
	    if (prev == NULL)
		elem->attr = attr->next;
	    else
		prev->next = attr->next;

	    /* Note: prev will not be advanced since we just removed "attr" */
	}
	else {
	    /* advance "prev" since "attr" is still present */
	    prev = attr;
	}
    }

    /*
    ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
    ** language from the parent element (if present).
    **
    ** NOTE: elem_size() *depends* upon this pointer equality.
    */
    if (elem->lang == NULL && elem->parent != NULL)
	elem->lang = elem->parent->lang;

    /* adjust the element's namespace (0x3A is ':') */
    colon = strchr(elem_name, 0x3A);
    if (colon == NULL) {
	/*
	 * The element is using the default namespace, which will always
	 * be found. Either it will be "no namespace", or a default
	 * namespace URI has been specified at some point.
	 */
	elem->ns = find_prefix(parser, "");
    }
    else if (APR_XML_NS_IS_RESERVED(elem->name)) {
	/* names starting with [Xx][Mm][Ll] keep their full name, no namespace */
	elem->ns = APR_XML_NS_NONE;
    }
    else {
	/*
	 * Split "prefix:local" in place: terminate the prefix at the colon,
	 * look it up, then point the name at the local part.
	 */
	*colon = '\0';
	elem->ns = find_prefix(parser, elem->name);
	elem->name = colon + 1;

	if (APR_XML_NS_IS_ERROR(elem->ns)) {
	    parser->error = elem->ns;
	    return;
	}
    }

    /* adjust all remaining attributes' namespaces */
    for (attr = elem->attr; attr; attr = attr->next) {
        /*
         * apr_xml_attr defines this as "const" but we dup'd it, so we
         * know that we can change it. a bit hacky, but the existing
         * structure def is best.
         */
        char *attr_name = (char *)attr->name;

	colon = strchr(attr_name, 0x3A);
	if (colon == NULL) {
	    /*
	     * Attributes do NOT use the default namespace. Therefore,
	     * we place them into the "no namespace" category.
	     */
	    attr->ns = APR_XML_NS_NONE;
	}
	else if (APR_XML_NS_IS_RESERVED(attr->name)) {
	    /* same rule as for element names starting with [Xx][Mm][Ll] */
	    attr->ns = APR_XML_NS_NONE;
	}
	else {
	    /* same in-place split as for the element name */
	    *colon = '\0';
	    attr->ns = find_prefix(parser, attr->name);
	    attr->name = colon + 1;

	    if (APR_XML_NS_IS_ERROR(attr->ns)) {
		parser->error = attr->ns;
		return;
	    }
	}
    }
}
301251876Speter
302251876Speterstatic void end_handler(void *userdata, const char *name)
303251876Speter{
304251876Speter    apr_xml_parser *parser = userdata;
305251876Speter
306251876Speter    /* punt once we find an error */
307251876Speter    if (parser->error)
308251876Speter	return;
309251876Speter
310251876Speter    /* pop up one level */
311251876Speter    parser->cur_elem = parser->cur_elem->parent;
312251876Speter}
313251876Speter
314251876Speterstatic void cdata_handler(void *userdata, const char *data, int len)
315251876Speter{
316251876Speter    apr_xml_parser *parser = userdata;
317251876Speter    apr_xml_elem *elem;
318251876Speter    apr_text_header *hdr;
319251876Speter    const char *s;
320251876Speter
321251876Speter    /* punt once we find an error */
322251876Speter    if (parser->error)
323251876Speter	return;
324251876Speter
325251876Speter    elem = parser->cur_elem;
326251876Speter    s = apr_pstrndup(parser->p, data, len);
327251876Speter
328251876Speter    if (elem->last_child == NULL) {
329251876Speter	/* no children yet. this cdata follows the start tag */
330251876Speter	hdr = &elem->first_cdata;
331251876Speter    }
332251876Speter    else {
333251876Speter	/* child elements exist. this cdata follows the last child. */
334251876Speter	hdr = &elem->last_child->following_cdata;
335251876Speter    }
336251876Speter
337251876Speter    apr_text_append(parser->p, hdr, s);
338251876Speter}
339251876Speter
340251876Speterstatic apr_status_t cleanup_parser(void *ctx)
341251876Speter{
342251876Speter    apr_xml_parser *parser = ctx;
343251876Speter
344251876Speter    XML_ParserFree(parser->xp);
345251876Speter    parser->xp = NULL;
346251876Speter
347251876Speter    return APR_SUCCESS;
348251876Speter}
349251876Speter
350251876Speter#if XML_MAJOR_VERSION > 1
351251876Speter/* Stop the parser if an entity declaration is hit. */
352251876Speterstatic void entity_declaration(void *userData, const XML_Char *entityName,
353251876Speter                               int is_parameter_entity, const XML_Char *value,
354251876Speter                               int value_length, const XML_Char *base,
355251876Speter                               const XML_Char *systemId, const XML_Char *publicId,
356251876Speter                               const XML_Char *notationName)
357251876Speter{
358251876Speter    apr_xml_parser *parser = userData;
359251876Speter
360251876Speter    XML_StopParser(parser->xp, XML_FALSE);
361251876Speter}
362251876Speter#else
363251876Speter/* A noop default_handler. */
364251876Speterstatic void default_handler(void *userData, const XML_Char *s, int len)
365251876Speter{
366251876Speter}
367251876Speter#endif
368251876Speter
369251876SpeterAPU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
370251876Speter{
371251876Speter    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
372251876Speter
373251876Speter    parser->p = pool;
374251876Speter    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
375251876Speter
376251876Speter    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
377251876Speter
378251876Speter    /* ### is there a way to avoid hard-coding this? */
379251876Speter    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);
380251876Speter
381251876Speter    parser->xp = XML_ParserCreate(NULL);
382251876Speter    if (parser->xp == NULL) {
383251876Speter        (*apr_pool_abort_get(pool))(APR_ENOMEM);
384251876Speter        return NULL;
385251876Speter    }
386251876Speter
387251876Speter    apr_pool_cleanup_register(pool, parser, cleanup_parser,
388251876Speter                              apr_pool_cleanup_null);
389251876Speter
390251876Speter    XML_SetUserData(parser->xp, parser);
391251876Speter    XML_SetElementHandler(parser->xp, start_handler, end_handler);
392251876Speter    XML_SetCharacterDataHandler(parser->xp, cdata_handler);
393251876Speter
394251876Speter    /* Prevent the "billion laughs" attack against expat by disabling
395251876Speter     * internal entity expansion.  With 2.x, forcibly stop the parser
396251876Speter     * if an entity is declared - this is safer and a more obvious
397251876Speter     * failure mode.  With older versions, installing a noop
398251876Speter     * DefaultHandler means that internal entities will be expanded as
399251876Speter     * the empty string, which is also sufficient to prevent the
400251876Speter     * attack. */
401251876Speter#if XML_MAJOR_VERSION > 1
402251876Speter    XML_SetEntityDeclHandler(parser->xp, entity_declaration);
403251876Speter#else
404251876Speter    XML_SetDefaultHandler(parser->xp, default_handler);
405251876Speter#endif
406251876Speter
407251876Speter    return parser;
408251876Speter}
409251876Speter
410251876Speterstatic apr_status_t do_parse(apr_xml_parser *parser,
411251876Speter                             const char *data, apr_size_t len,
412251876Speter                             int is_final)
413251876Speter{
414251876Speter    if (parser->xp == NULL) {
415251876Speter        parser->error = APR_XML_ERROR_PARSE_DONE;
416251876Speter    }
417251876Speter    else {
418251876Speter        int rv = XML_Parse(parser->xp, data, (int)len, is_final);
419251876Speter
420251876Speter        if (rv == 0) {
421251876Speter            parser->error = APR_XML_ERROR_EXPAT;
422251876Speter            parser->xp_err = XML_GetErrorCode(parser->xp);
423251876Speter        }
424251876Speter    }
425251876Speter
426251876Speter    /* ### better error code? */
427251876Speter    return parser->error ? APR_EGENERAL : APR_SUCCESS;
428251876Speter}
429251876Speter
430251876SpeterAPU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
431251876Speter                                              const char *data,
432251876Speter                                              apr_size_t len)
433251876Speter{
434251876Speter    return do_parse(parser, data, len, 0 /* is_final */);
435251876Speter}
436251876Speter
437251876SpeterAPU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
438251876Speter                                              apr_xml_doc **pdoc)
439251876Speter{
440251876Speter    char end;
441251876Speter    apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
442251876Speter
443251876Speter    /* get rid of the parser */
444251876Speter    (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
445251876Speter
446251876Speter    if (status)
447251876Speter        return status;
448251876Speter
449251876Speter    if (pdoc != NULL)
450251876Speter        *pdoc = parser->doc;
451251876Speter    return APR_SUCCESS;
452251876Speter}
453251876Speter
454251876SpeterAPU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
455251876Speter                                            char *errbuf,
456251876Speter                                            apr_size_t errbufsize)
457251876Speter{
458251876Speter    int error = parser->error;
459251876Speter    const char *msg;
460251876Speter
461251876Speter    /* clear our record of an error */
462251876Speter    parser->error = 0;
463251876Speter
464251876Speter    switch (error) {
465251876Speter    case 0:
466251876Speter        msg = "No error.";
467251876Speter        break;
468251876Speter
469251876Speter    case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
470251876Speter        msg = "An undefined namespace prefix was used.";
471251876Speter        break;
472251876Speter
473251876Speter    case APR_XML_NS_ERROR_INVALID_DECL:
474251876Speter        msg = "A namespace prefix was defined with an empty URI.";
475251876Speter        break;
476251876Speter
477251876Speter    case APR_XML_ERROR_EXPAT:
478251876Speter        (void) apr_snprintf(errbuf, errbufsize,
479251876Speter                            "XML parser error code: %s (%d)",
480251876Speter                            XML_ErrorString(parser->xp_err), parser->xp_err);
481251876Speter        return errbuf;
482251876Speter
483251876Speter    case APR_XML_ERROR_PARSE_DONE:
484251876Speter        msg = "The parser is not active.";
485251876Speter        break;
486251876Speter
487251876Speter    default:
488251876Speter        msg = "There was an unknown error within the XML body.";
489251876Speter        break;
490251876Speter    }
491251876Speter
492251876Speter    (void) apr_cpystrn(errbuf, msg, errbufsize);
493251876Speter    return errbuf;
494251876Speter}
495251876Speter
496251876SpeterAPU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
497251876Speter                                             apr_xml_parser **parser,
498251876Speter                                             apr_xml_doc **ppdoc,
499251876Speter                                             apr_file_t *xmlfd,
500251876Speter                                             apr_size_t buffer_length)
501251876Speter{
502251876Speter    apr_status_t rv;
503251876Speter    char *buffer;
504251876Speter    apr_size_t length;
505251876Speter
506251876Speter    *parser = apr_xml_parser_create(p);
507251876Speter    if (*parser == NULL) {
508251876Speter        /* FIXME: returning an error code would be nice,
509251876Speter         * but we dont get one ;( */
510251876Speter        return APR_EGENERAL;
511251876Speter    }
512251876Speter    buffer = apr_palloc(p, buffer_length);
513251876Speter    length = buffer_length;
514251876Speter
515251876Speter    rv = apr_file_read(xmlfd, buffer, &length);
516251876Speter
517251876Speter    while (rv == APR_SUCCESS) {
518251876Speter        rv = apr_xml_parser_feed(*parser, buffer, length);
519251876Speter        if (rv != APR_SUCCESS) {
520251876Speter            return rv;
521251876Speter        }
522251876Speter
523251876Speter        length = buffer_length;
524251876Speter        rv = apr_file_read(xmlfd, buffer, &length);
525251876Speter    }
526251876Speter    if (rv != APR_EOF) {
527251876Speter        return rv;
528251876Speter    }
529251876Speter    rv = apr_xml_parser_done(*parser, ppdoc);
530251876Speter    *parser = NULL;
531251876Speter    return rv;
532251876Speter}
533251876Speter
534251876SpeterAPU_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr,
535251876Speter                                  const char *text)
536251876Speter{
537251876Speter    apr_text *t = apr_palloc(p, sizeof(*t));
538251876Speter
539251876Speter    t->text = text;
540251876Speter    t->next = NULL;
541251876Speter
542251876Speter    if (hdr->first == NULL) {
543251876Speter	/* no text elements yet */
544251876Speter	hdr->first = hdr->last = t;
545251876Speter    }
546251876Speter    else {
547251876Speter	/* append to the last text element */
548251876Speter	hdr->last->next = t;
549251876Speter	hdr->last = t;
550251876Speter    }
551251876Speter}
552251876Speter
553251876Speter
554251876Speter/* ---------------------------------------------------------------
555251876Speter**
556251876Speter** XML UTILITY FUNCTIONS
557251876Speter*/
558251876Speter
559251876Speter/*
560251876Speter** apr_xml_quote_string: quote an XML string
561251876Speter**
562251876Speter** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
563251876Speter** If quotes is true, then replace '"' with '&quot;'.
564251876Speter**
565251876Speter** quotes is typically set to true for XML strings that will occur within
566251876Speter** double quotes -- attribute values.
567251876Speter*/
568251876SpeterAPU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
569251876Speter                                               int quotes)
570251876Speter{
571251876Speter    const char *scan;
572251876Speter    apr_size_t len = 0;
573251876Speter    apr_size_t extra = 0;
574251876Speter    char *qstr;
575251876Speter    char *qscan;
576251876Speter    char c;
577251876Speter
578251876Speter    for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
579251876Speter	if (c == '<' || c == '>')
580251876Speter	    extra += 3;		/* &lt; or &gt; */
581251876Speter	else if (c == '&')
582251876Speter	    extra += 4;		/* &amp; */
583251876Speter	else if (quotes && c == '"')
584251876Speter	    extra += 5;		/* &quot; */
585251876Speter    }
586251876Speter
587251876Speter    /* nothing to do? */
588251876Speter    if (extra == 0)
589251876Speter	return s;
590251876Speter
591251876Speter    qstr = apr_palloc(p, len + extra + 1);
592251876Speter    for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
593251876Speter	if (c == '<') {
594251876Speter	    *qscan++ = '&';
595251876Speter	    *qscan++ = 'l';
596251876Speter	    *qscan++ = 't';
597251876Speter	    *qscan++ = ';';
598251876Speter	}
599251876Speter	else if (c == '>') {
600251876Speter	    *qscan++ = '&';
601251876Speter	    *qscan++ = 'g';
602251876Speter	    *qscan++ = 't';
603251876Speter	    *qscan++ = ';';
604251876Speter	}
605251876Speter	else if (c == '&') {
606251876Speter	    *qscan++ = '&';
607251876Speter	    *qscan++ = 'a';
608251876Speter	    *qscan++ = 'm';
609251876Speter	    *qscan++ = 'p';
610251876Speter	    *qscan++ = ';';
611251876Speter	}
612251876Speter	else if (quotes && c == '"') {
613251876Speter	    *qscan++ = '&';
614251876Speter	    *qscan++ = 'q';
615251876Speter	    *qscan++ = 'u';
616251876Speter	    *qscan++ = 'o';
617251876Speter	    *qscan++ = 't';
618251876Speter	    *qscan++ = ';';
619251876Speter	}
620251876Speter	else {
621251876Speter	    *qscan++ = c;
622251876Speter	}
623251876Speter    }
624251876Speter
625251876Speter    *qscan = '\0';
626251876Speter    return qstr;
627251876Speter}
628251876Speter
/*
 * Number of characters needed to print the given integer in decimal.
 * Any value below 10 -- negative values included -- yields 1, and any
 * value of 1000000000 or more yields 10. The argument is evaluated more
 * than once.
 */
#define APR_XML_NS_LEN(ns) \
    ((ns) < 10         ? 1 : \
     (ns) < 100        ? 2 : \
     (ns) < 1000       ? 3 : \
     (ns) < 10000      ? 4 : \
     (ns) < 100000     ? 5 : \
     (ns) < 1000000    ? 6 : \
     (ns) < 10000000   ? 7 : \
     (ns) < 100000000  ? 8 : \
     (ns) < 1000000000 ? 9 : 10)
634251876Speter
635251876Speterstatic apr_size_t text_size(const apr_text *t)
636251876Speter{
637251876Speter    apr_size_t size = 0;
638251876Speter
639251876Speter    for (; t; t = t->next)
640251876Speter	size += strlen(t->text);
641251876Speter    return size;
642251876Speter}
643251876Speter
644251876Speterstatic apr_size_t elem_size(const apr_xml_elem *elem, int style,
645251876Speter                            apr_array_header_t *namespaces, int *ns_map)
646251876Speter{
647251876Speter    apr_size_t size;
648251876Speter
649251876Speter    if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
650251876Speter	const apr_xml_attr *attr;
651251876Speter
652251876Speter	size = 0;
653251876Speter
654251876Speter	if (style == APR_XML_X2T_FULL_NS_LANG) {
655251876Speter	    int i;
656251876Speter
657251876Speter	    /*
658251876Speter	    ** The outer element will contain xmlns:ns%d="%s" attributes
659251876Speter	    ** and an xml:lang attribute, if applicable.
660251876Speter	    */
661251876Speter
662251876Speter	    for (i = namespaces->nelts; i--;) {
663251876Speter		/* compute size of: ' xmlns:ns%d="%s"' */
664251876Speter		size += (9 + APR_XML_NS_LEN(i) + 2 +
665251876Speter			 strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1);
666251876Speter	    }
667251876Speter
668251876Speter	    if (elem->lang != NULL) {
669251876Speter		/* compute size of: ' xml:lang="%s"' */
670251876Speter		size += 11 + strlen(elem->lang) + 1;
671251876Speter	    }
672251876Speter	}
673251876Speter
674251876Speter	if (elem->ns == APR_XML_NS_NONE) {
675251876Speter	    /* compute size of: <%s> */
676251876Speter	    size += 1 + strlen(elem->name) + 1;
677251876Speter	}
678251876Speter	else {
679251876Speter	    int ns = ns_map ? ns_map[elem->ns] : elem->ns;
680251876Speter
681251876Speter	    /* compute size of: <ns%d:%s> */
682251876Speter	    size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
683251876Speter	}
684251876Speter
685251876Speter	if (APR_XML_ELEM_IS_EMPTY(elem)) {
686251876Speter	    /* insert a closing "/" */
687251876Speter	    size += 1;
688251876Speter	}
689251876Speter	else {
690251876Speter	    /*
691251876Speter	     * two of above plus "/":
692251876Speter	     *     <ns%d:%s> ... </ns%d:%s>
693251876Speter	     * OR  <%s> ... </%s>
694251876Speter	     */
695251876Speter	    size = 2 * size + 1;
696251876Speter	}
697251876Speter
698251876Speter	for (attr = elem->attr; attr; attr = attr->next) {
699251876Speter	    if (attr->ns == APR_XML_NS_NONE) {
700251876Speter		/* compute size of: ' %s="%s"' */
701251876Speter		size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
702251876Speter	    }
703251876Speter	    else {
704251876Speter		/* compute size of: ' ns%d:%s="%s"' */
705251876Speter                int ns = ns_map ? ns_map[attr->ns] : attr->ns;
706251876Speter                size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
707251876Speter	    }
708251876Speter	}
709251876Speter
710251876Speter	/*
711251876Speter	** If the element has an xml:lang value that is *different* from
712251876Speter	** its parent, then add the thing in: ' xml:lang="%s"'.
713251876Speter	**
714251876Speter	** NOTE: we take advantage of the pointer equality established by
715251876Speter	** the parsing for "inheriting" the xml:lang values from parents.
716251876Speter	*/
717251876Speter	if (elem->lang != NULL &&
718251876Speter	    (elem->parent == NULL || elem->lang != elem->parent->lang)) {
719251876Speter	    size += 11 + strlen(elem->lang) + 1;
720251876Speter	}
721251876Speter    }
722251876Speter    else if (style == APR_XML_X2T_LANG_INNER) {
723251876Speter	/*
724251876Speter	 * This style prepends the xml:lang value plus a null terminator.
725251876Speter	 * If a lang value is not present, then we insert a null term.
726251876Speter	 */
727251876Speter	size = elem->lang ? strlen(elem->lang) + 1 : 1;
728251876Speter    }
729251876Speter    else
730251876Speter	size = 0;
731251876Speter
732251876Speter    size += text_size(elem->first_cdata.first);
733251876Speter
734251876Speter    for (elem = elem->first_child; elem; elem = elem->next) {
735251876Speter	/* the size of the child element plus the CDATA that follows it */
736251876Speter	size += (elem_size(elem, APR_XML_X2T_FULL, NULL, ns_map) +
737251876Speter		 text_size(elem->following_cdata.first));
738251876Speter    }
739251876Speter
740251876Speter    return size;
741251876Speter}
742251876Speter
743251876Speterstatic char *write_text(char *s, const apr_text *t)
744251876Speter{
745251876Speter    for (; t; t = t->next) {
746251876Speter	apr_size_t len = strlen(t->text);
747251876Speter	memcpy(s, t->text, len);
748251876Speter	s += len;
749251876Speter    }
750251876Speter    return s;
751251876Speter}
752251876Speter
753251876Speterstatic char *write_elem(char *s, const apr_xml_elem *elem, int style,
754251876Speter			apr_array_header_t *namespaces, int *ns_map)
755251876Speter{
756251876Speter    const apr_xml_elem *child;
757251876Speter    apr_size_t len;
758251876Speter    int ns;
759251876Speter
760251876Speter    if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
761251876Speter	int empty = APR_XML_ELEM_IS_EMPTY(elem);
762251876Speter	const apr_xml_attr *attr;
763251876Speter
764251876Speter	if (elem->ns == APR_XML_NS_NONE) {
765251876Speter	    len = sprintf(s, "<%s", elem->name);
766251876Speter	}
767251876Speter	else {
768251876Speter	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
769251876Speter	    len = sprintf(s, "<ns%d:%s", ns, elem->name);
770251876Speter	}
771251876Speter	s += len;
772251876Speter
773251876Speter	for (attr = elem->attr; attr; attr = attr->next) {
774251876Speter	    if (attr->ns == APR_XML_NS_NONE)
775251876Speter		len = sprintf(s, " %s=\"%s\"", attr->name, attr->value);
776251876Speter            else {
777251876Speter                ns = ns_map ? ns_map[attr->ns] : attr->ns;
778251876Speter                len = sprintf(s, " ns%d:%s=\"%s\"", ns, attr->name, attr->value);
779251876Speter            }
780251876Speter	    s += len;
781251876Speter	}
782251876Speter
783251876Speter	/* add the xml:lang value if necessary */
784251876Speter	if (elem->lang != NULL &&
785251876Speter	    (style == APR_XML_X2T_FULL_NS_LANG ||
786251876Speter	     elem->parent == NULL ||
787251876Speter	     elem->lang != elem->parent->lang)) {
788251876Speter	    len = sprintf(s, " xml:lang=\"%s\"", elem->lang);
789251876Speter	    s += len;
790251876Speter	}
791251876Speter
792251876Speter	/* add namespace definitions, if required */
793251876Speter	if (style == APR_XML_X2T_FULL_NS_LANG) {
794251876Speter	    int i;
795251876Speter
796251876Speter	    for (i = namespaces->nelts; i--;) {
797251876Speter		len = sprintf(s, " xmlns:ns%d=\"%s\"", i,
798251876Speter			      APR_XML_GET_URI_ITEM(namespaces, i));
799251876Speter		s += len;
800251876Speter	    }
801251876Speter	}
802251876Speter
803251876Speter	/* no more to do. close it up and go. */
804251876Speter	if (empty) {
805251876Speter	    *s++ = '/';
806251876Speter	    *s++ = '>';
807251876Speter	    return s;
808251876Speter	}
809251876Speter
810251876Speter	/* just close it */
811251876Speter	*s++ = '>';
812251876Speter    }
813251876Speter    else if (style == APR_XML_X2T_LANG_INNER) {
814251876Speter	/* prepend the xml:lang value */
815251876Speter	if (elem->lang != NULL) {
816251876Speter	    len = strlen(elem->lang);
817251876Speter	    memcpy(s, elem->lang, len);
818251876Speter	    s += len;
819251876Speter	}
820251876Speter	*s++ = '\0';
821251876Speter    }
822251876Speter
823251876Speter    s = write_text(s, elem->first_cdata.first);
824251876Speter
825251876Speter    for (child = elem->first_child; child; child = child->next) {
826251876Speter	s = write_elem(s, child, APR_XML_X2T_FULL, NULL, ns_map);
827251876Speter	s = write_text(s, child->following_cdata.first);
828251876Speter    }
829251876Speter
830251876Speter    if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
831251876Speter	if (elem->ns == APR_XML_NS_NONE) {
832251876Speter	    len = sprintf(s, "</%s>", elem->name);
833251876Speter	}
834251876Speter	else {
835251876Speter	    ns = ns_map ? ns_map[elem->ns] : elem->ns;
836251876Speter	    len = sprintf(s, "</ns%d:%s>", ns, elem->name);
837251876Speter	}
838251876Speter	s += len;
839251876Speter    }
840251876Speter
841251876Speter    return s;
842251876Speter}
843251876Speter
844251876SpeterAPU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem)
845251876Speter{
846251876Speter    apr_text *scan_txt;
847251876Speter    apr_xml_attr *scan_attr;
848251876Speter    apr_xml_elem *scan_elem;
849251876Speter
850251876Speter    /* convert the element's text */
851251876Speter    for (scan_txt = elem->first_cdata.first;
852251876Speter	 scan_txt != NULL;
853251876Speter	 scan_txt = scan_txt->next) {
854251876Speter	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
855251876Speter    }
856251876Speter    for (scan_txt = elem->following_cdata.first;
857251876Speter	 scan_txt != NULL;
858251876Speter	 scan_txt = scan_txt->next) {
859251876Speter	scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
860251876Speter    }
861251876Speter
862251876Speter    /* convert the attribute values */
863251876Speter    for (scan_attr = elem->attr;
864251876Speter	 scan_attr != NULL;
865251876Speter	 scan_attr = scan_attr->next) {
866251876Speter	scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1);
867251876Speter    }
868251876Speter
869251876Speter    /* convert the child elements */
870251876Speter    for (scan_elem = elem->first_child;
871251876Speter	 scan_elem != NULL;
872251876Speter	 scan_elem = scan_elem->next) {
873251876Speter	apr_xml_quote_elem(p, scan_elem);
874251876Speter    }
875251876Speter}
876251876Speter
877251876Speter/* convert an element to a text string */
878251876SpeterAPU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem,
879251876Speter                                  int style, apr_array_header_t *namespaces,
880251876Speter                                  int *ns_map, const char **pbuf,
881251876Speter                                  apr_size_t *psize)
882251876Speter{
883251876Speter    /* get the exact size, plus a null terminator */
884251876Speter    apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
885251876Speter    char *s = apr_palloc(p, size);
886251876Speter
887251876Speter    (void) write_elem(s, elem, style, namespaces, ns_map);
888251876Speter    s[size - 1] = '\0';
889251876Speter
890251876Speter    *pbuf = s;
891251876Speter    if (psize)
892251876Speter	*psize = size;
893251876Speter}
894251876Speter
895251876SpeterAPU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p,
896251876Speter                                             const apr_xml_elem *elem)
897251876Speter{
898251876Speter    if (elem->ns == APR_XML_NS_NONE) {
899251876Speter	/*
900251876Speter	 * The prefix (xml...) is already within the prop name, or
901251876Speter	 * the element simply has no prefix.
902251876Speter	 */
903251876Speter	return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
904251876Speter    }
905251876Speter
906251876Speter    return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
907251876Speter}
908251876Speter
909251876Speter/* return the URI's (existing) index, or insert it and return a new index */
910251876SpeterAPU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
911251876Speter                                    const char *uri)
912251876Speter{
913251876Speter    int i;
914251876Speter    const char **pelt;
915251876Speter
916251876Speter    /* never insert an empty URI; this index is always APR_XML_NS_NONE */
917251876Speter    if (*uri == '\0')
918251876Speter        return APR_XML_NS_NONE;
919251876Speter
920251876Speter    for (i = uri_array->nelts; i--;) {
921251876Speter	if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0)
922251876Speter	    return i;
923251876Speter    }
924251876Speter
925251876Speter    pelt = apr_array_push(uri_array);
926251876Speter    *pelt = uri;		/* assume uri is const or in a pool */
927251876Speter    return uri_array->nelts - 1;
928251876Speter}
929251876Speter
930251876Speter/* convert the element to EBCDIC */
931251876Speter#if APR_CHARSET_EBCDIC
932251876Speterstatic apr_status_t apr_xml_parser_convert_elem(apr_xml_elem *e,
933251876Speter                                                apr_xlate_t *convset)
934251876Speter{
935251876Speter    apr_xml_attr *a;
936251876Speter    apr_xml_elem *ec;
937251876Speter    apr_text *t;
938251876Speter    apr_size_t inbytes_left, outbytes_left;
939251876Speter    apr_status_t status;
940251876Speter
941251876Speter    inbytes_left = outbytes_left = strlen(e->name);
942251876Speter    status = apr_xlate_conv_buffer(convset, e->name,  &inbytes_left, (char *) e->name, &outbytes_left);
943251876Speter    if (status) {
944251876Speter        return status;
945251876Speter    }
946251876Speter
947251876Speter    for (t = e->first_cdata.first; t != NULL; t = t->next) {
948251876Speter        inbytes_left = outbytes_left = strlen(t->text);
949251876Speter        status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
950251876Speter        if (status) {
951251876Speter            return status;
952251876Speter        }
953251876Speter    }
954251876Speter
955251876Speter    for (t = e->following_cdata.first;  t != NULL; t = t->next) {
956251876Speter        inbytes_left = outbytes_left = strlen(t->text);
957251876Speter        status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left);
958251876Speter        if (status) {
959251876Speter            return status;
960251876Speter        }
961251876Speter    }
962251876Speter
963251876Speter    for (a = e->attr; a != NULL; a = a->next) {
964251876Speter        inbytes_left = outbytes_left = strlen(a->name);
965251876Speter        status = apr_xlate_conv_buffer(convset, a->name, &inbytes_left, (char *) a->name, &outbytes_left);
966251876Speter        if (status) {
967251876Speter            return status;
968251876Speter        }
969251876Speter        inbytes_left = outbytes_left = strlen(a->value);
970251876Speter        status = apr_xlate_conv_buffer(convset, a->value, &inbytes_left, (char *) a->value, &outbytes_left);
971251876Speter        if (status) {
972251876Speter            return status;
973251876Speter        }
974251876Speter    }
975251876Speter
976251876Speter    for (ec = e->first_child; ec != NULL; ec = ec->next) {
977251876Speter        status = apr_xml_parser_convert_elem(ec, convset);
978251876Speter        if (status) {
979251876Speter            return status;
980251876Speter        }
981251876Speter    }
982251876Speter    return APR_SUCCESS;
983251876Speter}
984251876Speter
985251876Speter/* convert the whole document to EBCDIC */
986251876SpeterAPU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool,
987251876Speter                                                     apr_xml_doc *pdoc,
988251876Speter                                                     apr_xlate_t *convset)
989251876Speter{
990251876Speter    apr_status_t status;
991251876Speter    /* Don't convert the namespaces: they are constant! */
992251876Speter    if (pdoc->namespaces != NULL) {
993251876Speter        int i;
994251876Speter        apr_array_header_t *namespaces;
995251876Speter        namespaces = apr_array_make(pool, pdoc->namespaces->nelts, sizeof(const char *));
996251876Speter        if (namespaces == NULL)
997251876Speter            return APR_ENOMEM;
998251876Speter        for (i = 0; i < pdoc->namespaces->nelts; i++) {
999251876Speter            apr_size_t inbytes_left, outbytes_left;
1000251876Speter            char *ptr = (char *) APR_XML_GET_URI_ITEM(pdoc->namespaces, i);
1001251876Speter            ptr = apr_pstrdup(pool, ptr);
1002251876Speter            if ( ptr == NULL)
1003251876Speter                return APR_ENOMEM;
1004251876Speter            inbytes_left = outbytes_left = strlen(ptr);
1005251876Speter            status = apr_xlate_conv_buffer(convset, ptr, &inbytes_left, ptr, &outbytes_left);
1006251876Speter            if (status) {
1007251876Speter                return status;
1008251876Speter            }
1009251876Speter            apr_xml_insert_uri(namespaces, ptr);
1010251876Speter        }
1011251876Speter        pdoc->namespaces = namespaces;
1012251876Speter    }
1013251876Speter    return apr_xml_parser_convert_elem(pdoc->root, convset);
1014251876Speter}
1015251876Speter#endif
1016