1251876Speter/* Licensed to the Apache Software Foundation (ASF) under one or more 2251876Speter * contributor license agreements. See the NOTICE file distributed with 3251876Speter * this work for additional information regarding copyright ownership. 4251876Speter * The ASF licenses this file to You under the Apache License, Version 2.0 5251876Speter * (the "License"); you may not use this file except in compliance with 6251876Speter * the License. You may obtain a copy of the License at 7251876Speter * 8251876Speter * http://www.apache.org/licenses/LICENSE-2.0 9251876Speter * 10251876Speter * Unless required by applicable law or agreed to in writing, software 11251876Speter * distributed under the License is distributed on an "AS IS" BASIS, 12251876Speter * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13251876Speter * See the License for the specific language governing permissions and 14251876Speter * limitations under the License. 15251876Speter */ 16251876Speter 17251876Speter#include "apr.h" 18251876Speter#include "apr_strings.h" 19251876Speter 20251876Speter#define APR_WANT_STDIO /* for sprintf() */ 21251876Speter#define APR_WANT_STRFUNC 22251876Speter#include "apr_want.h" 23251876Speter 24251876Speter#include "apr_xml.h" 25251876Speter 26251876Speter#include "apu_config.h" 27251876Speter 28251876Speter#if defined(HAVE_XMLPARSE_XMLPARSE_H) 29251876Speter#include <xmlparse/xmlparse.h> 30251876Speter#elif defined(HAVE_XMLTOK_XMLPARSE_H) 31251876Speter#include <xmltok/xmlparse.h> 32251876Speter#elif defined(HAVE_XML_XMLPARSE_H) 33251876Speter#include <xml/xmlparse.h> 34251876Speter#else 35251876Speter#include <expat.h> 36251876Speter#endif 37251876Speter 38251876Speter#define DEBUG_CR "\r\n" 39251876Speter 40251876Speterstatic const char APR_KW_xmlns[] = { 0x78, 0x6D, 0x6C, 0x6E, 0x73, '\0' }; 41251876Speterstatic const char APR_KW_xmlns_lang[] = { 0x78, 0x6D, 0x6C, 0x3A, 0x6C, 0x61, 0x6E, 0x67, '\0' }; 42251876Speterstatic const char APR_KW_DAV[] = { 0x44, 0x41, 0x56, 0x3A, '\0' }; 43251876Speter 44251876Speter/* errors related to namespace processing */ 45251876Speter#define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000) 46251876Speter#define APR_XML_NS_ERROR_INVALID_DECL (-1001) 47251876Speter 48251876Speter/* test for a namespace prefix that begins with [Xx][Mm][Ll] */ 49251876Speter#define APR_XML_NS_IS_RESERVED(name) \ 50251876Speter ( (name[0] == 0x58 || name[0] == 0x78) && \ 51251876Speter (name[1] == 0x4D || name[1] == 0x6D) && \ 52251876Speter (name[2] == 0x4C || name[2] == 0x6C) ) 53251876Speter 54251876Speter 55251876Speter/* the real (internal) definition of the parser context */ 56251876Speterstruct apr_xml_parser { 57251876Speter apr_xml_doc *doc; /* the doc we're parsing */ 58251876Speter apr_pool_t *p; /* the pool we allocate from */ 59251876Speter apr_xml_elem *cur_elem; /* current element */ 60251876Speter 61251876Speter int error; /* an error has occurred */ 62251876Speter#define APR_XML_ERROR_EXPAT 1 63251876Speter#define APR_XML_ERROR_PARSE_DONE 2 64251876Speter/* also: public APR_XML_NS_ERROR_* values (if any) */ 65251876Speter 66251876Speter XML_Parser xp; /* the actual (Expat) XML parser */ 67251876Speter enum XML_Error xp_err; /* stored Expat error code */ 68251876Speter}; 69251876Speter 70251876Speter/* struct for scoping namespace declarations */ 71251876Spetertypedef struct apr_xml_ns_scope { 72251876Speter const char *prefix; /* prefix used for this ns */ 73251876Speter int ns; /* index into namespace table */ 74251876Speter int emptyURI; /* the namespace URI is the empty string */ 75251876Speter struct apr_xml_ns_scope *next; /* next scoped namespace */ 76251876Speter} apr_xml_ns_scope; 77251876Speter 78251876Speter 79251876Speter/* return namespace table index for a given prefix */ 80251876Speterstatic int find_prefix(apr_xml_parser *parser, const char *prefix) 81251876Speter{ 82251876Speter apr_xml_elem *elem = parser->cur_elem; 83251876Speter 84251876Speter /* 85251876Speter ** Walk up the tree, looking for a namespace scope that defines this 86251876Speter ** prefix. 87251876Speter */ 88251876Speter for (; elem; elem = elem->parent) { 89251876Speter apr_xml_ns_scope *ns_scope = elem->ns_scope; 90251876Speter 91251876Speter for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) { 92251876Speter if (strcmp(prefix, ns_scope->prefix) == 0) { 93251876Speter if (ns_scope->emptyURI) { 94251876Speter /* 95251876Speter ** It is possible to set the default namespace to an 96251876Speter ** empty URI string; this resets the default namespace 97251876Speter ** to mean "no namespace." We just found the prefix 98251876Speter ** refers to an empty URI, so return "no namespace." 99251876Speter */ 100251876Speter return APR_XML_NS_NONE; 101251876Speter } 102251876Speter 103251876Speter return ns_scope->ns; 104251876Speter } 105251876Speter } 106251876Speter } 107251876Speter 108251876Speter /* 109251876Speter * If the prefix is empty (""), this means that a prefix was not 110251876Speter * specified in the element/attribute. The search that was performed 111251876Speter * just above did not locate a default namespace URI (which is stored 112251876Speter * into ns_scope with an empty prefix). This means the element/attribute 113251876Speter * has "no namespace". We have a reserved value for this. 114251876Speter */ 115251876Speter if (*prefix == '\0') { 116251876Speter return APR_XML_NS_NONE; 117251876Speter } 118251876Speter 119251876Speter /* not found */ 120251876Speter return APR_XML_NS_ERROR_UNKNOWN_PREFIX; 121251876Speter} 122251876Speter 123251876Speterstatic void start_handler(void *userdata, const char *name, const char **attrs) 124251876Speter{ 125251876Speter apr_xml_parser *parser = userdata; 126251876Speter apr_xml_elem *elem; 127251876Speter apr_xml_attr *attr; 128251876Speter apr_xml_attr *prev; 129251876Speter char *colon; 130251876Speter const char *quoted; 131251876Speter char *elem_name; 132251876Speter 133251876Speter /* punt once we find an error */ 134251876Speter if (parser->error) 135251876Speter return; 136251876Speter 137251876Speter elem = apr_pcalloc(parser->p, sizeof(*elem)); 138251876Speter 139251876Speter /* prep the element */ 140251876Speter elem->name = elem_name = apr_pstrdup(parser->p, name); 141251876Speter 142251876Speter /* fill in the attributes (note: ends up in reverse order) */ 143251876Speter while (*attrs) { 144251876Speter attr = apr_palloc(parser->p, sizeof(*attr)); 145251876Speter attr->name = apr_pstrdup(parser->p, *attrs++); 146251876Speter attr->value = apr_pstrdup(parser->p, *attrs++); 147251876Speter attr->next = elem->attr; 148251876Speter elem->attr = attr; 149251876Speter } 150251876Speter 151251876Speter /* hook the element into the tree */ 152251876Speter if (parser->cur_elem == NULL) { 153251876Speter /* no current element; this also becomes the root */ 154251876Speter parser->cur_elem = parser->doc->root = elem; 155251876Speter } 156251876Speter else { 157251876Speter /* this element appeared within the current elem */ 158251876Speter elem->parent = parser->cur_elem; 159251876Speter 160251876Speter /* set up the child/sibling links */ 161251876Speter if (elem->parent->last_child == NULL) { 162251876Speter /* no first child either */ 163251876Speter elem->parent->first_child = elem->parent->last_child = elem; 164251876Speter } 165251876Speter else { 166251876Speter /* hook onto the end of the parent's children */ 167251876Speter elem->parent->last_child->next = elem; 168251876Speter elem->parent->last_child = elem; 169251876Speter } 170251876Speter 171251876Speter /* this element is now the current element */ 172251876Speter parser->cur_elem = elem; 173251876Speter } 174251876Speter 175251876Speter /* scan the attributes for namespace declarations */ 176251876Speter for (prev = NULL, attr = elem->attr; 177251876Speter attr; 178251876Speter attr = attr->next) { 179251876Speter if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) { 180251876Speter const char *prefix = &attr->name[5]; 181251876Speter apr_xml_ns_scope *ns_scope; 182251876Speter 183251876Speter /* test for xmlns:foo= form and xmlns= form */ 184251876Speter if (*prefix == 0x3A) { 185251876Speter /* a namespace prefix declaration must have a 186251876Speter non-empty value. */ 187251876Speter if (attr->value[0] == '\0') { 188251876Speter parser->error = APR_XML_NS_ERROR_INVALID_DECL; 189251876Speter return; 190251876Speter } 191251876Speter ++prefix; 192251876Speter } 193251876Speter else if (*prefix != '\0') { 194251876Speter /* advance "prev" since "attr" is still present */ 195251876Speter prev = attr; 196251876Speter continue; 197251876Speter } 198251876Speter 199251876Speter /* quote the URI before we ever start working with it */ 200251876Speter quoted = apr_xml_quote_string(parser->p, attr->value, 1); 201251876Speter 202251876Speter /* build and insert the new scope */ 203251876Speter ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope)); 204251876Speter ns_scope->prefix = prefix; 205251876Speter ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted); 206251876Speter ns_scope->emptyURI = *quoted == '\0'; 207251876Speter ns_scope->next = elem->ns_scope; 208251876Speter elem->ns_scope = ns_scope; 209251876Speter 210251876Speter /* remove this attribute from the element */ 211251876Speter if (prev == NULL) 212251876Speter elem->attr = attr->next; 213251876Speter else 214251876Speter prev->next = attr->next; 215251876Speter 216251876Speter /* Note: prev will not be advanced since we just removed "attr" */ 217251876Speter } 218251876Speter else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) { 219251876Speter /* save away the language (in quoted form) */ 220251876Speter elem->lang = apr_xml_quote_string(parser->p, attr->value, 1); 221251876Speter 222251876Speter /* remove this attribute from the element */ 223251876Speter if (prev == NULL) 224251876Speter elem->attr = attr->next; 225251876Speter else 226251876Speter prev->next = attr->next; 227251876Speter 228251876Speter /* Note: prev will not be advanced since we just removed "attr" */ 229251876Speter } 230251876Speter else { 231251876Speter /* advance "prev" since "attr" is still present */ 232251876Speter prev = attr; 233251876Speter } 234251876Speter } 235251876Speter 236251876Speter /* 237251876Speter ** If an xml:lang attribute didn't exist (lang==NULL), then copy the 238251876Speter ** language from the parent element (if present). 239251876Speter ** 240251876Speter ** NOTE: elem_size() *depends* upon this pointer equality. 241251876Speter */ 242251876Speter if (elem->lang == NULL && elem->parent != NULL) 243251876Speter elem->lang = elem->parent->lang; 244251876Speter 245251876Speter /* adjust the element's namespace */ 246251876Speter colon = strchr(elem_name, 0x3A); 247251876Speter if (colon == NULL) { 248251876Speter /* 249251876Speter * The element is using the default namespace, which will always 250251876Speter * be found. Either it will be "no namespace", or a default 251251876Speter * namespace URI has been specified at some point. 252251876Speter */ 253251876Speter elem->ns = find_prefix(parser, ""); 254251876Speter } 255251876Speter else if (APR_XML_NS_IS_RESERVED(elem->name)) { 256251876Speter elem->ns = APR_XML_NS_NONE; 257251876Speter } 258251876Speter else { 259251876Speter *colon = '\0'; 260251876Speter elem->ns = find_prefix(parser, elem->name); 261251876Speter elem->name = colon + 1; 262251876Speter 263251876Speter if (APR_XML_NS_IS_ERROR(elem->ns)) { 264251876Speter parser->error = elem->ns; 265251876Speter return; 266251876Speter } 267251876Speter } 268251876Speter 269251876Speter /* adjust all remaining attributes' namespaces */ 270251876Speter for (attr = elem->attr; attr; attr = attr->next) { 271251876Speter /* 272251876Speter * apr_xml_attr defines this as "const" but we dup'd it, so we 273251876Speter * know that we can change it. a bit hacky, but the existing 274251876Speter * structure def is best. 275251876Speter */ 276251876Speter char *attr_name = (char *)attr->name; 277251876Speter 278251876Speter colon = strchr(attr_name, 0x3A); 279251876Speter if (colon == NULL) { 280251876Speter /* 281251876Speter * Attributes do NOT use the default namespace. Therefore, 282251876Speter * we place them into the "no namespace" category. 283251876Speter */ 284251876Speter attr->ns = APR_XML_NS_NONE; 285251876Speter } 286251876Speter else if (APR_XML_NS_IS_RESERVED(attr->name)) { 287251876Speter attr->ns = APR_XML_NS_NONE; 288251876Speter } 289251876Speter else { 290251876Speter *colon = '\0'; 291251876Speter attr->ns = find_prefix(parser, attr->name); 292251876Speter attr->name = colon + 1; 293251876Speter 294251876Speter if (APR_XML_NS_IS_ERROR(attr->ns)) { 295251876Speter parser->error = attr->ns; 296251876Speter return; 297251876Speter } 298251876Speter } 299251876Speter } 300251876Speter} 301251876Speter 302251876Speterstatic void end_handler(void *userdata, const char *name) 303251876Speter{ 304251876Speter apr_xml_parser *parser = userdata; 305251876Speter 306251876Speter /* punt once we find an error */ 307251876Speter if (parser->error) 308251876Speter return; 309251876Speter 310251876Speter /* pop up one level */ 311251876Speter parser->cur_elem = parser->cur_elem->parent; 312251876Speter} 313251876Speter 314251876Speterstatic void cdata_handler(void *userdata, const char *data, int len) 315251876Speter{ 316251876Speter apr_xml_parser *parser = userdata; 317251876Speter apr_xml_elem *elem; 318251876Speter apr_text_header *hdr; 319251876Speter const char *s; 320251876Speter 321251876Speter /* punt once we find an error */ 322251876Speter if (parser->error) 323251876Speter return; 324251876Speter 325251876Speter elem = parser->cur_elem; 326251876Speter s = apr_pstrndup(parser->p, data, len); 327251876Speter 328251876Speter if (elem->last_child == NULL) { 329251876Speter /* no children yet. this cdata follows the start tag */ 330251876Speter hdr = &elem->first_cdata; 331251876Speter } 332251876Speter else { 333251876Speter /* child elements exist. this cdata follows the last child. */ 334251876Speter hdr = &elem->last_child->following_cdata; 335251876Speter } 336251876Speter 337251876Speter apr_text_append(parser->p, hdr, s); 338251876Speter} 339251876Speter 340251876Speterstatic apr_status_t cleanup_parser(void *ctx) 341251876Speter{ 342251876Speter apr_xml_parser *parser = ctx; 343251876Speter 344251876Speter XML_ParserFree(parser->xp); 345251876Speter parser->xp = NULL; 346251876Speter 347251876Speter return APR_SUCCESS; 348251876Speter} 349251876Speter 350251876Speter#if XML_MAJOR_VERSION > 1 351251876Speter/* Stop the parser if an entity declaration is hit. */ 352251876Speterstatic void entity_declaration(void *userData, const XML_Char *entityName, 353251876Speter int is_parameter_entity, const XML_Char *value, 354251876Speter int value_length, const XML_Char *base, 355251876Speter const XML_Char *systemId, const XML_Char *publicId, 356251876Speter const XML_Char *notationName) 357251876Speter{ 358251876Speter apr_xml_parser *parser = userData; 359251876Speter 360251876Speter XML_StopParser(parser->xp, XML_FALSE); 361251876Speter} 362251876Speter#else 363251876Speter/* A noop default_handler. */ 364251876Speterstatic void default_handler(void *userData, const XML_Char *s, int len) 365251876Speter{ 366251876Speter} 367251876Speter#endif 368251876Speter 369251876SpeterAPU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool) 370251876Speter{ 371251876Speter apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser)); 372251876Speter 373251876Speter parser->p = pool; 374251876Speter parser->doc = apr_pcalloc(pool, sizeof(*parser->doc)); 375251876Speter 376251876Speter parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *)); 377251876Speter 378251876Speter /* ### is there a way to avoid hard-coding this? */ 379251876Speter apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV); 380251876Speter 381251876Speter parser->xp = XML_ParserCreate(NULL); 382251876Speter if (parser->xp == NULL) { 383251876Speter (*apr_pool_abort_get(pool))(APR_ENOMEM); 384251876Speter return NULL; 385251876Speter } 386251876Speter 387251876Speter apr_pool_cleanup_register(pool, parser, cleanup_parser, 388251876Speter apr_pool_cleanup_null); 389251876Speter 390251876Speter XML_SetUserData(parser->xp, parser); 391251876Speter XML_SetElementHandler(parser->xp, start_handler, end_handler); 392251876Speter XML_SetCharacterDataHandler(parser->xp, cdata_handler); 393251876Speter 394251876Speter /* Prevent the "billion laughs" attack against expat by disabling 395251876Speter * internal entity expansion. With 2.x, forcibly stop the parser 396251876Speter * if an entity is declared - this is safer and a more obvious 397251876Speter * failure mode. With older versions, installing a noop 398251876Speter * DefaultHandler means that internal entities will be expanded as 399251876Speter * the empty string, which is also sufficient to prevent the 400251876Speter * attack. */ 401251876Speter#if XML_MAJOR_VERSION > 1 402251876Speter XML_SetEntityDeclHandler(parser->xp, entity_declaration); 403251876Speter#else 404251876Speter XML_SetDefaultHandler(parser->xp, default_handler); 405251876Speter#endif 406251876Speter 407251876Speter return parser; 408251876Speter} 409251876Speter 410251876Speterstatic apr_status_t do_parse(apr_xml_parser *parser, 411251876Speter const char *data, apr_size_t len, 412251876Speter int is_final) 413251876Speter{ 414251876Speter if (parser->xp == NULL) { 415251876Speter parser->error = APR_XML_ERROR_PARSE_DONE; 416251876Speter } 417251876Speter else { 418251876Speter int rv = XML_Parse(parser->xp, data, (int)len, is_final); 419251876Speter 420251876Speter if (rv == 0) { 421251876Speter parser->error = APR_XML_ERROR_EXPAT; 422251876Speter parser->xp_err = XML_GetErrorCode(parser->xp); 423251876Speter } 424251876Speter } 425251876Speter 426251876Speter /* ### better error code? */ 427251876Speter return parser->error ? APR_EGENERAL : APR_SUCCESS; 428251876Speter} 429251876Speter 430251876SpeterAPU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser, 431251876Speter const char *data, 432251876Speter apr_size_t len) 433251876Speter{ 434251876Speter return do_parse(parser, data, len, 0 /* is_final */); 435251876Speter} 436251876Speter 437251876SpeterAPU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser, 438251876Speter apr_xml_doc **pdoc) 439251876Speter{ 440251876Speter char end; 441251876Speter apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */); 442251876Speter 443251876Speter /* get rid of the parser */ 444251876Speter (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser); 445251876Speter 446251876Speter if (status) 447251876Speter return status; 448251876Speter 449251876Speter if (pdoc != NULL) 450251876Speter *pdoc = parser->doc; 451251876Speter return APR_SUCCESS; 452251876Speter} 453251876Speter 454251876SpeterAPU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser, 455251876Speter char *errbuf, 456251876Speter apr_size_t errbufsize) 457251876Speter{ 458251876Speter int error = parser->error; 459251876Speter const char *msg; 460251876Speter 461251876Speter /* clear our record of an error */ 462251876Speter parser->error = 0; 463251876Speter 464251876Speter switch (error) { 465251876Speter case 0: 466251876Speter msg = "No error."; 467251876Speter break; 468251876Speter 469251876Speter case APR_XML_NS_ERROR_UNKNOWN_PREFIX: 470251876Speter msg = "An undefined namespace prefix was used."; 471251876Speter break; 472251876Speter 473251876Speter case APR_XML_NS_ERROR_INVALID_DECL: 474251876Speter msg = "A namespace prefix was defined with an empty URI."; 475251876Speter break; 476251876Speter 477251876Speter case APR_XML_ERROR_EXPAT: 478251876Speter (void) apr_snprintf(errbuf, errbufsize, 479251876Speter "XML parser error code: %s (%d)", 480251876Speter XML_ErrorString(parser->xp_err), parser->xp_err); 481251876Speter return errbuf; 482251876Speter 483251876Speter case APR_XML_ERROR_PARSE_DONE: 484251876Speter msg = "The parser is not active."; 485251876Speter break; 486251876Speter 487251876Speter default: 488251876Speter msg = "There was an unknown error within the XML body."; 489251876Speter break; 490251876Speter } 491251876Speter 492251876Speter (void) apr_cpystrn(errbuf, msg, errbufsize); 493251876Speter return errbuf; 494251876Speter} 495251876Speter 496251876SpeterAPU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p, 497251876Speter apr_xml_parser **parser, 498251876Speter apr_xml_doc **ppdoc, 499251876Speter apr_file_t *xmlfd, 500251876Speter apr_size_t buffer_length) 501251876Speter{ 502251876Speter apr_status_t rv; 503251876Speter char *buffer; 504251876Speter apr_size_t length; 505251876Speter 506251876Speter *parser = apr_xml_parser_create(p); 507251876Speter if (*parser == NULL) { 508251876Speter /* FIXME: returning an error code would be nice, 509251876Speter * but we dont get one ;( */ 510251876Speter return APR_EGENERAL; 511251876Speter } 512251876Speter buffer = apr_palloc(p, buffer_length); 513251876Speter length = buffer_length; 514251876Speter 515251876Speter rv = apr_file_read(xmlfd, buffer, &length); 516251876Speter 517251876Speter while (rv == APR_SUCCESS) { 518251876Speter rv = apr_xml_parser_feed(*parser, buffer, length); 519251876Speter if (rv != APR_SUCCESS) { 520251876Speter return rv; 521251876Speter } 522251876Speter 523251876Speter length = buffer_length; 524251876Speter rv = apr_file_read(xmlfd, buffer, &length); 525251876Speter } 526251876Speter if (rv != APR_EOF) { 527251876Speter return rv; 528251876Speter } 529251876Speter rv = apr_xml_parser_done(*parser, ppdoc); 530251876Speter *parser = NULL; 531251876Speter return rv; 532251876Speter} 533251876Speter 534251876SpeterAPU_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr, 535251876Speter const char *text) 536251876Speter{ 537251876Speter apr_text *t = apr_palloc(p, sizeof(*t)); 538251876Speter 539251876Speter t->text = text; 540251876Speter t->next = NULL; 541251876Speter 542251876Speter if (hdr->first == NULL) { 543251876Speter /* no text elements yet */ 544251876Speter hdr->first = hdr->last = t; 545251876Speter } 546251876Speter else { 547251876Speter /* append to the last text element */ 548251876Speter hdr->last->next = t; 549251876Speter hdr->last = t; 550251876Speter } 551251876Speter} 552251876Speter 553251876Speter 554251876Speter/* --------------------------------------------------------------- 555251876Speter** 556251876Speter** XML UTILITY FUNCTIONS 557251876Speter*/ 558251876Speter 559251876Speter/* 560251876Speter** apr_xml_quote_string: quote an XML string 561251876Speter** 562251876Speter** Replace '<', '>', and '&' with '<', '>', and '&'. 563251876Speter** If quotes is true, then replace '"' with '"'. 564251876Speter** 565251876Speter** quotes is typically set to true for XML strings that will occur within 566251876Speter** double quotes -- attribute values. 567251876Speter*/ 568251876SpeterAPU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s, 569251876Speter int quotes) 570251876Speter{ 571251876Speter const char *scan; 572251876Speter apr_size_t len = 0; 573251876Speter apr_size_t extra = 0; 574251876Speter char *qstr; 575251876Speter char *qscan; 576251876Speter char c; 577251876Speter 578251876Speter for (scan = s; (c = *scan) != '\0'; ++scan, ++len) { 579251876Speter if (c == '<' || c == '>') 580251876Speter extra += 3; /* < or > */ 581251876Speter else if (c == '&') 582251876Speter extra += 4; /* & */ 583251876Speter else if (quotes && c == '"') 584251876Speter extra += 5; /* " */ 585251876Speter } 586251876Speter 587251876Speter /* nothing to do? */ 588251876Speter if (extra == 0) 589251876Speter return s; 590251876Speter 591251876Speter qstr = apr_palloc(p, len + extra + 1); 592251876Speter for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) { 593251876Speter if (c == '<') { 594251876Speter *qscan++ = '&'; 595251876Speter *qscan++ = 'l'; 596251876Speter *qscan++ = 't'; 597251876Speter *qscan++ = ';'; 598251876Speter } 599251876Speter else if (c == '>') { 600251876Speter *qscan++ = '&'; 601251876Speter *qscan++ = 'g'; 602251876Speter *qscan++ = 't'; 603251876Speter *qscan++ = ';'; 604251876Speter } 605251876Speter else if (c == '&') { 606251876Speter *qscan++ = '&'; 607251876Speter *qscan++ = 'a'; 608251876Speter *qscan++ = 'm'; 609251876Speter *qscan++ = 'p'; 610251876Speter *qscan++ = ';'; 611251876Speter } 612251876Speter else if (quotes && c == '"') { 613251876Speter *qscan++ = '&'; 614251876Speter *qscan++ = 'q'; 615251876Speter *qscan++ = 'u'; 616251876Speter *qscan++ = 'o'; 617251876Speter *qscan++ = 't'; 618251876Speter *qscan++ = ';'; 619251876Speter } 620251876Speter else { 621251876Speter *qscan++ = c; 622251876Speter } 623251876Speter } 624251876Speter 625251876Speter *qscan = '\0'; 626251876Speter return qstr; 627251876Speter} 628251876Speter 629251876Speter/* how many characters for the given integer? */ 630251876Speter#define APR_XML_NS_LEN(ns) ((ns) < 10 ? 1 : (ns) < 100 ? 2 : (ns) < 1000 ? 3 : \ 631251876Speter (ns) < 10000 ? 4 : (ns) < 100000 ? 5 : \ 632251876Speter (ns) < 1000000 ? 6 : (ns) < 10000000 ? 7 : \ 633251876Speter (ns) < 100000000 ? 8 : (ns) < 1000000000 ? 9 : 10) 634251876Speter 635251876Speterstatic apr_size_t text_size(const apr_text *t) 636251876Speter{ 637251876Speter apr_size_t size = 0; 638251876Speter 639251876Speter for (; t; t = t->next) 640251876Speter size += strlen(t->text); 641251876Speter return size; 642251876Speter} 643251876Speter 644251876Speterstatic apr_size_t elem_size(const apr_xml_elem *elem, int style, 645251876Speter apr_array_header_t *namespaces, int *ns_map) 646251876Speter{ 647251876Speter apr_size_t size; 648251876Speter 649251876Speter if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) { 650251876Speter const apr_xml_attr *attr; 651251876Speter 652251876Speter size = 0; 653251876Speter 654251876Speter if (style == APR_XML_X2T_FULL_NS_LANG) { 655251876Speter int i; 656251876Speter 657251876Speter /* 658251876Speter ** The outer element will contain xmlns:ns%d="%s" attributes 659251876Speter ** and an xml:lang attribute, if applicable. 660251876Speter */ 661251876Speter 662251876Speter for (i = namespaces->nelts; i--;) { 663251876Speter /* compute size of: ' xmlns:ns%d="%s"' */ 664251876Speter size += (9 + APR_XML_NS_LEN(i) + 2 + 665251876Speter strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1); 666251876Speter } 667251876Speter 668251876Speter if (elem->lang != NULL) { 669251876Speter /* compute size of: ' xml:lang="%s"' */ 670251876Speter size += 11 + strlen(elem->lang) + 1; 671251876Speter } 672251876Speter } 673251876Speter 674251876Speter if (elem->ns == APR_XML_NS_NONE) { 675251876Speter /* compute size of: <%s> */ 676251876Speter size += 1 + strlen(elem->name) + 1; 677251876Speter } 678251876Speter else { 679251876Speter int ns = ns_map ? ns_map[elem->ns] : elem->ns; 680251876Speter 681251876Speter /* compute size of: <ns%d:%s> */ 682251876Speter size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1; 683251876Speter } 684251876Speter 685251876Speter if (APR_XML_ELEM_IS_EMPTY(elem)) { 686251876Speter /* insert a closing "/" */ 687251876Speter size += 1; 688251876Speter } 689251876Speter else { 690251876Speter /* 691251876Speter * two of above plus "/": 692251876Speter * <ns%d:%s> ... </ns%d:%s> 693251876Speter * OR <%s> ... </%s> 694251876Speter */ 695251876Speter size = 2 * size + 1; 696251876Speter } 697251876Speter 698251876Speter for (attr = elem->attr; attr; attr = attr->next) { 699251876Speter if (attr->ns == APR_XML_NS_NONE) { 700251876Speter /* compute size of: ' %s="%s"' */ 701251876Speter size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1; 702251876Speter } 703251876Speter else { 704251876Speter /* compute size of: ' ns%d:%s="%s"' */ 705251876Speter int ns = ns_map ? ns_map[attr->ns] : attr->ns; 706251876Speter size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1; 707251876Speter } 708251876Speter } 709251876Speter 710251876Speter /* 711251876Speter ** If the element has an xml:lang value that is *different* from 712251876Speter ** its parent, then add the thing in: ' xml:lang="%s"'. 713251876Speter ** 714251876Speter ** NOTE: we take advantage of the pointer equality established by 715251876Speter ** the parsing for "inheriting" the xml:lang values from parents. 716251876Speter */ 717251876Speter if (elem->lang != NULL && 718251876Speter (elem->parent == NULL || elem->lang != elem->parent->lang)) { 719251876Speter size += 11 + strlen(elem->lang) + 1; 720251876Speter } 721251876Speter } 722251876Speter else if (style == APR_XML_X2T_LANG_INNER) { 723251876Speter /* 724251876Speter * This style prepends the xml:lang value plus a null terminator. 725251876Speter * If a lang value is not present, then we insert a null term. 726251876Speter */ 727251876Speter size = elem->lang ? strlen(elem->lang) + 1 : 1; 728251876Speter } 729251876Speter else 730251876Speter size = 0; 731251876Speter 732251876Speter size += text_size(elem->first_cdata.first); 733251876Speter 734251876Speter for (elem = elem->first_child; elem; elem = elem->next) { 735251876Speter /* the size of the child element plus the CDATA that follows it */ 736251876Speter size += (elem_size(elem, APR_XML_X2T_FULL, NULL, ns_map) + 737251876Speter text_size(elem->following_cdata.first)); 738251876Speter } 739251876Speter 740251876Speter return size; 741251876Speter} 742251876Speter 743251876Speterstatic char *write_text(char *s, const apr_text *t) 744251876Speter{ 745251876Speter for (; t; t = t->next) { 746251876Speter apr_size_t len = strlen(t->text); 747251876Speter memcpy(s, t->text, len); 748251876Speter s += len; 749251876Speter } 750251876Speter return s; 751251876Speter} 752251876Speter 753251876Speterstatic char *write_elem(char *s, const apr_xml_elem *elem, int style, 754251876Speter apr_array_header_t *namespaces, int *ns_map) 755251876Speter{ 756251876Speter const apr_xml_elem *child; 757251876Speter apr_size_t len; 758251876Speter int ns; 759251876Speter 760251876Speter if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) { 761251876Speter int empty = APR_XML_ELEM_IS_EMPTY(elem); 762251876Speter const apr_xml_attr *attr; 763251876Speter 764251876Speter if (elem->ns == APR_XML_NS_NONE) { 765251876Speter len = sprintf(s, "<%s", elem->name); 766251876Speter } 767251876Speter else { 768251876Speter ns = ns_map ? ns_map[elem->ns] : elem->ns; 769251876Speter len = sprintf(s, "<ns%d:%s", ns, elem->name); 770251876Speter } 771251876Speter s += len; 772251876Speter 773251876Speter for (attr = elem->attr; attr; attr = attr->next) { 774251876Speter if (attr->ns == APR_XML_NS_NONE) 775251876Speter len = sprintf(s, " %s=\"%s\"", attr->name, attr->value); 776251876Speter else { 777251876Speter ns = ns_map ? ns_map[attr->ns] : attr->ns; 778251876Speter len = sprintf(s, " ns%d:%s=\"%s\"", ns, attr->name, attr->value); 779251876Speter } 780251876Speter s += len; 781251876Speter } 782251876Speter 783251876Speter /* add the xml:lang value if necessary */ 784251876Speter if (elem->lang != NULL && 785251876Speter (style == APR_XML_X2T_FULL_NS_LANG || 786251876Speter elem->parent == NULL || 787251876Speter elem->lang != elem->parent->lang)) { 788251876Speter len = sprintf(s, " xml:lang=\"%s\"", elem->lang); 789251876Speter s += len; 790251876Speter } 791251876Speter 792251876Speter /* add namespace definitions, if required */ 793251876Speter if (style == APR_XML_X2T_FULL_NS_LANG) { 794251876Speter int i; 795251876Speter 796251876Speter for (i = namespaces->nelts; i--;) { 797251876Speter len = sprintf(s, " xmlns:ns%d=\"%s\"", i, 798251876Speter APR_XML_GET_URI_ITEM(namespaces, i)); 799251876Speter s += len; 800251876Speter } 801251876Speter } 802251876Speter 803251876Speter /* no more to do. close it up and go. */ 804251876Speter if (empty) { 805251876Speter *s++ = '/'; 806251876Speter *s++ = '>'; 807251876Speter return s; 808251876Speter } 809251876Speter 810251876Speter /* just close it */ 811251876Speter *s++ = '>'; 812251876Speter } 813251876Speter else if (style == APR_XML_X2T_LANG_INNER) { 814251876Speter /* prepend the xml:lang value */ 815251876Speter if (elem->lang != NULL) { 816251876Speter len = strlen(elem->lang); 817251876Speter memcpy(s, elem->lang, len); 818251876Speter s += len; 819251876Speter } 820251876Speter *s++ = '\0'; 821251876Speter } 822251876Speter 823251876Speter s = write_text(s, elem->first_cdata.first); 824251876Speter 825251876Speter for (child = elem->first_child; child; child = child->next) { 826251876Speter s = write_elem(s, child, APR_XML_X2T_FULL, NULL, ns_map); 827251876Speter s = write_text(s, child->following_cdata.first); 828251876Speter } 829251876Speter 830251876Speter if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) { 831251876Speter if (elem->ns == APR_XML_NS_NONE) { 832251876Speter len = sprintf(s, "</%s>", elem->name); 833251876Speter } 834251876Speter else { 835251876Speter ns = ns_map ? ns_map[elem->ns] : elem->ns; 836251876Speter len = sprintf(s, "</ns%d:%s>", ns, elem->name); 837251876Speter } 838251876Speter s += len; 839251876Speter } 840251876Speter 841251876Speter return s; 842251876Speter} 843251876Speter 844251876SpeterAPU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem) 845251876Speter{ 846251876Speter apr_text *scan_txt; 847251876Speter apr_xml_attr *scan_attr; 848251876Speter apr_xml_elem *scan_elem; 849251876Speter 850251876Speter /* convert the element's text */ 851251876Speter for (scan_txt = elem->first_cdata.first; 852251876Speter scan_txt != NULL; 853251876Speter scan_txt = scan_txt->next) { 854251876Speter scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0); 855251876Speter } 856251876Speter for (scan_txt = elem->following_cdata.first; 857251876Speter scan_txt != NULL; 858251876Speter scan_txt = scan_txt->next) { 859251876Speter scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0); 860251876Speter } 861251876Speter 862251876Speter /* convert the attribute values */ 863251876Speter for (scan_attr = elem->attr; 864251876Speter scan_attr != NULL; 865251876Speter scan_attr = scan_attr->next) { 866251876Speter scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1); 867251876Speter } 868251876Speter 869251876Speter /* convert the child elements */ 870251876Speter for (scan_elem = elem->first_child; 871251876Speter scan_elem != NULL; 872251876Speter scan_elem = scan_elem->next) { 873251876Speter apr_xml_quote_elem(p, scan_elem); 874251876Speter } 875251876Speter} 876251876Speter 877251876Speter/* convert an element to a text string */ 878251876SpeterAPU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem, 879251876Speter int style, apr_array_header_t *namespaces, 880251876Speter int *ns_map, const char **pbuf, 881251876Speter apr_size_t *psize) 882251876Speter{ 883251876Speter /* get the exact size, plus a null terminator */ 884251876Speter apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1; 885251876Speter char *s = apr_palloc(p, size); 886251876Speter 887251876Speter (void) write_elem(s, elem, style, namespaces, ns_map); 888251876Speter s[size - 1] = '\0'; 889251876Speter 890251876Speter *pbuf = s; 891251876Speter if (psize) 892251876Speter *psize = size; 893251876Speter} 894251876Speter 895251876SpeterAPU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p, 896251876Speter const apr_xml_elem *elem) 897251876Speter{ 898251876Speter if (elem->ns == APR_XML_NS_NONE) { 899251876Speter /* 900251876Speter * The prefix (xml...) is already within the prop name, or 901251876Speter * the element simply has no prefix. 902251876Speter */ 903251876Speter return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name); 904251876Speter } 905251876Speter 906251876Speter return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name); 907251876Speter} 908251876Speter 909251876Speter/* return the URI's (existing) index, or insert it and return a new index */ 910251876SpeterAPU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array, 911251876Speter const char *uri) 912251876Speter{ 913251876Speter int i; 914251876Speter const char **pelt; 915251876Speter 916251876Speter /* never insert an empty URI; this index is always APR_XML_NS_NONE */ 917251876Speter if (*uri == '\0') 918251876Speter return APR_XML_NS_NONE; 919251876Speter 920251876Speter for (i = uri_array->nelts; i--;) { 921251876Speter if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0) 922251876Speter return i; 923251876Speter } 924251876Speter 925251876Speter pelt = apr_array_push(uri_array); 926251876Speter *pelt = uri; /* assume uri is const or in a pool */ 927251876Speter return uri_array->nelts - 1; 928251876Speter} 929251876Speter 930251876Speter/* convert the element to EBCDIC */ 931251876Speter#if APR_CHARSET_EBCDIC 932251876Speterstatic apr_status_t apr_xml_parser_convert_elem(apr_xml_elem *e, 933251876Speter apr_xlate_t *convset) 934251876Speter{ 935251876Speter apr_xml_attr *a; 936251876Speter apr_xml_elem *ec; 937251876Speter apr_text *t; 938251876Speter apr_size_t inbytes_left, outbytes_left; 939251876Speter apr_status_t status; 940251876Speter 941251876Speter inbytes_left = outbytes_left = strlen(e->name); 942251876Speter status = apr_xlate_conv_buffer(convset, e->name, &inbytes_left, (char *) e->name, &outbytes_left); 943251876Speter if (status) { 944251876Speter return status; 945251876Speter } 946251876Speter 947251876Speter for (t = e->first_cdata.first; t != NULL; t = t->next) { 948251876Speter inbytes_left = outbytes_left = strlen(t->text); 949251876Speter status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left); 950251876Speter if (status) { 951251876Speter return status; 952251876Speter } 953251876Speter } 954251876Speter 955251876Speter for (t = e->following_cdata.first; t != NULL; t = t->next) { 956251876Speter inbytes_left = outbytes_left = strlen(t->text); 957251876Speter status = apr_xlate_conv_buffer(convset, t->text, &inbytes_left, (char *) t->text, &outbytes_left); 958251876Speter if (status) { 959251876Speter return status; 960251876Speter } 961251876Speter } 962251876Speter 963251876Speter for (a = e->attr; a != NULL; a = a->next) { 964251876Speter inbytes_left = outbytes_left = strlen(a->name); 965251876Speter status = apr_xlate_conv_buffer(convset, a->name, &inbytes_left, (char *) a->name, &outbytes_left); 966251876Speter if (status) { 967251876Speter return status; 968251876Speter } 969251876Speter inbytes_left = outbytes_left = strlen(a->value); 970251876Speter status = apr_xlate_conv_buffer(convset, a->value, &inbytes_left, (char *) a->value, &outbytes_left); 971251876Speter if (status) { 972251876Speter return status; 973251876Speter } 974251876Speter } 975251876Speter 976251876Speter for (ec = e->first_child; ec != NULL; ec = ec->next) { 977251876Speter status = apr_xml_parser_convert_elem(ec, convset); 978251876Speter if (status) { 979251876Speter return status; 980251876Speter } 981251876Speter } 982251876Speter return APR_SUCCESS; 983251876Speter} 984251876Speter 985251876Speter/* convert the whole document to EBCDIC */ 986251876SpeterAPU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool, 987251876Speter apr_xml_doc *pdoc, 988251876Speter apr_xlate_t *convset) 989251876Speter{ 990251876Speter apr_status_t status; 991251876Speter /* Don't convert the namespaces: they are constant! */ 992251876Speter if (pdoc->namespaces != NULL) { 993251876Speter int i; 994251876Speter apr_array_header_t *namespaces; 995251876Speter namespaces = apr_array_make(pool, pdoc->namespaces->nelts, sizeof(const char *)); 996251876Speter if (namespaces == NULL) 997251876Speter return APR_ENOMEM; 998251876Speter for (i = 0; i < pdoc->namespaces->nelts; i++) { 999251876Speter apr_size_t inbytes_left, outbytes_left; 1000251876Speter char *ptr = (char *) APR_XML_GET_URI_ITEM(pdoc->namespaces, i); 1001251876Speter ptr = apr_pstrdup(pool, ptr); 1002251876Speter if ( ptr == NULL) 1003251876Speter return APR_ENOMEM; 1004251876Speter inbytes_left = outbytes_left = strlen(ptr); 1005251876Speter status = apr_xlate_conv_buffer(convset, ptr, &inbytes_left, ptr, &outbytes_left); 1006251876Speter if (status) { 1007251876Speter return status; 1008251876Speter } 1009251876Speter apr_xml_insert_uri(namespaces, ptr); 1010251876Speter } 1011251876Speter pdoc->namespaces = namespaces; 1012251876Speter } 1013251876Speter return apr_xml_parser_convert_elem(pdoc->root, convset); 1014251876Speter} 1015251876Speter#endif 1016