xml.c revision 309512
1/*
2 * xml.c :  standard XML parsing routines for ra_serf
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#include <apr_uri.h>
27#include <expat.h>
28#include <serf.h>
29
30#include "svn_hash.h"
31#include "svn_pools.h"
32#include "svn_ra.h"
33#include "svn_dav.h"
34#include "svn_xml.h"
35#include "../libsvn_ra/ra_loader.h"
36#include "svn_config.h"
37#include "svn_delta.h"
38#include "svn_path.h"
39
40#include "svn_private_config.h"
41#include "private/svn_string_private.h"
42
43#include "ra_serf.h"
44
45
/* Expat releases in the early 1.95.x series do not provide the
 * XML_STATUS_OK / XML_STATUS_ERROR constants; supply them when absent.
 */
#ifndef XML_STATUS_OK
#define XML_STATUS_OK    1
#define XML_STATUS_ERROR 0
#endif

/* Non-zero when the Expat headers being compiled against describe
 * version MAJOR.MINOR.PATCH or newer.  Only defined here when nothing
 * else has defined it already.
 */
#ifndef XML_VERSION_AT_LEAST
#define XML_VERSION_AT_LEAST(major,minor,patch)                      \
((XML_MAJOR_VERSION > (major))                                       \
 || (XML_MAJOR_VERSION == (major)                                    \
     && ((XML_MINOR_VERSION > (minor))                               \
         || (XML_MINOR_VERSION == (minor)                            \
             && XML_MICRO_VERSION >= (patch)))))
#endif /* XML_VERSION_AT_LEAST */

/* Read/write chunks of this size into the spillbuf.  */
#define PARSE_CHUNK_SIZE 8000
64
65
66struct svn_ra_serf__xml_context_t {
67  /* Current state information.  */
68  svn_ra_serf__xml_estate_t *current;
69
70  /* If WAITING >= then we are waiting for an element to close before
71     resuming events. The number stored here is the amount of nested
72     elements open. The Xml parser will make sure the document is well
73     formed. */
74  int waiting;
75
76  /* The transition table.  */
77  const svn_ra_serf__xml_transition_t *ttable;
78
79  /* The callback information.  */
80  svn_ra_serf__xml_opened_t opened_cb;
81  svn_ra_serf__xml_closed_t closed_cb;
82  svn_ra_serf__xml_cdata_t cdata_cb;
83  void *baton;
84
85  /* Linked list of free states.  */
86  svn_ra_serf__xml_estate_t *free_states;
87
88#ifdef SVN_DEBUG
89  /* Used to verify we are not re-entering a callback, specifically to
90     ensure SCRATCH_POOL is not cleared while an outer callback is
91     trying to use it.  */
92  svn_boolean_t within_callback;
93#define START_CALLBACK(xmlctx) \
94  do {                                                    \
95    svn_ra_serf__xml_context_t *xmlctx__tmp = (xmlctx);   \
96    SVN_ERR_ASSERT(!xmlctx__tmp->within_callback);        \
97    xmlctx__tmp->within_callback = TRUE;                  \
98  } while (0)
99#define END_CALLBACK(xmlctx) ((xmlctx)->within_callback = FALSE)
100#else
101#define START_CALLBACK(xmlctx)  /* empty */
102#define END_CALLBACK(xmlctx)  /* empty */
103#endif /* SVN_DEBUG  */
104
105  apr_pool_t *scratch_pool;
106
107};
108
/* One namespace binding; bindings form a singly linked list with the
   most recently pushed binding at the head. */
typedef struct svn_ra_serf__ns_t {
  /* The prefix the document bound ("" for the default namespace). */
  const char *xmlns;
  /* The namespace URL the prefix stands for. */
  const char *url;
  /* The next (earlier) binding in the list, or NULL at the end. */
  struct svn_ra_serf__ns_t *next;
} svn_ra_serf__ns_t;
118
119struct svn_ra_serf__xml_estate_t {
120  /* The current state value.  */
121  int state;
122
123  /* The xml tag that opened this state. Waiting for the tag to close.  */
124  svn_ra_serf__dav_props_t tag;
125
126  /* Should the CLOSED_CB function be called for custom processing when
127     this tag is closed?  */
128  svn_boolean_t custom_close;
129
130  /* A pool may be constructed for this state.  */
131  apr_pool_t *state_pool;
132
133  /* The namespaces extent for this state/element. This will start with
134     the parent's NS_LIST, and we will push new namespaces into our
135     local list. The parent will be unaffected by our locally-scoped data. */
136  svn_ra_serf__ns_t *ns_list;
137
138  /* Any collected attribute values. char * -> svn_string_t *. May be NULL
139     if no attributes have been collected.  */
140  apr_hash_t *attrs;
141
142  /* Any collected cdata. May be NULL if no cdata is being collected.  */
143  svn_stringbuf_t *cdata;
144
145  /* Previous/outer state.  */
146  svn_ra_serf__xml_estate_t *prev;
147
148};
149
150struct expat_ctx_t {
151  svn_ra_serf__xml_context_t *xmlctx;
152  XML_Parser parser;
153  svn_ra_serf__handler_t *handler;
154  const int *expected_status;
155
156  svn_error_t *inner_error;
157
158  /* Do not use this pool for allocation. It is merely recorded for running
159     the cleanup handler.  */
160  apr_pool_t *cleanup_pool;
161};
162
163
164static void
165define_namespaces(svn_ra_serf__ns_t **ns_list,
166                  const char *const *attrs,
167                  apr_pool_t *(*get_pool)(void *baton),
168                  void *baton)
169{
170  const char *const *tmp_attrs = attrs;
171
172  for (tmp_attrs = attrs; *tmp_attrs != NULL; tmp_attrs += 2)
173    {
174      if (strncmp(*tmp_attrs, "xmlns", 5) == 0)
175        {
176          const svn_ra_serf__ns_t *cur_ns;
177          svn_boolean_t found = FALSE;
178          const char *prefix;
179
180          /* The empty prefix, or a named-prefix.  */
181          if (tmp_attrs[0][5] == ':')
182            prefix = &tmp_attrs[0][6];
183          else
184            prefix = "";
185
186          /* Have we already defined this ns previously? */
187          for (cur_ns = *ns_list; cur_ns; cur_ns = cur_ns->next)
188            {
189              if (strcmp(cur_ns->xmlns, prefix) == 0)
190                {
191                  found = TRUE;
192                  break;
193                }
194            }
195
196          if (!found)
197            {
198              apr_pool_t *pool;
199              svn_ra_serf__ns_t *new_ns;
200
201              if (get_pool)
202                pool = get_pool(baton);
203              else
204                pool = baton;
205              new_ns = apr_palloc(pool, sizeof(*new_ns));
206              new_ns->xmlns = apr_pstrdup(pool, prefix);
207              new_ns->url = apr_pstrdup(pool, tmp_attrs[1]);
208
209              /* Push into the front of NS_LIST. Parent states will point
210                 to later in the chain, so will be unaffected by
211                 shadowing/other namespaces pushed onto NS_LIST.  */
212              new_ns->next = *ns_list;
213              *ns_list = new_ns;
214            }
215        }
216    }
217}
218
219/*
220 * Look up @a name in the @a ns_list list for previously declared namespace
221 * definitions.
222 *
223 * Return (in @a *returned_prop_name) a #svn_ra_serf__dav_props_t tuple
224 * representing the expanded name.
225 */
226static void
227expand_ns(svn_ra_serf__dav_props_t *returned_prop_name,
228                       const svn_ra_serf__ns_t *ns_list,
229                       const char *name)
230{
231  const char *colon;
232
233  colon = strchr(name, ':');
234  if (colon)
235    {
236      const svn_ra_serf__ns_t *ns;
237
238      for (ns = ns_list; ns; ns = ns->next)
239        {
240          if (strncmp(ns->xmlns, name, colon - name) == 0)
241            {
242              returned_prop_name->xmlns = ns->url;
243              returned_prop_name->name = colon + 1;
244              return;
245            }
246        }
247    }
248  else
249    {
250      const svn_ra_serf__ns_t *ns;
251
252      for (ns = ns_list; ns; ns = ns->next)
253        {
254          if (! ns->xmlns[0])
255            {
256              returned_prop_name->xmlns = ns->url;
257              returned_prop_name->name = name;
258              return;
259            }
260        }
261    }
262
263  /* If the prefix is not found, then the name is NOT within a
264     namespace.  */
265  returned_prop_name->xmlns = "";
266  returned_prop_name->name = name;
267}
268
269
270#define XML_HEADER "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
271
272void
273svn_ra_serf__add_xml_header_buckets(serf_bucket_t *agg_bucket,
274                                    serf_bucket_alloc_t *bkt_alloc)
275{
276  serf_bucket_t *tmp;
277
278  tmp = SERF_BUCKET_SIMPLE_STRING_LEN(XML_HEADER, sizeof(XML_HEADER) - 1,
279                                      bkt_alloc);
280  serf_bucket_aggregate_append(agg_bucket, tmp);
281}
282
283void
284svn_ra_serf__add_open_tag_buckets(serf_bucket_t *agg_bucket,
285                                  serf_bucket_alloc_t *bkt_alloc,
286                                  const char *tag, ...)
287{
288  va_list ap;
289  const char *key;
290  serf_bucket_t *tmp;
291
292  tmp = SERF_BUCKET_SIMPLE_STRING_LEN("<", 1, bkt_alloc);
293  serf_bucket_aggregate_append(agg_bucket, tmp);
294
295  tmp = SERF_BUCKET_SIMPLE_STRING(tag, bkt_alloc);
296  serf_bucket_aggregate_append(agg_bucket, tmp);
297
298  va_start(ap, tag);
299  while ((key = va_arg(ap, char *)) != NULL)
300    {
301      const char *val = va_arg(ap, const char *);
302      if (val)
303        {
304          tmp = SERF_BUCKET_SIMPLE_STRING_LEN(" ", 1, bkt_alloc);
305          serf_bucket_aggregate_append(agg_bucket, tmp);
306
307          tmp = SERF_BUCKET_SIMPLE_STRING(key, bkt_alloc);
308          serf_bucket_aggregate_append(agg_bucket, tmp);
309
310          tmp = SERF_BUCKET_SIMPLE_STRING_LEN("=\"", 2, bkt_alloc);
311          serf_bucket_aggregate_append(agg_bucket, tmp);
312
313          tmp = SERF_BUCKET_SIMPLE_STRING(val, bkt_alloc);
314          serf_bucket_aggregate_append(agg_bucket, tmp);
315
316          tmp = SERF_BUCKET_SIMPLE_STRING_LEN("\"", 1, bkt_alloc);
317          serf_bucket_aggregate_append(agg_bucket, tmp);
318        }
319    }
320  va_end(ap);
321
322  tmp = SERF_BUCKET_SIMPLE_STRING_LEN(">", 1, bkt_alloc);
323  serf_bucket_aggregate_append(agg_bucket, tmp);
324}
325
326void
327svn_ra_serf__add_empty_tag_buckets(serf_bucket_t *agg_bucket,
328                                   serf_bucket_alloc_t *bkt_alloc,
329                                   const char *tag, ...)
330{
331  va_list ap;
332  const char *key;
333  serf_bucket_t *tmp;
334
335  tmp = SERF_BUCKET_SIMPLE_STRING_LEN("<", 1, bkt_alloc);
336  serf_bucket_aggregate_append(agg_bucket, tmp);
337
338  tmp = SERF_BUCKET_SIMPLE_STRING(tag, bkt_alloc);
339  serf_bucket_aggregate_append(agg_bucket, tmp);
340
341  va_start(ap, tag);
342  while ((key = va_arg(ap, char *)) != NULL)
343    {
344      const char *val = va_arg(ap, const char *);
345      if (val)
346        {
347          tmp = SERF_BUCKET_SIMPLE_STRING_LEN(" ", 1, bkt_alloc);
348          serf_bucket_aggregate_append(agg_bucket, tmp);
349
350          tmp = SERF_BUCKET_SIMPLE_STRING(key, bkt_alloc);
351          serf_bucket_aggregate_append(agg_bucket, tmp);
352
353          tmp = SERF_BUCKET_SIMPLE_STRING_LEN("=\"", 2, bkt_alloc);
354          serf_bucket_aggregate_append(agg_bucket, tmp);
355
356          tmp = SERF_BUCKET_SIMPLE_STRING(val, bkt_alloc);
357          serf_bucket_aggregate_append(agg_bucket, tmp);
358
359          tmp = SERF_BUCKET_SIMPLE_STRING_LEN("\"", 1, bkt_alloc);
360          serf_bucket_aggregate_append(agg_bucket, tmp);
361        }
362    }
363  va_end(ap);
364
365  tmp = SERF_BUCKET_SIMPLE_STRING_LEN("/>", 2, bkt_alloc);
366  serf_bucket_aggregate_append(agg_bucket, tmp);
367}
368
369void
370svn_ra_serf__add_close_tag_buckets(serf_bucket_t *agg_bucket,
371                                   serf_bucket_alloc_t *bkt_alloc,
372                                   const char *tag)
373{
374  serf_bucket_t *tmp;
375
376  tmp = SERF_BUCKET_SIMPLE_STRING_LEN("</", 2, bkt_alloc);
377  serf_bucket_aggregate_append(agg_bucket, tmp);
378
379  tmp = SERF_BUCKET_SIMPLE_STRING(tag, bkt_alloc);
380  serf_bucket_aggregate_append(agg_bucket, tmp);
381
382  tmp = SERF_BUCKET_SIMPLE_STRING_LEN(">", 1, bkt_alloc);
383  serf_bucket_aggregate_append(agg_bucket, tmp);
384}
385
386void
387svn_ra_serf__add_cdata_len_buckets(serf_bucket_t *agg_bucket,
388                                   serf_bucket_alloc_t *bkt_alloc,
389                                   const char *data, apr_size_t len)
390{
391  const char *end = data + len;
392  const char *p = data, *q;
393  serf_bucket_t *tmp_bkt;
394
395  while (1)
396    {
397      /* Find a character which needs to be quoted and append bytes up
398         to that point.  Strictly speaking, '>' only needs to be
399         quoted if it follows "]]", but it's easier to quote it all
400         the time.
401
402         So, why are we escaping '\r' here?  Well, according to the
403         XML spec, '\r\n' gets converted to '\n' during XML parsing.
404         Also, any '\r' not followed by '\n' is converted to '\n'.  By
405         golly, if we say we want to escape a '\r', we want to make
406         sure it remains a '\r'!  */
407      q = p;
408      while (q < end && *q != '&' && *q != '<' && *q != '>' && *q != '\r')
409        q++;
410
411
412      tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN(p, q - p, bkt_alloc);
413      serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
414
415      /* We may already be a winner.  */
416      if (q == end)
417        break;
418
419      /* Append the entity reference for the character.  */
420      if (*q == '&')
421        {
422          tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN("&amp;", sizeof("&amp;") - 1,
423                                                  bkt_alloc);
424          serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
425        }
426      else if (*q == '<')
427        {
428          tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN("&lt;", sizeof("&lt;") - 1,
429                                                  bkt_alloc);
430          serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
431        }
432      else if (*q == '>')
433        {
434          tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN("&gt;", sizeof("&gt;") - 1,
435                                                  bkt_alloc);
436          serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
437        }
438      else if (*q == '\r')
439        {
440          tmp_bkt = SERF_BUCKET_SIMPLE_STRING_LEN("&#13;", sizeof("&#13;") - 1,
441                                                  bkt_alloc);
442          serf_bucket_aggregate_append(agg_bucket, tmp_bkt);
443        }
444
445      p = q + 1;
446    }
447}
448
449void svn_ra_serf__add_tag_buckets(serf_bucket_t *agg_bucket, const char *tag,
450                                  const char *value,
451                                  serf_bucket_alloc_t *bkt_alloc)
452{
453  svn_ra_serf__add_open_tag_buckets(agg_bucket, bkt_alloc, tag, SVN_VA_NULL);
454
455  if (value)
456    {
457      svn_ra_serf__add_cdata_len_buckets(agg_bucket, bkt_alloc,
458                                         value, strlen(value));
459    }
460
461  svn_ra_serf__add_close_tag_buckets(agg_bucket, bkt_alloc, tag);
462}
463
464/* Return a pool for XES to use for self-alloc (and other specifics).  */
465static apr_pool_t *
466xes_pool(const svn_ra_serf__xml_estate_t *xes)
467{
468  /* Move up through parent states looking for one with a pool. This
469     will always terminate since the initial state has a pool.  */
470  while (xes->state_pool == NULL)
471    xes = xes->prev;
472  return xes->state_pool;
473}
474
475
476static void
477ensure_pool(svn_ra_serf__xml_estate_t *xes)
478{
479  if (xes->state_pool == NULL)
480    xes->state_pool = svn_pool_create(xes_pool(xes));
481}
482
483
484/* This callback is used by define_namespaces() to wait until a pool is
485   required before constructing it.  */
486static apr_pool_t *
487lazy_create_pool(void *baton)
488{
489  svn_ra_serf__xml_estate_t *xes = baton;
490
491  ensure_pool(xes);
492  return xes->state_pool;
493}
494
495svn_error_t *
496svn_ra_serf__xml_context_done(svn_ra_serf__xml_context_t *xmlctx)
497{
498  if (xmlctx->current->prev)
499    {
500      /* Probably unreachable as this would be an xml parser error */
501      return svn_error_createf(SVN_ERR_XML_MALFORMED, NULL,
502                               _("XML stream truncated: closing '%s' missing"),
503                               xmlctx->current->tag.name);
504    }
505  else if (! xmlctx->free_states)
506    {
507      /* If we have no items on the free_states list, we didn't push anything,
508         which tells us that we found an empty xml body */
509      const svn_ra_serf__xml_transition_t *scan;
510      const svn_ra_serf__xml_transition_t *document = NULL;
511      const char *msg;
512
513      for (scan = xmlctx->ttable; scan->ns != NULL; ++scan)
514        {
515          if (scan->from_state == XML_STATE_INITIAL)
516            {
517              if (document != NULL)
518                {
519                  document = NULL; /* Multiple document elements defined */
520                  break;
521                }
522              document = scan;
523            }
524        }
525
526      if (document)
527        msg = apr_psprintf(xmlctx->scratch_pool, "'%s' element not found",
528                            document->name);
529      else
530        msg = _("document element not found");
531
532      return svn_error_createf(SVN_ERR_XML_MALFORMED, NULL,
533                               _("XML stream truncated: %s"),
534                               msg);
535    }
536
537  svn_pool_destroy(xmlctx->scratch_pool);
538  return SVN_NO_ERROR;
539}
540
541svn_ra_serf__xml_context_t *
542svn_ra_serf__xml_context_create(
543  const svn_ra_serf__xml_transition_t *ttable,
544  svn_ra_serf__xml_opened_t opened_cb,
545  svn_ra_serf__xml_closed_t closed_cb,
546  svn_ra_serf__xml_cdata_t cdata_cb,
547  void *baton,
548  apr_pool_t *result_pool)
549{
550  svn_ra_serf__xml_context_t *xmlctx;
551  svn_ra_serf__xml_estate_t *xes;
552
553  xmlctx = apr_pcalloc(result_pool, sizeof(*xmlctx));
554  xmlctx->ttable = ttable;
555  xmlctx->opened_cb = opened_cb;
556  xmlctx->closed_cb = closed_cb;
557  xmlctx->cdata_cb = cdata_cb;
558  xmlctx->baton = baton;
559  xmlctx->scratch_pool = svn_pool_create(result_pool);
560
561  xes = apr_pcalloc(result_pool, sizeof(*xes));
562  /* XES->STATE == 0  */
563
564  /* Child states may use this pool to allocate themselves. If a child
565     needs to collect information, then it will construct a subpool and
566     will use that to allocate itself and its collected data.  */
567  xes->state_pool = result_pool;
568
569  xmlctx->current = xes;
570
571  return xmlctx;
572}
573
574
575apr_hash_t *
576svn_ra_serf__xml_gather_since(svn_ra_serf__xml_estate_t *xes,
577                              int stop_state)
578{
579  apr_hash_t *data;
580  apr_pool_t *pool;
581
582  ensure_pool(xes);
583  pool = xes->state_pool;
584
585  data = apr_hash_make(pool);
586
587  for (; xes != NULL; xes = xes->prev)
588    {
589      if (xes->attrs != NULL)
590        {
591          apr_hash_index_t *hi;
592
593          for (hi = apr_hash_first(pool, xes->attrs); hi;
594               hi = apr_hash_next(hi))
595            {
596              const void *key;
597              apr_ssize_t klen;
598              void *val;
599
600              /* Parent name/value lifetimes are at least as long as POOL.  */
601              apr_hash_this(hi, &key, &klen, &val);
602              apr_hash_set(data, key, klen, val);
603            }
604        }
605
606      if (xes->state == stop_state)
607        break;
608    }
609
610  return data;
611}
612
613
614void
615svn_ra_serf__xml_note(svn_ra_serf__xml_estate_t *xes,
616                      int state,
617                      const char *name,
618                      const char *value)
619{
620  svn_ra_serf__xml_estate_t *scan;
621
622  for (scan = xes; scan != NULL && scan->state != state; scan = scan->prev)
623    /* pass */ ;
624
625  SVN_ERR_ASSERT_NO_RETURN(scan != NULL);
626
627  /* Make sure the target state has a pool.  */
628  ensure_pool(scan);
629
630  /* ... and attribute storage.  */
631  if (scan->attrs == NULL)
632    scan->attrs = apr_hash_make(scan->state_pool);
633
634  /* In all likelihood, NAME is a string constant. But we can't really
635     be sure. And it isn't like we're storing a billion of these into
636     the state pool.  */
637  svn_hash_sets(scan->attrs,
638                apr_pstrdup(scan->state_pool, name),
639                apr_pstrdup(scan->state_pool, value));
640}
641
642
643apr_pool_t *
644svn_ra_serf__xml_state_pool(svn_ra_serf__xml_estate_t *xes)
645{
646  /* If they asked for a pool, then ensure that we have one to provide.  */
647  ensure_pool(xes);
648
649  return xes->state_pool;
650}
651
652
653static svn_error_t *
654xml_cb_start(svn_ra_serf__xml_context_t *xmlctx,
655             const char *raw_name,
656             const char *const *attrs)
657{
658  svn_ra_serf__xml_estate_t *current = xmlctx->current;
659  svn_ra_serf__dav_props_t elemname;
660  const svn_ra_serf__xml_transition_t *scan;
661  apr_pool_t *new_pool;
662  svn_ra_serf__xml_estate_t *new_xes;
663
664  /* If we're waiting for an element to close, then just ignore all
665     other element-opens.  */
666  if (xmlctx->waiting > 0)
667    {
668      xmlctx->waiting++;
669      return SVN_NO_ERROR;
670    }
671
672  /* Look for xmlns: attributes. Lazily create the state pool if any
673     were found.  */
674  define_namespaces(&current->ns_list, attrs, lazy_create_pool, current);
675
676  expand_ns(&elemname, current->ns_list, raw_name);
677
678  for (scan = xmlctx->ttable; scan->ns != NULL; ++scan)
679    {
680      if (scan->from_state != current->state)
681        continue;
682
683      /* Wildcard tag match.  */
684      if (*scan->name == '*')
685        break;
686
687      /* Found a specific transition.  */
688      if (strcmp(elemname.name, scan->name) == 0
689          && strcmp(elemname.xmlns, scan->ns) == 0)
690        break;
691    }
692  if (scan->ns == NULL)
693    {
694      if (current->state == XML_STATE_INITIAL)
695        {
696          return svn_error_createf(
697                        SVN_ERR_XML_UNEXPECTED_ELEMENT, NULL,
698                        _("XML Parsing failed: Unexpected root element '%s'"),
699                        elemname.name);
700        }
701
702      xmlctx->waiting++; /* Start waiting for the close tag */
703      return SVN_NO_ERROR;
704    }
705
706  /* We should not be told to collect cdata if the closed_cb will not
707     be called.  */
708  SVN_ERR_ASSERT(!scan->collect_cdata || scan->custom_close);
709
710  /* Found a transition. Make it happen.  */
711
712  /* ### todo. push state  */
713
714  /* ### how to use free states?  */
715  /* This state should be allocated in the extent pool. If we will be
716     collecting information for this state, then construct a subpool.
717
718     ### potentially optimize away the subpool if none of the
719     ### attributes are present. subpools are cheap, tho...  */
720  new_pool = xes_pool(current);
721  if (scan->collect_cdata || scan->collect_attrs[0])
722    {
723      new_pool = svn_pool_create(new_pool);
724
725      /* Prep the new state.  */
726      new_xes = apr_pcalloc(new_pool, sizeof(*new_xes));
727      new_xes->state_pool = new_pool;
728
729      /* If we're supposed to collect cdata, then set up a buffer for
730         this. The existence of this buffer will instruct our cdata
731         callback to collect the cdata.  */
732      if (scan->collect_cdata)
733        new_xes->cdata = svn_stringbuf_create_empty(new_pool);
734
735      if (scan->collect_attrs[0] != NULL)
736        {
737          const char *const *saveattr = &scan->collect_attrs[0];
738
739          new_xes->attrs = apr_hash_make(new_pool);
740          for (; *saveattr != NULL; ++saveattr)
741            {
742              const char *name;
743              const char *value;
744
745              if (**saveattr == '?')
746                {
747                  name = *saveattr + 1;
748                  value = svn_xml_get_attr_value(name, attrs);
749                }
750              else
751                {
752                  name = *saveattr;
753                  value = svn_xml_get_attr_value(name, attrs);
754                  if (value == NULL)
755                    return svn_error_createf(
756                                SVN_ERR_XML_ATTRIB_NOT_FOUND,
757                                NULL,
758                                _("Missing XML attribute '%s' on '%s' element"),
759                                name, scan->name);
760                }
761
762              if (value)
763                svn_hash_sets(new_xes->attrs, name,
764                              apr_pstrdup(new_pool, value));
765            }
766        }
767    }
768  else
769    {
770      /* Prep the new state.  */
771      new_xes = apr_pcalloc(new_pool, sizeof(*new_xes));
772      /* STATE_POOL remains NULL.  */
773    }
774
775  /* Some basic copies to set up the new estate.  */
776  new_xes->state = scan->to_state;
777  new_xes->tag.name = apr_pstrdup(new_pool, elemname.name);
778  new_xes->tag.xmlns = apr_pstrdup(new_pool, elemname.xmlns);
779  new_xes->custom_close = scan->custom_close;
780
781  /* Start with the parent's namespace set.  */
782  new_xes->ns_list = current->ns_list;
783
784  /* The new state is prepared. Make it current.  */
785  new_xes->prev = current;
786  xmlctx->current = new_xes;
787
788  if (xmlctx->opened_cb)
789    {
790      START_CALLBACK(xmlctx);
791      SVN_ERR(xmlctx->opened_cb(new_xes, xmlctx->baton,
792                                new_xes->state, &new_xes->tag,
793                                xmlctx->scratch_pool));
794      END_CALLBACK(xmlctx);
795      svn_pool_clear(xmlctx->scratch_pool);
796    }
797
798  return SVN_NO_ERROR;
799}
800
801
802static svn_error_t *
803xml_cb_end(svn_ra_serf__xml_context_t *xmlctx,
804           const char *raw_name)
805{
806  svn_ra_serf__xml_estate_t *xes = xmlctx->current;
807
808  if (xmlctx->waiting > 0)
809    {
810      xmlctx->waiting--;
811      return SVN_NO_ERROR;
812    }
813
814  if (xes->custom_close)
815    {
816      const svn_string_t *cdata;
817
818      if (xes->cdata)
819        {
820          cdata = svn_stringbuf__morph_into_string(xes->cdata);
821#ifdef SVN_DEBUG
822          /* We might toss the pool holding this structure, but it could also
823             be within a parent pool. In any case, for safety's sake, disable
824             the stringbuf against future Badness.  */
825          xes->cdata->pool = NULL;
826#endif
827        }
828      else
829        cdata = NULL;
830
831      START_CALLBACK(xmlctx);
832      SVN_ERR(xmlctx->closed_cb(xes, xmlctx->baton, xes->state,
833                                cdata, xes->attrs,
834                                xmlctx->scratch_pool));
835      END_CALLBACK(xmlctx);
836      svn_pool_clear(xmlctx->scratch_pool);
837    }
838
839  /* Pop the state.  */
840  xmlctx->current = xes->prev;
841
842  /* ### not everything should go on the free state list. XES may go
843     ### away with the state pool.  */
844  xes->prev = xmlctx->free_states;
845  xmlctx->free_states = xes;
846
847  /* If there is a STATE_POOL, then toss it. This will get rid of as much
848     memory as possible. Potentially the XES (if we didn't create a pool
849     right away, then XES may be in a parent pool).  */
850  if (xes->state_pool)
851    svn_pool_destroy(xes->state_pool);
852
853  return SVN_NO_ERROR;
854}
855
856
857static svn_error_t *
858xml_cb_cdata(svn_ra_serf__xml_context_t *xmlctx,
859             const char *data,
860             apr_size_t len)
861{
862  /* If we are waiting for a closing tag, then we are uninterested in
863     the cdata. Just return.  */
864  if (xmlctx->waiting > 0)
865    return SVN_NO_ERROR;
866
867  /* If the current state is collecting cdata, then copy the cdata.  */
868  if (xmlctx->current->cdata != NULL)
869    {
870      svn_stringbuf_appendbytes(xmlctx->current->cdata, data, len);
871    }
872  /* ... else if a CDATA_CB has been supplied, then invoke it for
873     all states.  */
874  else if (xmlctx->cdata_cb != NULL)
875    {
876      START_CALLBACK(xmlctx);
877      SVN_ERR(xmlctx->cdata_cb(xmlctx->current,
878                               xmlctx->baton,
879                               xmlctx->current->state,
880                               data, len,
881                               xmlctx->scratch_pool));
882      END_CALLBACK(xmlctx);
883      svn_pool_clear(xmlctx->scratch_pool);
884    }
885
886  return SVN_NO_ERROR;
887}
888
889/* svn_error_t * wrapper around XML_Parse */
890static APR_INLINE svn_error_t *
891parse_xml(struct expat_ctx_t *ectx, const char *data, apr_size_t len, svn_boolean_t is_final)
892{
893  int xml_status = XML_Parse(ectx->parser, data, (int)len, is_final);
894  const char *msg;
895  int xml_code;
896
897  if (xml_status == XML_STATUS_OK)
898    return ectx->inner_error;
899
900  xml_code = XML_GetErrorCode(ectx->parser);
901
902#if XML_VERSION_AT_LEAST(1, 95, 8)
903  /* If we called XML_StopParser() expat will return an abort error. If we
904     have a better error stored we should ignore it as it will not help
905     the end-user to store it in the error chain. */
906  if (xml_code == XML_ERROR_ABORTED && ectx->inner_error)
907    return ectx->inner_error;
908#endif
909
910  msg = XML_ErrorString(xml_code);
911
912  return svn_error_compose_create(
913            ectx->inner_error,
914            svn_error_create(SVN_ERR_RA_DAV_MALFORMED_DATA,
915                             svn_error_createf(SVN_ERR_XML_MALFORMED, NULL,
916                                               _("Malformed XML: %s"),
917                                               msg),
918                             _("The XML response contains invalid XML")));
919}
920
921/* Apr pool cleanup handler to release an XML_Parser in success and error
922   conditions */
923static apr_status_t
924xml_parser_cleanup(void *baton)
925{
926  XML_Parser *xmlp = baton;
927
928  if (*xmlp)
929    {
930      (void) XML_ParserFree(*xmlp);
931      *xmlp = NULL;
932    }
933
934  return APR_SUCCESS;
935}
936
937/* Conforms to Expat's XML_StartElementHandler  */
938static void
939expat_start(void *userData, const char *raw_name, const char **attrs)
940{
941  struct expat_ctx_t *ectx = userData;
942
943  if (ectx->inner_error != NULL)
944    return;
945
946  ectx->inner_error = svn_error_trace(xml_cb_start(ectx->xmlctx,
947                                                   raw_name, attrs));
948
949#if XML_VERSION_AT_LEAST(1, 95, 8)
950  if (ectx->inner_error)
951    (void) XML_StopParser(ectx->parser, 0 /* resumable */);
952#endif
953}
954
955
956/* Conforms to Expat's XML_EndElementHandler  */
957static void
958expat_end(void *userData, const char *raw_name)
959{
960  struct expat_ctx_t *ectx = userData;
961
962  if (ectx->inner_error != NULL)
963    return;
964
965  ectx->inner_error = svn_error_trace(xml_cb_end(ectx->xmlctx, raw_name));
966
967#if XML_VERSION_AT_LEAST(1, 95, 8)
968  if (ectx->inner_error)
969    (void) XML_StopParser(ectx->parser, 0 /* resumable */);
970#endif
971}
972
973
974/* Conforms to Expat's XML_CharacterDataHandler  */
975static void
976expat_cdata(void *userData, const char *data, int len)
977{
978  struct expat_ctx_t *ectx = userData;
979
980  if (ectx->inner_error != NULL)
981    return;
982
983  ectx->inner_error = svn_error_trace(xml_cb_cdata(ectx->xmlctx, data, len));
984
985#if XML_VERSION_AT_LEAST(1, 95, 8)
986  if (ectx->inner_error)
987    (void) XML_StopParser(ectx->parser, 0 /* resumable */);
988#endif
989}
990
991#if XML_VERSION_AT_LEAST(1, 95, 8)
992static void
993expat_entity_declaration(void *userData,
994                         const XML_Char *entityName,
995                         int is_parameter_entity,
996                         const XML_Char *value,
997                         int value_length,
998                         const XML_Char *base,
999                         const XML_Char *systemId,
1000                         const XML_Char *publicId,
1001                         const XML_Char *notationName)
1002{
1003  struct expat_ctx_t *ectx = userData;
1004
1005  /* Stop the parser if an entity declaration is hit. */
1006  XML_StopParser(ectx->parser, 0 /* resumable */);
1007}
1008#else
1009/* A noop default_handler. */
1010static void
1011expat_default_handler(void *userData, const XML_Char *s, int len)
1012{
1013}
1014#endif
1015
1016/* Implements svn_ra_serf__response_handler_t */
1017static svn_error_t *
1018expat_response_handler(serf_request_t *request,
1019                       serf_bucket_t *response,
1020                       void *baton,
1021                       apr_pool_t *scratch_pool)
1022{
1023  struct expat_ctx_t *ectx = baton;
1024  svn_boolean_t got_expected_status;
1025
1026  if (ectx->expected_status)
1027    {
1028      const int *status = ectx->expected_status;
1029      got_expected_status = FALSE;
1030
1031      while (*status && ectx->handler->sline.code != *status)
1032        status++;
1033
1034      got_expected_status = (*status) != 0;
1035    }
1036  else
1037    got_expected_status = (ectx->handler->sline.code == 200);
1038
1039  if (!ectx->handler->server_error
1040      && ((ectx->handler->sline.code < 200) || (ectx->handler->sline.code >= 300)
1041          || ! got_expected_status))
1042    {
1043      /* By deferring to expect_empty_body(), it will make a choice on
1044         how to handle the body. Whatever the decision, the core handler
1045         will take over, and we will not be called again.  */
1046
1047      /* ### This handles xml bodies as svn-errors (returned via serf context
1048         ### loop), but ignores non-xml errors.
1049
1050         Current code depends on this behavior and checks itself while other
1051         continues, and then verifies if work has been performed.
1052
1053         ### TODO: Make error checking consistent */
1054
1055      /* ### If !GOT_EXPECTED_STATUS, this should always produce an error */
1056      return svn_error_trace(svn_ra_serf__expect_empty_body(
1057                               request, response, ectx->handler,
1058                               scratch_pool));
1059    }
1060
1061  if (!ectx->parser)
1062    {
1063      ectx->parser = XML_ParserCreate(NULL);
1064      apr_pool_cleanup_register(ectx->cleanup_pool, &ectx->parser,
1065                                xml_parser_cleanup, apr_pool_cleanup_null);
1066      XML_SetUserData(ectx->parser, ectx);
1067      XML_SetElementHandler(ectx->parser, expat_start, expat_end);
1068      XML_SetCharacterDataHandler(ectx->parser, expat_cdata);
1069
1070#if XML_VERSION_AT_LEAST(1, 95, 8)
1071      XML_SetEntityDeclHandler(ectx->parser, expat_entity_declaration);
1072#else
1073      XML_SetDefaultHandler(ectx->parser, expat_default_handler);
1074#endif
1075    }
1076
1077  while (1)
1078    {
1079      apr_status_t status;
1080      const char *data;
1081      apr_size_t len;
1082      svn_error_t *err;
1083      svn_boolean_t at_eof = FALSE;
1084
1085      status = serf_bucket_read(response, PARSE_CHUNK_SIZE, &data, &len);
1086      if (SERF_BUCKET_READ_ERROR(status))
1087        return svn_ra_serf__wrap_err(status, NULL);
1088      else if (APR_STATUS_IS_EOF(status))
1089        at_eof = TRUE;
1090
1091      err = parse_xml(ectx, data, len, at_eof /* isFinal */);
1092
1093      if (at_eof || err)
1094        {
1095          /* Release xml parser state/tables. */
1096          apr_pool_cleanup_run(ectx->cleanup_pool, &ectx->parser,
1097                               xml_parser_cleanup);
1098        }
1099
1100      SVN_ERR(err);
1101
1102      /* The parsing went fine. What has the bucket told us?  */
1103      if (at_eof)
1104        {
1105          /* Make sure we actually got xml and clean up after parsing */
1106          SVN_ERR(svn_ra_serf__xml_context_done(ectx->xmlctx));
1107        }
1108
1109      if (status && !SERF_BUCKET_READ_ERROR(status))
1110        {
1111          return svn_ra_serf__wrap_err(status, NULL);
1112        }
1113    }
1114
1115  /* NOTREACHED */
1116}
1117
1118
1119svn_ra_serf__handler_t *
1120svn_ra_serf__create_expat_handler(svn_ra_serf__session_t *session,
1121                                  svn_ra_serf__xml_context_t *xmlctx,
1122                                  const int *expected_status,
1123                                  apr_pool_t *result_pool)
1124{
1125  svn_ra_serf__handler_t *handler;
1126  struct expat_ctx_t *ectx;
1127
1128  ectx = apr_pcalloc(result_pool, sizeof(*ectx));
1129  ectx->xmlctx = xmlctx;
1130  ectx->parser = NULL;
1131  ectx->expected_status = expected_status;
1132  ectx->cleanup_pool = result_pool;
1133
1134  handler = svn_ra_serf__create_handler(session, result_pool);
1135  handler->response_handler = expat_response_handler;
1136  handler->response_baton = ectx;
1137
1138  ectx->handler = handler;
1139
1140  return handler;
1141}
1142