runtests.c revision 302385
1/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
2   See the file COPYING for copying permission.
3
   runtests.c : run the Expat test suite
5*/
6
7#ifdef HAVE_EXPAT_CONFIG_H
8#include <expat_config.h>
9#endif
10
11#include <assert.h>
12#include <stdlib.h>
13#include <stdio.h>
14#include <string.h>
15#include <stdint.h>
16#include <stddef.h>  /* ptrdiff_t */
17#ifndef __cplusplus
18# include <stdbool.h>
19#endif
20
21#include "expat.h"
22#include "chardata.h"
23#include "internal.h"  /* for UNUSED_P only */
24#include "minicheck.h"
25
26#if defined(__amigaos__) && defined(__USE_INLINE__)
27#include <proto/expat.h>
28#endif
29
30#ifdef XML_LARGE_SIZE
31#define XML_FMT_INT_MOD "ll"
32#else
33#define XML_FMT_INT_MOD "l"
34#endif
35
36static XML_Parser parser;
37
38
39static void
40basic_setup(void)
41{
42    parser = XML_ParserCreate(NULL);
43    if (parser == NULL)
44        fail("Parser not created.");
45}
46
47static void
48basic_teardown(void)
49{
50    if (parser != NULL)
51        XML_ParserFree(parser);
52}
53
54/* Generate a failure using the parser state to create an error message;
55   this should be used when the parser reports an error we weren't
56   expecting.
57*/
58static void
59_xml_failure(XML_Parser parser, const char *file, int line)
60{
61    char buffer[1024];
62    enum XML_Error err = XML_GetErrorCode(parser);
63    sprintf(buffer,
64            "    %d: %s (line %" XML_FMT_INT_MOD "u, offset %"\
65                XML_FMT_INT_MOD "u)\n    reported from %s, line %d\n",
66            err,
67            XML_ErrorString(err),
68            XML_GetCurrentLineNumber(parser),
69            XML_GetCurrentColumnNumber(parser),
70            file, line);
71    _fail_unless(0, file, line, buffer);
72}
73
74static enum XML_Status
75_XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal)
76{
77    enum XML_Status res = XML_STATUS_ERROR;
78    int offset = 0;
79
80    if (len == 0) {
81        return XML_Parse(parser, s, len, isFinal);
82    }
83
84    for (; offset < len; offset++) {
85        const int innerIsFinal = (offset == len - 1) && isFinal;
86        const char c = s[offset]; /* to help out-of-bounds detection */
87        res = XML_Parse(parser, &c, sizeof(char), innerIsFinal);
88        if (res != XML_STATUS_OK) {
89            return res;
90        }
91    }
92    return res;
93}
94
/* Report an unexpected parser error, attributing the failure to the
   source location where this macro is used. */
#define xml_failure(parser) \
        _xml_failure((parser), __FILE__, __LINE__)
96
97static void
98_expect_failure(const char *text, enum XML_Error errorCode, const char *errorMessage,
99                const char *file, int lineno)
100{
101    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
102        /* Hackish use of _fail_unless() macro, but let's us report
103           the right filename and line number. */
104        _fail_unless(0, file, lineno, errorMessage);
105    if (XML_GetErrorCode(parser) != errorCode)
106        _xml_failure(parser, file, lineno);
107}
108
109#define expect_failure(text, errorCode, errorMessage) \
110        _expect_failure((text), (errorCode), (errorMessage), \
111                        __FILE__, __LINE__)
112
113/* Dummy handlers for when we need to set a handler to tickle a bug,
114   but it doesn't need to do anything.
115*/
116
117static void XMLCALL
118dummy_start_doctype_handler(void           *UNUSED_P(userData),
119                            const XML_Char *UNUSED_P(doctypeName),
120                            const XML_Char *UNUSED_P(sysid),
121                            const XML_Char *UNUSED_P(pubid),
122                            int            UNUSED_P(has_internal_subset))
123{}
124
125static void XMLCALL
126dummy_end_doctype_handler(void *UNUSED_P(userData))
127{}
128
129static void XMLCALL
130dummy_entity_decl_handler(void           *UNUSED_P(userData),
131                          const XML_Char *UNUSED_P(entityName),
132                          int            UNUSED_P(is_parameter_entity),
133                          const XML_Char *UNUSED_P(value),
134                          int            UNUSED_P(value_length),
135                          const XML_Char *UNUSED_P(base),
136                          const XML_Char *UNUSED_P(systemId),
137                          const XML_Char *UNUSED_P(publicId),
138                          const XML_Char *UNUSED_P(notationName))
139{}
140
141static void XMLCALL
142dummy_notation_decl_handler(void *UNUSED_P(userData),
143                            const XML_Char *UNUSED_P(notationName),
144                            const XML_Char *UNUSED_P(base),
145                            const XML_Char *UNUSED_P(systemId),
146                            const XML_Char *UNUSED_P(publicId))
147{}
148
149static void XMLCALL
150dummy_element_decl_handler(void *UNUSED_P(userData),
151                           const XML_Char *UNUSED_P(name),
152                           XML_Content *UNUSED_P(model))
153{}
154
155static void XMLCALL
156dummy_attlist_decl_handler(void           *UNUSED_P(userData),
157                           const XML_Char *UNUSED_P(elname),
158                           const XML_Char *UNUSED_P(attname),
159                           const XML_Char *UNUSED_P(att_type),
160                           const XML_Char *UNUSED_P(dflt),
161                           int            UNUSED_P(isrequired))
162{}
163
164static void XMLCALL
165dummy_comment_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(data))
166{}
167
168static void XMLCALL
169dummy_pi_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target), const XML_Char *UNUSED_P(data))
170{}
171
172static void XMLCALL
173dummy_start_element(void *UNUSED_P(userData),
174                    const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
175{}
176
177
178/*
179 * Character & encoding tests.
180 */
181
182START_TEST(test_nul_byte)
183{
184    char text[] = "<doc>\0</doc>";
185
186    /* test that a NUL byte (in US-ASCII data) is an error */
187    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
188        fail("Parser did not report error on NUL-byte.");
189    if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
190        xml_failure(parser);
191}
192END_TEST
193
194
195START_TEST(test_u0000_char)
196{
197    /* test that a NUL byte (in US-ASCII data) is an error */
198    expect_failure("<doc>&#0;</doc>",
199                   XML_ERROR_BAD_CHAR_REF,
200                   "Parser did not report error on NUL-byte.");
201}
202END_TEST
203
204START_TEST(test_bom_utf8)
205{
206    /* This test is really just making sure we don't core on a UTF-8 BOM. */
207    const char *text = "\357\273\277<e/>";
208
209    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
210        xml_failure(parser);
211}
212END_TEST
213
214START_TEST(test_bom_utf16_be)
215{
216    char text[] = "\376\377\0<\0e\0/\0>";
217
218    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
219        xml_failure(parser);
220}
221END_TEST
222
223START_TEST(test_bom_utf16_le)
224{
225    char text[] = "\377\376<\0e\0/\0>\0";
226
227    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
228        xml_failure(parser);
229}
230END_TEST
231
232static void XMLCALL
233accumulate_characters(void *userData, const XML_Char *s, int len)
234{
235    CharData_AppendXMLChars((CharData *)userData, s, len);
236}
237
238static void XMLCALL
239accumulate_attribute(void *userData, const XML_Char *UNUSED_P(name),
240                     const XML_Char **atts)
241{
242    CharData *storage = (CharData *)userData;
243    if (storage->count < 0 && atts != NULL && atts[0] != NULL) {
244        /* "accumulate" the value of the first attribute we see */
245        CharData_AppendXMLChars(storage, atts[1], -1);
246    }
247}
248
249
250static void
251_run_character_check(const XML_Char *text, const XML_Char *expected,
252                     const char *file, int line)
253{
254    CharData storage;
255
256    CharData_Init(&storage);
257    XML_SetUserData(parser, &storage);
258    XML_SetCharacterDataHandler(parser, accumulate_characters);
259    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
260        _xml_failure(parser, file, line);
261    CharData_CheckXMLChars(&storage, expected);
262}
263
264#define run_character_check(text, expected) \
265        _run_character_check(text, expected, __FILE__, __LINE__)
266
267static void
268_run_attribute_check(const XML_Char *text, const XML_Char *expected,
269                     const char *file, int line)
270{
271    CharData storage;
272
273    CharData_Init(&storage);
274    XML_SetUserData(parser, &storage);
275    XML_SetStartElementHandler(parser, accumulate_attribute);
276    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
277        _xml_failure(parser, file, line);
278    CharData_CheckXMLChars(&storage, expected);
279}
280
281#define run_attribute_check(text, expected) \
282        _run_attribute_check(text, expected, __FILE__, __LINE__)
283
284/* Regression test for SF bug #491986. */
285START_TEST(test_danish_latin1)
286{
287    const char *text =
288        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
289        "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
290    run_character_check(text,
291             "J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
292}
293END_TEST
294
295
296/* Regression test for SF bug #514281. */
297START_TEST(test_french_charref_hexidecimal)
298{
299    const char *text =
300        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
301        "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
302    run_character_check(text,
303                        "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
304}
305END_TEST
306
307START_TEST(test_french_charref_decimal)
308{
309    const char *text =
310        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
311        "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
312    run_character_check(text,
313                        "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
314}
315END_TEST
316
317START_TEST(test_french_latin1)
318{
319    const char *text =
320        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
321        "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
322    run_character_check(text,
323                        "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
324}
325END_TEST
326
327START_TEST(test_french_utf8)
328{
329    const char *text =
330        "<?xml version='1.0' encoding='utf-8'?>\n"
331        "<doc>\xC3\xA9</doc>";
332    run_character_check(text, "\xC3\xA9");
333}
334END_TEST
335
336/* Regression test for SF bug #600479.
337   XXX There should be a test that exercises all legal XML Unicode
338   characters as PCDATA and attribute value content, and XML Name
339   characters as part of element and attribute names.
340*/
341START_TEST(test_utf8_false_rejection)
342{
343    const char *text = "<doc>\xEF\xBA\xBF</doc>";
344    run_character_check(text, "\xEF\xBA\xBF");
345}
346END_TEST
347
348/* Regression test for SF bug #477667.
349   This test assures that any 8-bit character followed by a 7-bit
350   character will not be mistakenly interpreted as a valid UTF-8
351   sequence.
352*/
353START_TEST(test_illegal_utf8)
354{
355    char text[100];
356    int i;
357
358    for (i = 128; i <= 255; ++i) {
359        sprintf(text, "<e>%ccd</e>", i);
360        if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
361            sprintf(text,
362                    "expected token error for '%c' (ordinal %d) in UTF-8 text",
363                    i, i);
364            fail(text);
365        }
366        else if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
367            xml_failure(parser);
368        /* Reset the parser since we use the same parser repeatedly. */
369        XML_ParserReset(parser, NULL);
370    }
371}
372END_TEST
373
374
375/* Examples, not masks: */
376#define UTF8_LEAD_1  "\x7f"  /* 0b01111111 */
377#define UTF8_LEAD_2  "\xdf"  /* 0b11011111 */
378#define UTF8_LEAD_3  "\xef"  /* 0b11101111 */
379#define UTF8_LEAD_4  "\xf7"  /* 0b11110111 */
380#define UTF8_FOLLOW  "\xbf"  /* 0b10111111 */
381
382START_TEST(test_utf8_auto_align)
383{
384    struct TestCase {
385        ptrdiff_t expectedMovementInChars;
386        const char * input;
387    };
388
389    struct TestCase cases[] = {
390        {00, ""},
391
392        {00, UTF8_LEAD_1},
393
394        {-1, UTF8_LEAD_2},
395        {00, UTF8_LEAD_2 UTF8_FOLLOW},
396
397        {-1, UTF8_LEAD_3},
398        {-2, UTF8_LEAD_3 UTF8_FOLLOW},
399        {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
400
401        {-1, UTF8_LEAD_4},
402        {-2, UTF8_LEAD_4 UTF8_FOLLOW},
403        {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
404        {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
405    };
406
407    size_t i = 0;
408    bool success = true;
409    for (; i < sizeof(cases) / sizeof(*cases); i++) {
410        const char * fromLim = cases[i].input + strlen(cases[i].input);
411        const char * const fromLimInitially = fromLim;
412        ptrdiff_t actualMovementInChars;
413
414        align_limit_to_full_utf8_characters(cases[i].input, &fromLim);
415
416        actualMovementInChars = (fromLim - fromLimInitially);
417        if (actualMovementInChars != cases[i].expectedMovementInChars) {
418            size_t j = 0;
419            success = false;
420            printf("[-] UTF-8 case %2lu: Expected movement by %2ld chars"
421                    ", actually moved by %2ld chars: \"",
422                    i + 1, cases[i].expectedMovementInChars, actualMovementInChars);
423            for (; j < strlen(cases[i].input); j++) {
424                printf("\\x%02x", (unsigned char)cases[i].input[j]);
425            }
426            printf("\"\n");
427        }
428    }
429
430    if (! success) {
431        fail("UTF-8 auto-alignment is not bullet-proof\n");
432    }
433}
434END_TEST
435
436START_TEST(test_utf16)
437{
438    /* <?xml version="1.0" encoding="UTF-16"?>
439       <doc a='123'>some text</doc>
440    */
441    char text[] =
442        "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
443        "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
444        "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
445        "\000'\000?\000>\000\n"
446        "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'"
447        "\000>\000s\000o\000m\000e\000 \000t\000e\000x\000t\000<\000/"
448        "\000d\000o\000c\000>";
449    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
450        xml_failure(parser);
451}
452END_TEST
453
454START_TEST(test_utf16_le_epilog_newline)
455{
456    unsigned int first_chunk_bytes = 17;
457    char text[] =
458        "\xFF\xFE"                      /* BOM */
459        "<\000e\000/\000>\000"          /* document element */
460        "\r\000\n\000\r\000\n\000";     /* epilog */
461
462    if (first_chunk_bytes >= sizeof(text) - 1)
463        fail("bad value of first_chunk_bytes");
464    if (  _XML_Parse_SINGLE_BYTES(parser, text, first_chunk_bytes, XML_FALSE)
465          == XML_STATUS_ERROR)
466        xml_failure(parser);
467    else {
468        enum XML_Status rc;
469        rc = _XML_Parse_SINGLE_BYTES(parser, text + first_chunk_bytes,
470                       sizeof(text) - first_chunk_bytes - 1, XML_TRUE);
471        if (rc == XML_STATUS_ERROR)
472            xml_failure(parser);
473    }
474}
475END_TEST
476
477/* Regression test for SF bug #481609, #774028. */
478START_TEST(test_latin1_umlauts)
479{
480    const char *text =
481        "<?xml version='1.0' encoding='iso-8859-1'?>\n"
482        "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
483        "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
484    const char *utf8 =
485        "\xC3\xA4 \xC3\xB6 \xC3\xBC "
486        "\xC3\xA4 \xC3\xB6 \xC3\xBC "
487        "\xC3\xA4 \xC3\xB6 \xC3\xBC >";
488    run_character_check(text, utf8);
489    XML_ParserReset(parser, NULL);
490    run_attribute_check(text, utf8);
491}
492END_TEST
493
494/* Regression test #1 for SF bug #653180. */
495START_TEST(test_line_number_after_parse)
496{
497    const char *text =
498        "<tag>\n"
499        "\n"
500        "\n</tag>";
501    XML_Size lineno;
502
503    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
504        xml_failure(parser);
505    lineno = XML_GetCurrentLineNumber(parser);
506    if (lineno != 4) {
507        char buffer[100];
508        sprintf(buffer,
509            "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
510        fail(buffer);
511    }
512}
513END_TEST
514
515/* Regression test #2 for SF bug #653180. */
516START_TEST(test_column_number_after_parse)
517{
518    const char *text = "<tag></tag>";
519    XML_Size colno;
520
521    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
522        xml_failure(parser);
523    colno = XML_GetCurrentColumnNumber(parser);
524    if (colno != 11) {
525        char buffer[100];
526        sprintf(buffer,
527            "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
528        fail(buffer);
529    }
530}
531END_TEST
532
533static void XMLCALL
534start_element_event_handler2(void *userData, const XML_Char *name,
535			     const XML_Char **UNUSED_P(attr))
536{
537    CharData *storage = (CharData *) userData;
538    char buffer[100];
539
540    sprintf(buffer,
541        "<%s> at col:%" XML_FMT_INT_MOD "u line:%"\
542            XML_FMT_INT_MOD "u\n", name,
543	    XML_GetCurrentColumnNumber(parser),
544	    XML_GetCurrentLineNumber(parser));
545    CharData_AppendString(storage, buffer);
546}
547
548static void XMLCALL
549end_element_event_handler2(void *userData, const XML_Char *name)
550{
551    CharData *storage = (CharData *) userData;
552    char buffer[100];
553
554    sprintf(buffer,
555        "</%s> at col:%" XML_FMT_INT_MOD "u line:%"\
556            XML_FMT_INT_MOD "u\n", name,
557	    XML_GetCurrentColumnNumber(parser),
558	    XML_GetCurrentLineNumber(parser));
559    CharData_AppendString(storage, buffer);
560}
561
562/* Regression test #3 for SF bug #653180. */
563START_TEST(test_line_and_column_numbers_inside_handlers)
564{
565    const char *text =
566        "<a>\n"        /* Unix end-of-line */
567        "  <b>\r\n"    /* Windows end-of-line */
568        "    <c/>\r"   /* Mac OS end-of-line */
569        "  </b>\n"
570        "  <d>\n"
571        "    <f/>\n"
572        "  </d>\n"
573        "</a>";
574    const char *expected =
575        "<a> at col:0 line:1\n"
576        "<b> at col:2 line:2\n"
577        "<c> at col:4 line:3\n"
578        "</c> at col:8 line:3\n"
579        "</b> at col:2 line:4\n"
580        "<d> at col:2 line:5\n"
581        "<f> at col:4 line:6\n"
582        "</f> at col:8 line:6\n"
583        "</d> at col:2 line:7\n"
584        "</a> at col:0 line:8\n";
585    CharData storage;
586
587    CharData_Init(&storage);
588    XML_SetUserData(parser, &storage);
589    XML_SetStartElementHandler(parser, start_element_event_handler2);
590    XML_SetEndElementHandler(parser, end_element_event_handler2);
591    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
592        xml_failure(parser);
593
594    CharData_CheckString(&storage, expected);
595}
596END_TEST
597
598/* Regression test #4 for SF bug #653180. */
599START_TEST(test_line_number_after_error)
600{
601    const char *text =
602        "<a>\n"
603        "  <b>\n"
604        "  </a>";  /* missing </b> */
605    XML_Size lineno;
606    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
607        fail("Expected a parse error");
608
609    lineno = XML_GetCurrentLineNumber(parser);
610    if (lineno != 3) {
611        char buffer[100];
612        sprintf(buffer, "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
613        fail(buffer);
614    }
615}
616END_TEST
617
618/* Regression test #5 for SF bug #653180. */
619START_TEST(test_column_number_after_error)
620{
621    const char *text =
622        "<a>\n"
623        "  <b>\n"
624        "  </a>";  /* missing </b> */
625    XML_Size colno;
626    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
627        fail("Expected a parse error");
628
629    colno = XML_GetCurrentColumnNumber(parser);
630    if (colno != 4) {
631        char buffer[100];
632        sprintf(buffer,
633            "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
634        fail(buffer);
635    }
636}
637END_TEST
638
639/* Regression test for SF bug #478332. */
640START_TEST(test_really_long_lines)
641{
642    /* This parses an input line longer than INIT_DATA_BUF_SIZE
643       characters long (defined to be 1024 in xmlparse.c).  We take a
644       really cheesy approach to building the input buffer, because
645       this avoids writing bugs in buffer-filling code.
646    */
647    const char *text =
648        "<e>"
649        /* 64 chars */
650        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
651        /* until we have at least 1024 characters on the line: */
652        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
653        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
654        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
655        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
656        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
657        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
658        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
659        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
660        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
661        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
662        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
663        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
664        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
665        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
666        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
667        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
668        "</e>";
669    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
670        xml_failure(parser);
671}
672END_TEST
673
674
675/*
676 * Element event tests.
677 */
678
679static void XMLCALL
680end_element_event_handler(void *userData, const XML_Char *name)
681{
682    CharData *storage = (CharData *) userData;
683    CharData_AppendString(storage, "/");
684    CharData_AppendXMLChars(storage, name, -1);
685}
686
687START_TEST(test_end_element_events)
688{
689    const char *text = "<a><b><c/></b><d><f/></d></a>";
690    const char *expected = "/c/b/f/d/a";
691    CharData storage;
692
693    CharData_Init(&storage);
694    XML_SetUserData(parser, &storage);
695    XML_SetEndElementHandler(parser, end_element_event_handler);
696    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
697        xml_failure(parser);
698    CharData_CheckString(&storage, expected);
699}
700END_TEST
701
702
703/*
704 * Attribute tests.
705 */
706
/* Helpers used by the following test; the test checks any "attr",
   "ents" and "refs" attributes to make sure whitespace has been
   normalized.

   Return true (1) if whitespace has been normalized in the string `s`,
   using the rules for attribute value normalization, and false (0)
   otherwise.  A tab, newline or carriage return is never acceptable.
   The 'is_cdata' flag is needed since CDATA attributes don't need to
   have multiple whitespace characters collapsed to a single space,
   while other attribute data types do: for those, leading blanks,
   trailing blanks and runs of more than one blank are all rejected.
   (Section 3.3.3 of the recommendation.)
*/
static int
is_whitespace_normalized(const XML_Char *s, int is_cdata)
{
    int pending_blanks = 0;  /* spaces seen since the last other char */
    int seen_text = 0;       /* has a non-blank character been seen? */
    const XML_Char *p;

    for (p = s; *p != 0; ++p) {
        switch (*p) {
        case ' ':
            ++pending_blanks;
            break;
        case '\t':
        case '\n':
        case '\r':
            /* only plain spaces may survive normalization */
            return 0;
        default:
            if (!is_cdata) {
                /* illegal leading blanks */
                if (!seen_text && pending_blanks > 0)
                    return 0;
                /* uncollapsed run of blanks between tokens */
                if (seen_text && pending_blanks > 1)
                    return 0;
            }
            seen_text = 1;
            pending_blanks = 0;
            break;
        }
    }
    /* whatever is still pending here is trailing blanks */
    return (is_cdata || pending_blanks == 0) ? 1 : 0;
}
743
744/* Check the attribute whitespace checker: */
745static void
746testhelper_is_whitespace_normalized(void)
747{
748    assert(is_whitespace_normalized("abc", 0));
749    assert(is_whitespace_normalized("abc", 1));
750    assert(is_whitespace_normalized("abc def ghi", 0));
751    assert(is_whitespace_normalized("abc def ghi", 1));
752    assert(!is_whitespace_normalized(" abc def ghi", 0));
753    assert(is_whitespace_normalized(" abc def ghi", 1));
754    assert(!is_whitespace_normalized("abc  def ghi", 0));
755    assert(is_whitespace_normalized("abc  def ghi", 1));
756    assert(!is_whitespace_normalized("abc def ghi ", 0));
757    assert(is_whitespace_normalized("abc def ghi ", 1));
758    assert(!is_whitespace_normalized(" ", 0));
759    assert(is_whitespace_normalized(" ", 1));
760    assert(!is_whitespace_normalized("\t", 0));
761    assert(!is_whitespace_normalized("\t", 1));
762    assert(!is_whitespace_normalized("\n", 0));
763    assert(!is_whitespace_normalized("\n", 1));
764    assert(!is_whitespace_normalized("\r", 0));
765    assert(!is_whitespace_normalized("\r", 1));
766    assert(!is_whitespace_normalized("abc\t def", 1));
767}
768
769static void XMLCALL
770check_attr_contains_normalized_whitespace(void *UNUSED_P(userData),
771                                          const XML_Char *UNUSED_P(name),
772                                          const XML_Char **atts)
773{
774    int i;
775    for (i = 0; atts[i] != NULL; i += 2) {
776        const XML_Char *attrname = atts[i];
777        const XML_Char *value = atts[i + 1];
778        if (strcmp("attr", attrname) == 0
779            || strcmp("ents", attrname) == 0
780            || strcmp("refs", attrname) == 0) {
781            if (!is_whitespace_normalized(value, 0)) {
782                char buffer[256];
783                sprintf(buffer, "attribute value not normalized: %s='%s'",
784                        attrname, value);
785                fail(buffer);
786            }
787        }
788    }
789}
790
791START_TEST(test_attr_whitespace_normalization)
792{
793    const char *text =
794        "<!DOCTYPE doc [\n"
795        "  <!ATTLIST doc\n"
796        "            attr NMTOKENS #REQUIRED\n"
797        "            ents ENTITIES #REQUIRED\n"
798        "            refs IDREFS   #REQUIRED>\n"
799        "]>\n"
800        "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
801        "     ents=' ent-1   \t\r\n"
802        "            ent-2  ' >\n"
803        "  <e id='id-1'/>\n"
804        "  <e id='id-2'/>\n"
805        "</doc>";
806
807    XML_SetStartElementHandler(parser,
808                               check_attr_contains_normalized_whitespace);
809    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
810        xml_failure(parser);
811}
812END_TEST
813
814
815/*
816 * XML declaration tests.
817 */
818
819START_TEST(test_xmldecl_misplaced)
820{
821    expect_failure("\n"
822                   "<?xml version='1.0'?>\n"
823                   "<a/>",
824                   XML_ERROR_MISPLACED_XML_PI,
825                   "failed to report misplaced XML declaration");
826}
827END_TEST
828
829/* Regression test for SF bug #584832. */
830static int XMLCALL
831UnknownEncodingHandler(void *UNUSED_P(data),const XML_Char *encoding,XML_Encoding *info)
832{
833    if (strcmp(encoding,"unsupported-encoding") == 0) {
834        int i;
835        for (i = 0; i < 256; ++i)
836            info->map[i] = i;
837        info->data = NULL;
838        info->convert = NULL;
839        info->release = NULL;
840        return XML_STATUS_OK;
841    }
842    return XML_STATUS_ERROR;
843}
844
845START_TEST(test_unknown_encoding_internal_entity)
846{
847    const char *text =
848        "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
849        "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
850        "<test a='&foo;'/>";
851
852    XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, NULL);
853    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
854        xml_failure(parser);
855}
856END_TEST
857
858/* Regression test for SF bug #620106. */
859static int XMLCALL
860external_entity_loader_set_encoding(XML_Parser parser,
861                                    const XML_Char *context,
862                                    const XML_Char *UNUSED_P(base),
863                                    const XML_Char *UNUSED_P(systemId),
864                                    const XML_Char *UNUSED_P(publicId))
865{
866    /* This text says it's an unsupported encoding, but it's really
867       UTF-8, which we tell Expat using XML_SetEncoding().
868    */
869    const char *text =
870        "<?xml encoding='iso-8859-3'?>"
871        "\xC3\xA9";
872    XML_Parser extparser;
873
874    extparser = XML_ExternalEntityParserCreate(parser, context, NULL);
875    if (extparser == NULL)
876        fail("Could not create external entity parser.");
877    if (!XML_SetEncoding(extparser, "utf-8"))
878        fail("XML_SetEncoding() ignored for external entity");
879    if (  _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
880          == XML_STATUS_ERROR) {
881        xml_failure(parser);
882        return 0;
883    }
884    return 1;
885}
886
887START_TEST(test_ext_entity_set_encoding)
888{
889    const char *text =
890        "<!DOCTYPE doc [\n"
891        "  <!ENTITY en SYSTEM 'http://xml.libexpat.org/dummy.ent'>\n"
892        "]>\n"
893        "<doc>&en;</doc>";
894
895    XML_SetExternalEntityRefHandler(parser,
896                                    external_entity_loader_set_encoding);
897    run_character_check(text, "\xC3\xA9");
898}
899END_TEST
900
901/* Test that no error is reported for unknown entities if we don't
902   read an external subset.  This was fixed in Expat 1.95.5.
903*/
904START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
905    const char *text =
906        "<!DOCTYPE doc SYSTEM 'foo'>\n"
907        "<doc>&entity;</doc>";
908
909    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
910        xml_failure(parser);
911}
912END_TEST
913
914/* Test that an error is reported for unknown entities if we don't
915   have an external subset.
916*/
917START_TEST(test_wfc_undeclared_entity_no_external_subset) {
918    expect_failure("<doc>&entity;</doc>",
919                   XML_ERROR_UNDEFINED_ENTITY,
920                   "Parser did not report undefined entity w/out a DTD.");
921}
922END_TEST
923
924/* Test that an error is reported for unknown entities if we don't
925   read an external subset, but have been declared standalone.
926*/
927START_TEST(test_wfc_undeclared_entity_standalone) {
928    const char *text =
929        "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
930        "<!DOCTYPE doc SYSTEM 'foo'>\n"
931        "<doc>&entity;</doc>";
932
933    expect_failure(text,
934                   XML_ERROR_UNDEFINED_ENTITY,
935                   "Parser did not report undefined entity (standalone).");
936}
937END_TEST
938
939static int XMLCALL
940external_entity_loader(XML_Parser parser,
941                       const XML_Char *context,
942                       const XML_Char *UNUSED_P(base),
943                       const XML_Char *UNUSED_P(systemId),
944                       const XML_Char *UNUSED_P(publicId))
945{
946    char *text = (char *)XML_GetUserData(parser);
947    XML_Parser extparser;
948
949    extparser = XML_ExternalEntityParserCreate(parser, context, NULL);
950    if (extparser == NULL)
951        fail("Could not create external entity parser.");
952    if (  _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
953          == XML_STATUS_ERROR) {
954        xml_failure(parser);
955        return XML_STATUS_ERROR;
956    }
957    return XML_STATUS_OK;
958}
959
960/* Test that an error is reported for unknown entities if we have read
961   an external subset, and standalone is true.
962*/
963START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
964    const char *text =
965        "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
966        "<!DOCTYPE doc SYSTEM 'foo'>\n"
967        "<doc>&entity;</doc>";
968    char foo_text[] =
969        "<!ELEMENT doc (#PCDATA)*>";
970
971    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
972    XML_SetUserData(parser, foo_text);
973    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
974    expect_failure(text,
975                   XML_ERROR_UNDEFINED_ENTITY,
976                   "Parser did not report undefined entity (external DTD).");
977}
978END_TEST
979
980/* Test that no error is reported for unknown entities if we have read
981   an external subset, and standalone is false.
982*/
983START_TEST(test_wfc_undeclared_entity_with_external_subset) {
984    const char *text =
985        "<?xml version='1.0' encoding='us-ascii'?>\n"
986        "<!DOCTYPE doc SYSTEM 'foo'>\n"
987        "<doc>&entity;</doc>";
988    char foo_text[] =
989        "<!ELEMENT doc (#PCDATA)*>";
990
991    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
992    XML_SetUserData(parser, foo_text);
993    XML_SetExternalEntityRefHandler(parser, external_entity_loader);
994    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
995        xml_failure(parser);
996}
997END_TEST
998
999START_TEST(test_wfc_no_recursive_entity_refs)
1000{
1001    const char *text =
1002        "<!DOCTYPE doc [\n"
1003        "  <!ENTITY entity '&#38;entity;'>\n"
1004        "]>\n"
1005        "<doc>&entity;</doc>";
1006
1007    expect_failure(text,
1008                   XML_ERROR_RECURSIVE_ENTITY_REF,
1009                   "Parser did not report recursive entity reference.");
1010}
1011END_TEST
1012
1013/* Regression test for SF bug #483514. */
1014START_TEST(test_dtd_default_handling)
1015{
1016    const char *text =
1017        "<!DOCTYPE doc [\n"
1018        "<!ENTITY e SYSTEM 'http://xml.libexpat.org/e'>\n"
1019        "<!NOTATION n SYSTEM 'http://xml.libexpat.org/n'>\n"
1020        "<!ELEMENT doc EMPTY>\n"
1021        "<!ATTLIST doc a CDATA #IMPLIED>\n"
1022        "<?pi in dtd?>\n"
1023        "<!--comment in dtd-->\n"
1024        "]><doc/>";
1025
1026    XML_SetDefaultHandler(parser, accumulate_characters);
1027    XML_SetDoctypeDeclHandler(parser,
1028                              dummy_start_doctype_handler,
1029                              dummy_end_doctype_handler);
1030    XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler);
1031    XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler);
1032    XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
1033    XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler);
1034    XML_SetProcessingInstructionHandler(parser, dummy_pi_handler);
1035    XML_SetCommentHandler(parser, dummy_comment_handler);
1036    run_character_check(text, "\n\n\n\n\n\n\n<doc/>");
1037}
1038END_TEST
1039
1040/* See related SF bug #673791.
1041   When namespace processing is enabled, setting the namespace URI for
1042   a prefix is not allowed; this test ensures that it *is* allowed
1043   when namespace processing is not enabled.
1044   (See Namespaces in XML, section 2.)
1045*/
1046START_TEST(test_empty_ns_without_namespaces)
1047{
1048    const char *text =
1049        "<doc xmlns:prefix='http://www.example.com/'>\n"
1050        "  <e xmlns:prefix=''/>\n"
1051        "</doc>";
1052
1053    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1054        xml_failure(parser);
1055}
1056END_TEST
1057
1058/* Regression test for SF bug #824420.
1059   Checks that an xmlns:prefix attribute set in an attribute's default
1060   value isn't misinterpreted.
1061*/
1062START_TEST(test_ns_in_attribute_default_without_namespaces)
1063{
1064    const char *text =
1065        "<!DOCTYPE e:element [\n"
1066        "  <!ATTLIST e:element\n"
1067        "    xmlns:e CDATA 'http://example.com/'>\n"
1068        "      ]>\n"
1069        "<e:element/>";
1070
1071    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1072        xml_failure(parser);
1073}
1074END_TEST
1075
/* Document with a single <s> element holding 1200 bytes of character
   data: twenty repetitions of a sixty-digit run.  Used by the tests
   that need long character data.
*/
#define SIXTY_DIGITS \
    "012345678901234567890123456789012345678901234567890123456789"
static const char *long_character_data_text =
    "<?xml version='1.0' encoding='iso-8859-1'?><s>"
    SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS
    SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS
    SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS
    SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS SIXTY_DIGITS
    "</s>";
#undef SIXTY_DIGITS
1099
1100static XML_Bool resumable = XML_FALSE;
1101
1102static void
1103clearing_aborting_character_handler(void *UNUSED_P(userData),
1104                                    const XML_Char *UNUSED_P(s), int UNUSED_P(len))
1105{
1106    XML_StopParser(parser, resumable);
1107    XML_SetCharacterDataHandler(parser, NULL);
1108}
1109
1110/* Regression test for SF bug #1515266: missing check of stopped
1111   parser in doContext() 'for' loop. */
1112START_TEST(test_stop_parser_between_char_data_calls)
1113{
1114    /* The sample data must be big enough that there are two calls to
1115       the character data handler from within the inner "for" loop of
1116       the XML_TOK_DATA_CHARS case in doContent(), and the character
1117       handler must stop the parser and clear the character data
1118       handler.
1119    */
1120    const char *text = long_character_data_text;
1121
1122    XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
1123    resumable = XML_FALSE;
1124    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
1125        xml_failure(parser);
1126    if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED)
1127        xml_failure(parser);
1128}
1129END_TEST
1130
1131/* Regression test for SF bug #1515266: missing check of stopped
1132   parser in doContext() 'for' loop. */
1133START_TEST(test_suspend_parser_between_char_data_calls)
1134{
1135    /* The sample data must be big enough that there are two calls to
1136       the character data handler from within the inner "for" loop of
1137       the XML_TOK_DATA_CHARS case in doContent(), and the character
1138       handler must stop the parser and clear the character data
1139       handler.
1140    */
1141    const char *text = long_character_data_text;
1142
1143    XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
1144    resumable = XML_TRUE;
1145    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
1146        xml_failure(parser);
1147    if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
1148        xml_failure(parser);
1149}
1150END_TEST
1151
1152START_TEST(test_good_cdata_ascii)
1153{
1154    const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1155    const char *expected = "<greeting>Hello, world!</greeting>";
1156
1157    CharData storage;
1158    CharData_Init(&storage);
1159    XML_SetUserData(parser, &storage);
1160    XML_SetCharacterDataHandler(parser, accumulate_characters);
1161
1162    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1163        xml_failure(parser);
1164    CharData_CheckXMLChars(&storage, expected);
1165}
1166END_TEST
1167
1168START_TEST(test_good_cdata_utf16)
1169{
1170    /* Test data is:
1171     *   <?xml version='1.0' encoding='utf-16'?>
1172     *   <a><![CDATA[hello]]></a>
1173     */
1174    const char text[] =
1175            "\0<\0?\0x\0m\0l\0"
1176                " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1177                " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
1178                "\0?\0>\0\n"
1179            "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1180    const char *expected = "hello";
1181
1182    CharData storage;
1183    CharData_Init(&storage);
1184    XML_SetUserData(parser, &storage);
1185    XML_SetCharacterDataHandler(parser, accumulate_characters);
1186
1187    if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1188        xml_failure(parser);
1189    CharData_CheckXMLChars(&storage, expected);
1190}
1191END_TEST
1192
1193START_TEST(test_bad_cdata)
1194{
1195    struct CaseData {
1196        const char *text;
1197        enum XML_Error expectedError;
1198    };
1199
1200    struct CaseData cases[] = {
1201        {"<a><", XML_ERROR_UNCLOSED_TOKEN},
1202        {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1203        {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1204        {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1205        {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1206        {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1207        {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1208        {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1209
1210        {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1211        {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1212        {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1213
1214        {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1215        {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1216        {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1217        {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1218        {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1219        {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1220        {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1221
1222        {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1223        {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1224        {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}
1225    };
1226
1227    size_t i = 0;
1228    for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1229        const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1230                parser, cases[i].text, strlen(cases[i].text), XML_TRUE);
1231        const enum XML_Error actualError = XML_GetErrorCode(parser);
1232
1233        assert(actualStatus == XML_STATUS_ERROR);
1234
1235        if (actualError != cases[i].expectedError) {
1236            char message[100];
1237            sprintf(message, "Expected error %d but got error %d for case %u: \"%s\"\n",
1238                    cases[i].expectedError, actualError, (unsigned int)i + 1, cases[i].text);
1239            fail(message);
1240        }
1241
1242        XML_ParserReset(parser, NULL);
1243    }
1244}
1245END_TEST
1246
1247
1248/*
1249 * Namespaces tests.
1250 */
1251
1252static void
1253namespace_setup(void)
1254{
1255    parser = XML_ParserCreateNS(NULL, ' ');
1256    if (parser == NULL)
1257        fail("Parser not created.");
1258}
1259
/* Fixture teardown for the namespace tests; the shared parser is
   released exactly as for the basic tests. */
static void
namespace_teardown(void)
{
    basic_teardown();
}
1265
1266/* Check that an element name and attribute name match the expected values.
1267   The expected values are passed as an array reference of string pointers
1268   provided as the userData argument; the first is the expected
1269   element name, and the second is the expected attribute name.
1270*/
1271static void XMLCALL
1272triplet_start_checker(void *userData, const XML_Char *name,
1273                      const XML_Char **atts)
1274{
1275    char **elemstr = (char **)userData;
1276    char buffer[1024];
1277    if (strcmp(elemstr[0], name) != 0) {
1278        sprintf(buffer, "unexpected start string: '%s'", name);
1279        fail(buffer);
1280    }
1281    if (strcmp(elemstr[1], atts[0]) != 0) {
1282        sprintf(buffer, "unexpected attribute string: '%s'", atts[0]);
1283        fail(buffer);
1284    }
1285}
1286
1287/* Check that the element name passed to the end-element handler matches
1288   the expected value.  The expected value is passed as the first element
1289   in an array of strings passed as the userData argument.
1290*/
1291static void XMLCALL
1292triplet_end_checker(void *userData, const XML_Char *name)
1293{
1294    char **elemstr = (char **)userData;
1295    if (strcmp(elemstr[0], name) != 0) {
1296        char buffer[1024];
1297        sprintf(buffer, "unexpected end string: '%s'", name);
1298        fail(buffer);
1299    }
1300}
1301
1302START_TEST(test_return_ns_triplet)
1303{
1304    const char *text =
1305        "<foo:e xmlns:foo='http://expat.sf.net/' bar:a='12'\n"
1306        "       xmlns:bar='http://expat.sf.net/'></foo:e>";
1307    const char *elemstr[] = {
1308        "http://expat.sf.net/ e foo",
1309        "http://expat.sf.net/ a bar"
1310    };
1311    XML_SetReturnNSTriplet(parser, XML_TRUE);
1312    XML_SetUserData(parser, elemstr);
1313    XML_SetElementHandler(parser, triplet_start_checker, triplet_end_checker);
1314    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1315        xml_failure(parser);
1316}
1317END_TEST
1318
1319static void XMLCALL
1320overwrite_start_checker(void *userData, const XML_Char *name,
1321                        const XML_Char **atts)
1322{
1323    CharData *storage = (CharData *) userData;
1324    CharData_AppendString(storage, "start ");
1325    CharData_AppendXMLChars(storage, name, -1);
1326    while (*atts != NULL) {
1327        CharData_AppendString(storage, "\nattribute ");
1328        CharData_AppendXMLChars(storage, *atts, -1);
1329        atts += 2;
1330    }
1331    CharData_AppendString(storage, "\n");
1332}
1333
1334static void XMLCALL
1335overwrite_end_checker(void *userData, const XML_Char *name)
1336{
1337    CharData *storage = (CharData *) userData;
1338    CharData_AppendString(storage, "end ");
1339    CharData_AppendXMLChars(storage, name, -1);
1340    CharData_AppendString(storage, "\n");
1341}
1342
1343static void
1344run_ns_tagname_overwrite_test(const char *text, const char *result)
1345{
1346    CharData storage;
1347    CharData_Init(&storage);
1348    XML_SetUserData(parser, &storage);
1349    XML_SetElementHandler(parser,
1350                          overwrite_start_checker, overwrite_end_checker);
1351    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1352        xml_failure(parser);
1353    CharData_CheckString(&storage, result);
1354}
1355
1356/* Regression test for SF bug #566334. */
1357START_TEST(test_ns_tagname_overwrite)
1358{
1359    const char *text =
1360        "<n:e xmlns:n='http://xml.libexpat.org/'>\n"
1361        "  <n:f n:attr='foo'/>\n"
1362        "  <n:g n:attr2='bar'/>\n"
1363        "</n:e>";
1364    const char *result =
1365        "start http://xml.libexpat.org/ e\n"
1366        "start http://xml.libexpat.org/ f\n"
1367        "attribute http://xml.libexpat.org/ attr\n"
1368        "end http://xml.libexpat.org/ f\n"
1369        "start http://xml.libexpat.org/ g\n"
1370        "attribute http://xml.libexpat.org/ attr2\n"
1371        "end http://xml.libexpat.org/ g\n"
1372        "end http://xml.libexpat.org/ e\n";
1373    run_ns_tagname_overwrite_test(text, result);
1374}
1375END_TEST
1376
1377/* Regression test for SF bug #566334. */
1378START_TEST(test_ns_tagname_overwrite_triplet)
1379{
1380    const char *text =
1381        "<n:e xmlns:n='http://xml.libexpat.org/'>\n"
1382        "  <n:f n:attr='foo'/>\n"
1383        "  <n:g n:attr2='bar'/>\n"
1384        "</n:e>";
1385    const char *result =
1386        "start http://xml.libexpat.org/ e n\n"
1387        "start http://xml.libexpat.org/ f n\n"
1388        "attribute http://xml.libexpat.org/ attr n\n"
1389        "end http://xml.libexpat.org/ f n\n"
1390        "start http://xml.libexpat.org/ g n\n"
1391        "attribute http://xml.libexpat.org/ attr2 n\n"
1392        "end http://xml.libexpat.org/ g n\n"
1393        "end http://xml.libexpat.org/ e n\n";
1394    XML_SetReturnNSTriplet(parser, XML_TRUE);
1395    run_ns_tagname_overwrite_test(text, result);
1396}
1397END_TEST
1398
1399
1400/* Regression test for SF bug #620343. */
1401static void XMLCALL
1402start_element_fail(void *UNUSED_P(userData),
1403                   const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
1404{
1405    /* We should never get here. */
1406    fail("should never reach start_element_fail()");
1407}
1408
1409static void XMLCALL
1410start_ns_clearing_start_element(void *userData,
1411                                const XML_Char *UNUSED_P(prefix),
1412                                const XML_Char *UNUSED_P(uri))
1413{
1414    XML_SetStartElementHandler((XML_Parser) userData, NULL);
1415}
1416
1417START_TEST(test_start_ns_clears_start_element)
1418{
1419    /* This needs to use separate start/end tags; using the empty tag
1420       syntax doesn't cause the problematic path through Expat to be
1421       taken.
1422    */
1423    const char *text = "<e xmlns='http://xml.libexpat.org/'></e>";
1424
1425    XML_SetStartElementHandler(parser, start_element_fail);
1426    XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element);
1427    XML_UseParserAsHandlerArg(parser);
1428    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1429        xml_failure(parser);
1430}
1431END_TEST
1432
1433/* Regression test for SF bug #616863. */
1434static int XMLCALL
1435external_entity_handler(XML_Parser parser,
1436                        const XML_Char *context,
1437                        const XML_Char *UNUSED_P(base),
1438                        const XML_Char *UNUSED_P(systemId),
1439                        const XML_Char *UNUSED_P(publicId))
1440{
1441    intptr_t callno = 1 + (intptr_t)XML_GetUserData(parser);
1442    const char *text;
1443    XML_Parser p2;
1444
1445    if (callno == 1)
1446        text = ("<!ELEMENT doc (e+)>\n"
1447                "<!ATTLIST doc xmlns CDATA #IMPLIED>\n"
1448                "<!ELEMENT e EMPTY>\n");
1449    else
1450        text = ("<?xml version='1.0' encoding='us-ascii'?>"
1451                "<e/>");
1452
1453    XML_SetUserData(parser, (void *) callno);
1454    p2 = XML_ExternalEntityParserCreate(parser, context, NULL);
1455    if (_XML_Parse_SINGLE_BYTES(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) {
1456        xml_failure(p2);
1457        return 0;
1458    }
1459    XML_ParserFree(p2);
1460    return 1;
1461}
1462
1463START_TEST(test_default_ns_from_ext_subset_and_ext_ge)
1464{
1465    const char *text =
1466        "<?xml version='1.0'?>\n"
1467        "<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n"
1468        "  <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n"
1469        "]>\n"
1470        "<doc xmlns='http://xml.libexpat.org/ns1'>\n"
1471        "&en;\n"
1472        "</doc>";
1473
1474    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1475    XML_SetExternalEntityRefHandler(parser, external_entity_handler);
1476    /* We actually need to set this handler to tickle this bug. */
1477    XML_SetStartElementHandler(parser, dummy_start_element);
1478    XML_SetUserData(parser, NULL);
1479    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1480        xml_failure(parser);
1481}
1482END_TEST
1483
1484/* Regression test #1 for SF bug #673791. */
1485START_TEST(test_ns_prefix_with_empty_uri_1)
1486{
1487    const char *text =
1488        "<doc xmlns:prefix='http://xml.libexpat.org/'>\n"
1489        "  <e xmlns:prefix=''/>\n"
1490        "</doc>";
1491
1492    expect_failure(text,
1493                   XML_ERROR_UNDECLARING_PREFIX,
1494                   "Did not report re-setting namespace"
1495                   " URI with prefix to ''.");
1496}
1497END_TEST
1498
1499/* Regression test #2 for SF bug #673791. */
1500START_TEST(test_ns_prefix_with_empty_uri_2)
1501{
1502    const char *text =
1503        "<?xml version='1.0'?>\n"
1504        "<docelem xmlns:pre=''/>";
1505
1506    expect_failure(text,
1507                   XML_ERROR_UNDECLARING_PREFIX,
1508                   "Did not report setting namespace URI with prefix to ''.");
1509}
1510END_TEST
1511
1512/* Regression test #3 for SF bug #673791. */
1513START_TEST(test_ns_prefix_with_empty_uri_3)
1514{
1515    const char *text =
1516        "<!DOCTYPE doc [\n"
1517        "  <!ELEMENT doc EMPTY>\n"
1518        "  <!ATTLIST doc\n"
1519        "    xmlns:prefix CDATA ''>\n"
1520        "]>\n"
1521        "<doc/>";
1522
1523    expect_failure(text,
1524                   XML_ERROR_UNDECLARING_PREFIX,
1525                   "Didn't report attr default setting NS w/ prefix to ''.");
1526}
1527END_TEST
1528
1529/* Regression test #4 for SF bug #673791. */
1530START_TEST(test_ns_prefix_with_empty_uri_4)
1531{
1532    const char *text =
1533        "<!DOCTYPE doc [\n"
1534        "  <!ELEMENT prefix:doc EMPTY>\n"
1535        "  <!ATTLIST prefix:doc\n"
1536        "    xmlns:prefix CDATA 'http://xml.libexpat.org/'>\n"
1537        "]>\n"
1538        "<prefix:doc/>";
1539    /* Packaged info expected by the end element handler;
1540       the weird structuring lets us re-use the triplet_end_checker()
1541       function also used for another test. */
1542    const char *elemstr[] = {
1543        "http://xml.libexpat.org/ doc prefix"
1544    };
1545    XML_SetReturnNSTriplet(parser, XML_TRUE);
1546    XML_SetUserData(parser, elemstr);
1547    XML_SetEndElementHandler(parser, triplet_end_checker);
1548    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1549        xml_failure(parser);
1550}
1551END_TEST
1552
1553START_TEST(test_ns_default_with_empty_uri)
1554{
1555    const char *text =
1556        "<doc xmlns='http://xml.libexpat.org/'>\n"
1557        "  <e xmlns=''/>\n"
1558        "</doc>";
1559    if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1560        xml_failure(parser);
1561}
1562END_TEST
1563
1564/* Regression test for SF bug #692964: two prefixes for one namespace. */
1565START_TEST(test_ns_duplicate_attrs_diff_prefixes)
1566{
1567    const char *text =
1568        "<doc xmlns:a='http://xml.libexpat.org/a'\n"
1569        "     xmlns:b='http://xml.libexpat.org/a'\n"
1570        "     a:a='v' b:a='v' />";
1571    expect_failure(text,
1572                   XML_ERROR_DUPLICATE_ATTRIBUTE,
1573                   "did not report multiple attributes with same URI+name");
1574}
1575END_TEST
1576
1577/* Regression test for SF bug #695401: unbound prefix. */
1578START_TEST(test_ns_unbound_prefix_on_attribute)
1579{
1580    const char *text = "<doc a:attr=''/>";
1581    expect_failure(text,
1582                   XML_ERROR_UNBOUND_PREFIX,
1583                   "did not report unbound prefix on attribute");
1584}
1585END_TEST
1586
1587/* Regression test for SF bug #695401: unbound prefix. */
1588START_TEST(test_ns_unbound_prefix_on_element)
1589{
1590    const char *text = "<a:doc/>";
1591    expect_failure(text,
1592                   XML_ERROR_UNBOUND_PREFIX,
1593                   "did not report unbound prefix on element");
1594}
1595END_TEST
1596
1597static Suite *
1598make_suite(void)
1599{
1600    Suite *s = suite_create("basic");
1601    TCase *tc_basic = tcase_create("basic tests");
1602    TCase *tc_namespace = tcase_create("XML namespaces");
1603
1604    suite_add_tcase(s, tc_basic);
1605    tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
1606    tcase_add_test(tc_basic, test_nul_byte);
1607    tcase_add_test(tc_basic, test_u0000_char);
1608    tcase_add_test(tc_basic, test_bom_utf8);
1609    tcase_add_test(tc_basic, test_bom_utf16_be);
1610    tcase_add_test(tc_basic, test_bom_utf16_le);
1611    tcase_add_test(tc_basic, test_illegal_utf8);
1612    tcase_add_test(tc_basic, test_utf8_auto_align);
1613    tcase_add_test(tc_basic, test_utf16);
1614    tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
1615    tcase_add_test(tc_basic, test_latin1_umlauts);
1616    /* Regression test for SF bug #491986. */
1617    tcase_add_test(tc_basic, test_danish_latin1);
1618    /* Regression test for SF bug #514281. */
1619    tcase_add_test(tc_basic, test_french_charref_hexidecimal);
1620    tcase_add_test(tc_basic, test_french_charref_decimal);
1621    tcase_add_test(tc_basic, test_french_latin1);
1622    tcase_add_test(tc_basic, test_french_utf8);
1623    tcase_add_test(tc_basic, test_utf8_false_rejection);
1624    tcase_add_test(tc_basic, test_line_number_after_parse);
1625    tcase_add_test(tc_basic, test_column_number_after_parse);
1626    tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
1627    tcase_add_test(tc_basic, test_line_number_after_error);
1628    tcase_add_test(tc_basic, test_column_number_after_error);
1629    tcase_add_test(tc_basic, test_really_long_lines);
1630    tcase_add_test(tc_basic, test_end_element_events);
1631    tcase_add_test(tc_basic, test_attr_whitespace_normalization);
1632    tcase_add_test(tc_basic, test_xmldecl_misplaced);
1633    tcase_add_test(tc_basic, test_unknown_encoding_internal_entity);
1634    tcase_add_test(tc_basic,
1635                   test_wfc_undeclared_entity_unread_external_subset);
1636    tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
1637    tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
1638    tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
1639    tcase_add_test(tc_basic,
1640                   test_wfc_undeclared_entity_with_external_subset_standalone);
1641    tcase_add_test(tc_basic, test_wfc_no_recursive_entity_refs);
1642    tcase_add_test(tc_basic, test_ext_entity_set_encoding);
1643    tcase_add_test(tc_basic, test_dtd_default_handling);
1644    tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
1645    tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
1646    tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
1647    tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
1648    tcase_add_test(tc_basic, test_good_cdata_ascii);
1649    tcase_add_test(tc_basic, test_good_cdata_utf16);
1650    tcase_add_test(tc_basic, test_bad_cdata);
1651
1652    suite_add_tcase(s, tc_namespace);
1653    tcase_add_checked_fixture(tc_namespace,
1654                              namespace_setup, namespace_teardown);
1655    tcase_add_test(tc_namespace, test_return_ns_triplet);
1656    tcase_add_test(tc_namespace, test_ns_tagname_overwrite);
1657    tcase_add_test(tc_namespace, test_ns_tagname_overwrite_triplet);
1658    tcase_add_test(tc_namespace, test_start_ns_clears_start_element);
1659    tcase_add_test(tc_namespace, test_default_ns_from_ext_subset_and_ext_ge);
1660    tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_1);
1661    tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_2);
1662    tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_3);
1663    tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_4);
1664    tcase_add_test(tc_namespace, test_ns_default_with_empty_uri);
1665    tcase_add_test(tc_namespace, test_ns_duplicate_attrs_diff_prefixes);
1666    tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_attribute);
1667    tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_element);
1668
1669    return s;
1670}
1671
1672
1673int
1674main(int argc, char *argv[])
1675{
1676    int i, nf;
1677    int verbosity = CK_NORMAL;
1678    Suite *s = make_suite();
1679    SRunner *sr = srunner_create(s);
1680
1681    /* run the tests for internal helper functions */
1682    testhelper_is_whitespace_normalized();
1683
1684    for (i = 1; i < argc; ++i) {
1685        char *opt = argv[i];
1686        if (strcmp(opt, "-v") == 0 || strcmp(opt, "--verbose") == 0)
1687            verbosity = CK_VERBOSE;
1688        else if (strcmp(opt, "-q") == 0 || strcmp(opt, "--quiet") == 0)
1689            verbosity = CK_SILENT;
1690        else {
1691            fprintf(stderr, "runtests: unknown option '%s'\n", opt);
1692            return 2;
1693        }
1694    }
1695    if (verbosity != CK_SILENT)
1696        printf("Expat version: %s\n", XML_ExpatVersion());
1697    srunner_run_all(sr, verbosity);
1698    nf = srunner_ntests_failed(sr);
1699    srunner_free(sr);
1700
1701    return (nf == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
1702}
1703