1/* Tests in the "miscellaneous" test case for the Expat test suite
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10   Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13   Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15   Copyright (c) 2017      Joe Orton <jorton@redhat.com>
   Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17   Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19   Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21   Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
22   Licensed under the MIT license:
23
24   Permission is  hereby granted,  free of charge,  to any  person obtaining
25   a  copy  of  this  software   and  associated  documentation  files  (the
26   "Software"),  to  deal in  the  Software  without restriction,  including
27   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28   distribute, sublicense, and/or sell copies of the Software, and to permit
29   persons  to whom  the Software  is  furnished to  do so,  subject to  the
30   following conditions:
31
32   The above copyright  notice and this permission notice  shall be included
33   in all copies or substantial portions of the Software.
34
35   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41   USE OR OTHER DEALINGS IN THE SOFTWARE.
42*/
43
44#if defined(NDEBUG)
45#  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46#endif
47
48#include <assert.h>
49#include <string.h>
50
51#include "expat_config.h"
52
53#include "expat.h"
54#include "internal.h"
55#include "minicheck.h"
56#include "memcheck.h"
57#include "common.h"
58#include "ascii.h" /* for ASCII_xxx */
59#include "handlers.h"
60#include "misc_tests.h"
61
62/* Test that a failure to allocate the parser structure fails gracefully */
63START_TEST(test_misc_alloc_create_parser) {
64  XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
65  unsigned int i;
66  const unsigned int max_alloc_count = 10;
67
68  /* Something this simple shouldn't need more than 10 allocations */
69  for (i = 0; i < max_alloc_count; i++) {
70    g_allocation_count = i;
71    g_parser = XML_ParserCreate_MM(NULL, &memsuite, NULL);
72    if (g_parser != NULL)
73      break;
74  }
75  if (i == 0)
76    fail("Parser unexpectedly ignored failing allocator");
77  else if (i == max_alloc_count)
78    fail("Parser not created with max allocation count");
79}
80END_TEST
81
82/* Test memory allocation failures for a parser with an encoding */
83START_TEST(test_misc_alloc_create_parser_with_encoding) {
84  XML_Memory_Handling_Suite memsuite = {duff_allocator, realloc, free};
85  unsigned int i;
86  const unsigned int max_alloc_count = 10;
87
88  /* Try several levels of allocation */
89  for (i = 0; i < max_alloc_count; i++) {
90    g_allocation_count = i;
91    g_parser = XML_ParserCreate_MM(XCS("us-ascii"), &memsuite, NULL);
92    if (g_parser != NULL)
93      break;
94  }
95  if (i == 0)
96    fail("Parser ignored failing allocator");
97  else if (i == max_alloc_count)
98    fail("Parser not created with max allocation count");
99}
100END_TEST
101
102/* Test that freeing a NULL parser doesn't cause an explosion.
103 * (Not actually tested anywhere else)
104 */
105START_TEST(test_misc_null_parser) {
106  XML_ParserFree(NULL);
107}
108END_TEST
109
/* EXPAT_TESTS_UBSAN is defined to 1 when the compiler provides __has_feature
 * and reports the undefined_behavior_sanitizer feature, and to 0 in every
 * other case.  The __has_feature(...) query sits inside the
 * defined(__has_feature) check so that it is only reached where that macro
 * exists.
 */
#if defined(__has_feature)
#  if __has_feature(undefined_behavior_sanitizer)
#    define EXPAT_TESTS_UBSAN 1
#  else
#    define EXPAT_TESTS_UBSAN 0
#  endif
#else
#  define EXPAT_TESTS_UBSAN 0
#endif
119
120/* Test that XML_ErrorString rejects out-of-range codes */
121START_TEST(test_misc_error_string) {
122#if ! EXPAT_TESTS_UBSAN // because this would trigger UBSan
123  union {
124    enum XML_Error xml_error;
125    int integer;
126  } trickery;
127
128  assert_true(sizeof(enum XML_Error) == sizeof(int)); // self-test
129
130  trickery.integer = -1;
131  if (XML_ErrorString(trickery.xml_error) != NULL)
132    fail("Negative error code not rejected");
133
134  trickery.integer = 100;
135  if (XML_ErrorString(trickery.xml_error) != NULL)
136    fail("Large error code not rejected");
137#endif
138}
139END_TEST
140
141/* Test the version information is consistent */
142
143/* Since we are working in XML_LChars (potentially 16-bits), we
144 * can't use the standard C library functions for character
145 * manipulation and have to roll our own.
146 */
147static int
148parse_version(const XML_LChar *version_text,
149              XML_Expat_Version *version_struct) {
150  if (! version_text)
151    return XML_FALSE;
152
153  while (*version_text != 0x00) {
154    if (*version_text >= ASCII_0 && *version_text <= ASCII_9)
155      break;
156    version_text++;
157  }
158  if (*version_text == 0x00)
159    return XML_FALSE;
160
161  /* version_struct->major = strtoul(version_text, 10, &version_text) */
162  version_struct->major = 0;
163  while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
164    version_struct->major
165        = 10 * version_struct->major + (*version_text++ - ASCII_0);
166  }
167  if (*version_text++ != ASCII_PERIOD)
168    return XML_FALSE;
169
170  /* Now for the minor version number */
171  version_struct->minor = 0;
172  while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
173    version_struct->minor
174        = 10 * version_struct->minor + (*version_text++ - ASCII_0);
175  }
176  if (*version_text++ != ASCII_PERIOD)
177    return XML_FALSE;
178
179  /* Finally the micro version number */
180  version_struct->micro = 0;
181  while (*version_text >= ASCII_0 && *version_text <= ASCII_9) {
182    version_struct->micro
183        = 10 * version_struct->micro + (*version_text++ - ASCII_0);
184  }
185  if (*version_text != 0x00)
186    return XML_FALSE;
187  return XML_TRUE;
188}
189
190static int
191versions_equal(const XML_Expat_Version *first,
192               const XML_Expat_Version *second) {
193  return (first->major == second->major && first->minor == second->minor
194          && first->micro == second->micro);
195}
196
197START_TEST(test_misc_version) {
198  XML_Expat_Version read_version = XML_ExpatVersionInfo();
199  /* Silence compiler warning with the following assignment */
200  XML_Expat_Version parsed_version = {0, 0, 0};
201  const XML_LChar *version_text = XML_ExpatVersion();
202
203  if (version_text == NULL)
204    fail("Could not obtain version text");
205  assert(version_text != NULL);
206  if (! parse_version(version_text, &parsed_version))
207    fail("Unable to parse version text");
208  if (! versions_equal(&read_version, &parsed_version))
209    fail("Version mismatch");
210
211  if (xcstrcmp(version_text, XCS("expat_2.6.0"))) /* needs bump on releases */
212    fail("XML_*_VERSION in expat.h out of sync?\n");
213}
214END_TEST
215
216/* Test feature information */
217START_TEST(test_misc_features) {
218  const XML_Feature *features = XML_GetFeatureList();
219
220  /* Prevent problems with double-freeing parsers */
221  g_parser = NULL;
222  if (features == NULL) {
223    fail("Failed to get feature information");
224  } else {
225    /* Loop through the features checking what we can */
226    while (features->feature != XML_FEATURE_END) {
227      switch (features->feature) {
228      case XML_FEATURE_SIZEOF_XML_CHAR:
229        if (features->value != sizeof(XML_Char))
230          fail("Incorrect size of XML_Char");
231        break;
232      case XML_FEATURE_SIZEOF_XML_LCHAR:
233        if (features->value != sizeof(XML_LChar))
234          fail("Incorrect size of XML_LChar");
235        break;
236      default:
237        break;
238      }
239      features++;
240    }
241  }
242}
243END_TEST
244
245/* Regression test for GitHub Issue #17: memory leak parsing attribute
246 * values with mixed bound and unbound namespaces.
247 */
248START_TEST(test_misc_attribute_leak) {
249  const char *text = "<D xmlns:L=\"D\" l:a='' L:a=''/>";
250  XML_Memory_Handling_Suite memsuite
251      = {tracking_malloc, tracking_realloc, tracking_free};
252
253  g_parser = XML_ParserCreate_MM(XCS("UTF-8"), &memsuite, XCS("\n"));
254  expect_failure(text, XML_ERROR_UNBOUND_PREFIX, "Unbound prefixes not found");
255  XML_ParserFree(g_parser);
256  /* Prevent the teardown trying to double free */
257  g_parser = NULL;
258
259  if (! tracking_report())
260    fail("Memory leak found");
261}
262END_TEST
263
264/* Test parser created for UTF-16LE is successful */
265START_TEST(test_misc_utf16le) {
266  const char text[] =
267      /* <?xml version='1.0'?><q>Hi</q> */
268      "<\0?\0x\0m\0l\0 \0"
269      "v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0?\0>\0"
270      "<\0q\0>\0H\0i\0<\0/\0q\0>\0";
271  const XML_Char *expected = XCS("Hi");
272  CharData storage;
273
274  g_parser = XML_ParserCreate(XCS("UTF-16LE"));
275  if (g_parser == NULL)
276    fail("Parser not created");
277
278  CharData_Init(&storage);
279  XML_SetUserData(g_parser, &storage);
280  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
281  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
282      == XML_STATUS_ERROR)
283    xml_failure(g_parser);
284  CharData_CheckXMLChars(&storage, expected);
285}
286END_TEST
287
288START_TEST(test_misc_stop_during_end_handler_issue_240_1) {
289  XML_Parser parser;
290  DataIssue240 *mydata;
291  enum XML_Status result;
292  const char *const doc1 = "<doc><e1/><e><foo/></e></doc>";
293
294  parser = XML_ParserCreate(NULL);
295  XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
296  mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
297  mydata->parser = parser;
298  mydata->deep = 0;
299  XML_SetUserData(parser, mydata);
300
301  result = _XML_Parse_SINGLE_BYTES(parser, doc1, (int)strlen(doc1), 1);
302  XML_ParserFree(parser);
303  free(mydata);
304  if (result != XML_STATUS_ERROR)
305    fail("Stopping the parser did not work as expected");
306}
307END_TEST
308
309START_TEST(test_misc_stop_during_end_handler_issue_240_2) {
310  XML_Parser parser;
311  DataIssue240 *mydata;
312  enum XML_Status result;
313  const char *const doc2 = "<doc><elem/></doc>";
314
315  parser = XML_ParserCreate(NULL);
316  XML_SetElementHandler(parser, start_element_issue_240, end_element_issue_240);
317  mydata = (DataIssue240 *)malloc(sizeof(DataIssue240));
318  mydata->parser = parser;
319  mydata->deep = 0;
320  XML_SetUserData(parser, mydata);
321
322  result = _XML_Parse_SINGLE_BYTES(parser, doc2, (int)strlen(doc2), 1);
323  XML_ParserFree(parser);
324  free(mydata);
325  if (result != XML_STATUS_ERROR)
326    fail("Stopping the parser did not work as expected");
327}
328END_TEST
329
330START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
331  const char *const inputOne = "<!DOCTYPE d [\n"
332                               "<!ENTITY % e ']><d/>'>\n"
333                               "\n"
334                               "%e;";
335  const char *const inputTwo = "<!DOCTYPE d [\n"
336                               "<!ENTITY % e1 ']><d/>'><!ENTITY % e2 '&e1;'>\n"
337                               "\n"
338                               "%e2;";
339  const char *const inputThree = "<!DOCTYPE d [\n"
340                                 "<!ENTITY % e ']><d'>\n"
341                                 "\n"
342                                 "%e;";
343  const char *const inputIssue317 = "<!DOCTYPE doc [\n"
344                                    "<!ENTITY % foo ']>\n"
345                                    "<doc>Hell<oc (#PCDATA)*>'>\n"
346                                    "%foo;\n"
347                                    "]>\n"
348                                    "<doc>Hello, world</dVc>";
349
350  const char *const inputs[] = {inputOne, inputTwo, inputThree, inputIssue317};
351  size_t inputIndex = 0;
352
353  for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
354    set_subtest("%s", inputs[inputIndex]);
355    XML_Parser parser;
356    enum XML_Status parseResult;
357    int setParamEntityResult;
358    XML_Size lineNumber;
359    XML_Size columnNumber;
360    const char *const input = inputs[inputIndex];
361
362    parser = XML_ParserCreate(NULL);
363    setParamEntityResult
364        = XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
365    if (setParamEntityResult != 1)
366      fail("Failed to set XML_PARAM_ENTITY_PARSING_ALWAYS.");
367
368    parseResult = _XML_Parse_SINGLE_BYTES(parser, input, (int)strlen(input), 0);
369    if (parseResult != XML_STATUS_ERROR) {
370      parseResult = _XML_Parse_SINGLE_BYTES(parser, "", 0, 1);
371      if (parseResult != XML_STATUS_ERROR) {
372        fail("Parsing was expected to fail but succeeded.");
373      }
374    }
375
376    if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
377      fail("Error code does not match XML_ERROR_INVALID_TOKEN");
378
379    lineNumber = XML_GetCurrentLineNumber(parser);
380    if (lineNumber != 4)
381      fail("XML_GetCurrentLineNumber does not work as expected.");
382
383    columnNumber = XML_GetCurrentColumnNumber(parser);
384    if (columnNumber != 0)
385      fail("XML_GetCurrentColumnNumber does not work as expected.");
386
387    XML_ParserFree(parser);
388  }
389}
390END_TEST
391
392START_TEST(test_misc_tag_mismatch_reset_leak) {
393#ifdef XML_NS
394  const char *const text = "<open xmlns='https://namespace1.test'></close>";
395  XML_Parser parser = XML_ParserCreateNS(NULL, XCS('\n'));
396
397  if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
398      != XML_STATUS_ERROR)
399    fail("Call to parse was expected to fail");
400  if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
401    fail("Call to parse was expected to fail from a closing tag mismatch");
402
403  XML_ParserReset(parser, NULL);
404
405  if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
406      != XML_STATUS_ERROR)
407    fail("Call to parse was expected to fail");
408  if (XML_GetErrorCode(parser) != XML_ERROR_TAG_MISMATCH)
409    fail("Call to parse was expected to fail from a closing tag mismatch");
410
411  XML_ParserFree(parser);
412#endif
413}
414END_TEST
415
416START_TEST(test_misc_create_external_entity_parser_with_null_context) {
417  // With XML_DTD undefined, the only supported case of external entities
418  // is pattern "<!ENTITY entity123 SYSTEM 'filename123'>". A NULL context
419  // was causing a segfault through a null pointer dereference in function
420  // setContext, previously.
421  XML_Parser parser = XML_ParserCreate(NULL);
422  XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
423#ifdef XML_DTD
424  assert_true(ext_parser != NULL);
425  XML_ParserFree(ext_parser);
426#else
427  assert_true(ext_parser == NULL);
428#endif /* XML_DTD */
429  XML_ParserFree(parser);
430}
431END_TEST
432
433START_TEST(test_misc_general_entities_support) {
434  const char *const doc
435      = "<!DOCTYPE r [\n"
436        "<!ENTITY e1 'v1'>\n"
437        "<!ENTITY e2 SYSTEM 'v2'>\n"
438        "]>\n"
439        "<r a1='[&e1;]'>[&e1;][&e2;][&amp;&apos;&gt;&lt;&quot;]</r>";
440
441  CharData storage;
442  CharData_Init(&storage);
443
444  XML_Parser parser = XML_ParserCreate(NULL);
445  XML_SetUserData(parser, &storage);
446  XML_SetStartElementHandler(parser, accumulate_start_element);
447  XML_SetExternalEntityRefHandler(parser,
448                                  external_entity_failer__if_not_xml_ge);
449  XML_SetEntityDeclHandler(parser, accumulate_entity_decl);
450  XML_SetCharacterDataHandler(parser, accumulate_char_data);
451
452  if (_XML_Parse_SINGLE_BYTES(parser, doc, (int)strlen(doc), XML_TRUE)
453      != XML_STATUS_OK) {
454    xml_failure(parser);
455  }
456
457  XML_ParserFree(parser);
458
459  CharData_CheckXMLChars(&storage,
460  /* clang-format off */
461#if XML_GE == 1
462                         XCS("e1=v1\n")
463                         XCS("e2=(null)\n")
464                         XCS("(r(a1=[v1]))\n")
465                         XCS("[v1][][&'><\"]")
466#else
467                         XCS("e1=&amp;e1;\n")
468                         XCS("e2=(null)\n")
469                         XCS("(r(a1=[&e1;]))\n")
470                         XCS("[&e1;][&e2;][&'><\"]")
471#endif
472  );
473  /* clang-format on */
474}
475END_TEST
476
477static void XMLCALL
478resumable_stopping_character_handler(void *userData, const XML_Char *s,
479                                     int len) {
480  UNUSED_P(s);
481  UNUSED_P(len);
482  XML_Parser parser = (XML_Parser)userData;
483  XML_StopParser(parser, XML_TRUE);
484}
485
486// NOTE: This test needs active LeakSanitizer to be of actual use
487START_TEST(test_misc_char_handler_stop_without_leak) {
488  const char *const data
489      = "<!DOCTYPE t1[<!ENTITY e1 'angle<'><!ENTITY e2 '&e1;'>]><t1>&e2;";
490  XML_Parser parser = XML_ParserCreate(NULL);
491  assert_true(parser != NULL);
492  XML_SetUserData(parser, parser);
493  XML_SetCharacterDataHandler(parser, resumable_stopping_character_handler);
494  _XML_Parse_SINGLE_BYTES(parser, data, (int)strlen(data), XML_FALSE);
495  XML_ParserFree(parser);
496}
497END_TEST
498
499void
500make_miscellaneous_test_case(Suite *s) {
501  TCase *tc_misc = tcase_create("miscellaneous tests");
502
503  suite_add_tcase(s, tc_misc);
504  tcase_add_checked_fixture(tc_misc, NULL, basic_teardown);
505
506  tcase_add_test(tc_misc, test_misc_alloc_create_parser);
507  tcase_add_test(tc_misc, test_misc_alloc_create_parser_with_encoding);
508  tcase_add_test(tc_misc, test_misc_null_parser);
509  tcase_add_test(tc_misc, test_misc_error_string);
510  tcase_add_test(tc_misc, test_misc_version);
511  tcase_add_test(tc_misc, test_misc_features);
512  tcase_add_test(tc_misc, test_misc_attribute_leak);
513  tcase_add_test(tc_misc, test_misc_utf16le);
514  tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_1);
515  tcase_add_test(tc_misc, test_misc_stop_during_end_handler_issue_240_2);
516  tcase_add_test__ifdef_xml_dtd(
517      tc_misc, test_misc_deny_internal_entity_closing_doctype_issue_317);
518  tcase_add_test(tc_misc, test_misc_tag_mismatch_reset_leak);
519  tcase_add_test(tc_misc,
520                 test_misc_create_external_entity_parser_with_null_context);
521  tcase_add_test(tc_misc, test_misc_general_entities_support);
522  tcase_add_test(tc_misc, test_misc_char_handler_stop_without_leak);
523}
524