1/* Tests in the "accounting" test case for the Expat test suite
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10   Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13   Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15   Copyright (c) 2017      Joe Orton <jorton@redhat.com>
   Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17   Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19   Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21   Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
22   Licensed under the MIT license:
23
24   Permission is  hereby granted,  free of charge,  to any  person obtaining
25   a  copy  of  this  software   and  associated  documentation  files  (the
26   "Software"),  to  deal in  the  Software  without restriction,  including
27   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28   distribute, sublicense, and/or sell copies of the Software, and to permit
29   persons  to whom  the Software  is  furnished to  do so,  subject to  the
30   following conditions:
31
32   The above copyright  notice and this permission notice  shall be included
33   in all copies or substantial portions of the Software.
34
35   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41   USE OR OTHER DEALINGS IN THE SOFTWARE.
42*/
43
44#include <math.h> /* NAN, INFINITY */
45#include <stdio.h>
46#include <string.h>
47
48#include "expat_config.h"
49
50#include "expat.h"
51#include "internal.h"
52#include "common.h"
53#include "minicheck.h"
54#include "chardata.h"
55#include "handlers.h"
56#include "acc_tests.h"
57
58#if XML_GE == 1
59START_TEST(test_accounting_precision) {
60  struct AccountingTestCase cases[] = {
61      {"<e/>", NULL, NULL, 0},
62      {"<e></e>", NULL, NULL, 0},
63
64      /* Attributes */
65      {"<e k1=\"v2\" k2=\"v2\"/>", NULL, NULL, 0},
66      {"<e k1=\"v2\" k2=\"v2\"></e>", NULL, NULL, 0},
67      {"<p:e xmlns:p=\"https://domain.invalid/\" />", NULL, NULL, 0},
68      {"<e k=\"&amp;&apos;&gt;&lt;&quot;\" />", NULL, NULL,
69       sizeof(XML_Char) * 5 /* number of predefined entities */},
70      {"<e1 xmlns='https://example.org/'>\n"
71       "  <e2 xmlns=''/>\n"
72       "</e1>",
73       NULL, NULL, 0},
74
75      /* Text */
76      {"<e>text</e>", NULL, NULL, 0},
77      {"<e1><e2>text1<e3/>text2</e2></e1>", NULL, NULL, 0},
78      {"<e>&amp;&apos;&gt;&lt;&quot;</e>", NULL, NULL,
79       sizeof(XML_Char) * 5 /* number of predefined entities */},
80      {"<e>&#65;&#41;</e>", NULL, NULL, 0},
81
82      /* Prolog */
83      {"<?xml version=\"1.0\"?><root/>", NULL, NULL, 0},
84
85      /* Whitespace */
86      {"  <e1>  <e2>  </e2>  </e1>  ", NULL, NULL, 0},
87      {"<e1  ><e2  /></e1  >", NULL, NULL, 0},
88      {"<e1><e2 k = \"v\"/><e3 k = 'v'/></e1>", NULL, NULL, 0},
89
90      /* Comments */
91      {"<!-- Comment --><e><!-- Comment --></e>", NULL, NULL, 0},
92
93      /* Processing instructions */
94      {"<?xml-stylesheet type=\"text/xsl\" href=\"https://domain.invalid/\" media=\"all\"?><e/>",
95       NULL, NULL, 0},
96      {"<?pi0?><?pi1 ?><?pi2  ?><r/><?pi4?>", NULL, NULL, 0},
97#  ifdef XML_DTD
98      {"<?pi0?><?pi1 ?><?pi2  ?><!DOCTYPE r SYSTEM 'first.ent'><r/>",
99       "<?pi3?><!ENTITY % e1 SYSTEM 'second.ent'><?pi4?>%e1;<?pi5?>", "<?pi6?>",
100       0},
101#  endif /* XML_DTD */
102
103      /* CDATA */
104      {"<e><![CDATA[one two three]]></e>", NULL, NULL, 0},
105      /* The following is the essence of this OSS-Fuzz finding:
106         https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=34302
107         https://oss-fuzz.com/testcase-detail/4860575394955264
108      */
109      {"<!DOCTYPE r [\n"
110       "<!ENTITY e \"111<![CDATA[2 <= 2]]>333\">\n"
111       "]>\n"
112       "<r>&e;</r>\n",
113       NULL, NULL, sizeof(XML_Char) * strlen("111<![CDATA[2 <= 2]]>333")},
114
115#  ifdef XML_DTD
116      /* Conditional sections */
117      {"<!DOCTYPE r [\n"
118       "<!ENTITY % draft 'INCLUDE'>\n"
119       "<!ENTITY % final 'IGNORE'>\n"
120       "<!ENTITY % import SYSTEM \"first.ent\">\n"
121       "%import;\n"
122       "]>\n"
123       "<r/>\n",
124       "<![%draft;[<!--1-->]]>\n"
125       "<![%final;[<!--22-->]]>",
126       NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE"))},
127#  endif /* XML_DTD */
128
129      /* General entities */
130      {"<!DOCTYPE root [\n"
131       "<!ENTITY nine \"123456789\">\n"
132       "]>\n"
133       "<root>&nine;</root>",
134       NULL, NULL, sizeof(XML_Char) * strlen("123456789")},
135      {"<!DOCTYPE root [\n"
136       "<!ENTITY nine \"123456789\">\n"
137       "]>\n"
138       "<root k1=\"&nine;\"/>",
139       NULL, NULL, sizeof(XML_Char) * strlen("123456789")},
140      {"<!DOCTYPE root [\n"
141       "<!ENTITY nine \"123456789\">\n"
142       "<!ENTITY nine2 \"&nine;&nine;\">\n"
143       "]>\n"
144       "<root>&nine2;&nine2;&nine2;</root>",
145       NULL, NULL,
146       sizeof(XML_Char) * 3 /* calls to &nine2; */ * 2 /* calls to &nine; */
147           * (strlen("&nine;") + strlen("123456789"))},
148      {"<!DOCTYPE r [\n"
149       "  <!ENTITY five SYSTEM 'first.ent'>\n"
150       "]>\n"
151       "<r>&five;</r>",
152       "12345", NULL, 0},
153      {"<!DOCTYPE r [\n"
154       "  <!ENTITY five SYSTEM 'first.ent'>\n"
155       "]>\n"
156       "<r>&five;</r>",
157       "\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0},
158
159#  ifdef XML_DTD
160      /* Parameter entities */
161      {"<!DOCTYPE r [\n"
162       "<!ENTITY % comment \"<!---->\">\n"
163       "%comment;\n"
164       "]>\n"
165       "<r/>",
166       NULL, NULL, sizeof(XML_Char) * strlen("<!---->")},
167      {"<!DOCTYPE r [\n"
168       "<!ENTITY % ninedef \"&#60;!ENTITY nine &#34;123456789&#34;&#62;\">\n"
169       "%ninedef;\n"
170       "]>\n"
171       "<r>&nine;</r>",
172       NULL, NULL,
173       sizeof(XML_Char)
174           * (strlen("<!ENTITY nine \"123456789\">") + strlen("123456789"))},
175      {"<!DOCTYPE r [\n"
176       "<!ENTITY % comment \"<!--1-->\">\n"
177       "<!ENTITY % comment2 \"&#37;comment;<!--22-->&#37;comment;\">\n"
178       "%comment2;\n"
179       "]>\n"
180       "<r/>\n",
181       NULL, NULL,
182       sizeof(XML_Char)
183           * (strlen("%comment;<!--22-->%comment;") + 2 * strlen("<!--1-->"))},
184      {"<!DOCTYPE r [\n"
185       "  <!ENTITY % five \"12345\">\n"
186       "  <!ENTITY % five2def \"&#60;!ENTITY five2 &#34;[&#37;five;][&#37;five;]]]]&#34;&#62;\">\n"
187       "  %five2def;\n"
188       "]>\n"
189       "<r>&five2;</r>",
190       NULL, NULL, /* from "%five2def;": */
191       sizeof(XML_Char)
192           * (strlen("<!ENTITY five2 \"[%five;][%five;]]]]\">")
193              + 2 /* calls to "%five;" */ * strlen("12345")
194              + /* from "&five2;": */ strlen("[12345][12345]]]]"))},
195      {"<!DOCTYPE r SYSTEM \"first.ent\">\n"
196       "<r/>",
197       "<!ENTITY % comment '<!--1-->'>\n"
198       "<!ENTITY % comment2 '<!--22-->%comment;<!--22-->%comment;<!--22-->'>\n"
199       "%comment2;",
200       NULL,
201       sizeof(XML_Char)
202           * (strlen("<!--22-->%comment;<!--22-->%comment;<!--22-->")
203              + 2 /* calls to "%comment;" */ * strlen("<!---->"))},
204      {"<!DOCTYPE r SYSTEM 'first.ent'>\n"
205       "<r/>",
206       "<!ENTITY % e1 PUBLIC 'foo' 'second.ent'>\n"
207       "<!ENTITY % e2 '<!--22-->%e1;<!--22-->'>\n"
208       "%e2;\n",
209       "<!--1-->", sizeof(XML_Char) * strlen("<!--22--><!--1--><!--22-->")},
210      {
211          "<!DOCTYPE r SYSTEM 'first.ent'>\n"
212          "<r/>",
213          "<!ENTITY % e1 SYSTEM 'second.ent'>\n"
214          "<!ENTITY % e2 '%e1;'>",
215          "<?xml version='1.0' encoding='utf-8'?>\n"
216          "hello\n"
217          "xml" /* without trailing newline! */,
218          0,
219      },
220      {
221          "<!DOCTYPE r SYSTEM 'first.ent'>\n"
222          "<r/>",
223          "<!ENTITY % e1 SYSTEM 'second.ent'>\n"
224          "<!ENTITY % e2 '%e1;'>",
225          "<?xml version='1.0' encoding='utf-8'?>\n"
226          "hello\n"
227          "xml\n" /* with trailing newline! */,
228          0,
229      },
230      {"<!DOCTYPE doc SYSTEM 'first.ent'>\n"
231       "<doc></doc>\n",
232       "<!ELEMENT doc EMPTY>\n"
233       "<!ENTITY % e1 SYSTEM 'second.ent'>\n"
234       "<!ENTITY % e2 '%e1;'>\n"
235       "%e1;\n",
236       "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>" /* UTF-8 BOM */,
237       strlen("\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>")},
238#  endif /* XML_DTD */
239  };
240
241  const size_t countCases = sizeof(cases) / sizeof(cases[0]);
242  size_t u = 0;
243  for (; u < countCases; u++) {
244    const unsigned long long expectedCountBytesDirect
245        = strlen(cases[u].primaryText);
246    const unsigned long long expectedCountBytesIndirect
247        = (cases[u].firstExternalText ? strlen(cases[u].firstExternalText) : 0)
248          + (cases[u].secondExternalText ? strlen(cases[u].secondExternalText)
249                                         : 0)
250          + cases[u].expectedCountBytesIndirectExtra;
251
252    XML_Parser parser = XML_ParserCreate(NULL);
253    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
254    if (cases[u].firstExternalText) {
255      XML_SetExternalEntityRefHandler(parser,
256                                      accounting_external_entity_ref_handler);
257      XML_SetUserData(parser, (void *)&cases[u]);
258    }
259
260    enum XML_Status status
261        = _XML_Parse_SINGLE_BYTES(parser, cases[u].primaryText,
262                                  (int)strlen(cases[u].primaryText), XML_TRUE);
263    if (status != XML_STATUS_OK) {
264      _xml_failure(parser, __FILE__, __LINE__);
265    }
266
267    const unsigned long long actualCountBytesDirect
268        = testingAccountingGetCountBytesDirect(parser);
269    const unsigned long long actualCountBytesIndirect
270        = testingAccountingGetCountBytesIndirect(parser);
271
272    XML_ParserFree(parser);
273
274    if (actualCountBytesDirect != expectedCountBytesDirect) {
275      fprintf(
276          stderr,
277          "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL(
278              "") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n",
279          u + 1, countCases, expectedCountBytesDirect, actualCountBytesDirect);
280      fail("Count of direct bytes is off");
281    }
282
283    if (actualCountBytesIndirect != expectedCountBytesIndirect) {
284      fprintf(
285          stderr,
286          "Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ": Expected " EXPAT_FMT_ULL(
287              "") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n",
288          u + 1, countCases, expectedCountBytesIndirect,
289          actualCountBytesIndirect);
290      fail("Count of indirect bytes is off");
291    }
292  }
293}
294END_TEST
295
296START_TEST(test_billion_laughs_attack_protection_api) {
297  XML_Parser parserWithoutParent = XML_ParserCreate(NULL);
298  XML_Parser parserWithParent = XML_ExternalEntityParserCreate(
299      parserWithoutParent, XCS("entity123"), NULL);
300  if (parserWithoutParent == NULL)
301    fail("parserWithoutParent is NULL");
302  if (parserWithParent == NULL)
303    fail("parserWithParent is NULL");
304
305  // XML_SetBillionLaughsAttackProtectionMaximumAmplification, error cases
306  if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(NULL, 123.0f)
307      == XML_TRUE)
308    fail("Call with NULL parser is NOT supposed to succeed");
309  if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(parserWithParent,
310                                                               123.0f)
311      == XML_TRUE)
312    fail("Call with non-root parser is NOT supposed to succeed");
313  if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
314          parserWithoutParent, NAN)
315      == XML_TRUE)
316    fail("Call with NaN limit is NOT supposed to succeed");
317  if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
318          parserWithoutParent, -1.0f)
319      == XML_TRUE)
320    fail("Call with negative limit is NOT supposed to succeed");
321  if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
322          parserWithoutParent, 0.9f)
323      == XML_TRUE)
324    fail("Call with positive limit <1.0 is NOT supposed to succeed");
325
326  // XML_SetBillionLaughsAttackProtectionMaximumAmplification, success cases
327  if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
328          parserWithoutParent, 1.0f)
329      == XML_FALSE)
330    fail("Call with positive limit >=1.0 is supposed to succeed");
331  if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
332          parserWithoutParent, 123456.789f)
333      == XML_FALSE)
334    fail("Call with positive limit >=1.0 is supposed to succeed");
335  if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
336          parserWithoutParent, INFINITY)
337      == XML_FALSE)
338    fail("Call with positive limit >=1.0 is supposed to succeed");
339
340  // XML_SetBillionLaughsAttackProtectionActivationThreshold, error cases
341  if (XML_SetBillionLaughsAttackProtectionActivationThreshold(NULL, 123)
342      == XML_TRUE)
343    fail("Call with NULL parser is NOT supposed to succeed");
344  if (XML_SetBillionLaughsAttackProtectionActivationThreshold(parserWithParent,
345                                                              123)
346      == XML_TRUE)
347    fail("Call with non-root parser is NOT supposed to succeed");
348
349  // XML_SetBillionLaughsAttackProtectionActivationThreshold, success cases
350  if (XML_SetBillionLaughsAttackProtectionActivationThreshold(
351          parserWithoutParent, 123)
352      == XML_FALSE)
353    fail("Call with non-NULL parentless parser is supposed to succeed");
354
355  XML_ParserFree(parserWithParent);
356  XML_ParserFree(parserWithoutParent);
357}
358END_TEST
359
360START_TEST(test_helper_unsigned_char_to_printable) {
361  // Smoke test
362  unsigned char uc = 0;
363  for (; uc < (unsigned char)-1; uc++) {
364    set_subtest("char %u", (unsigned)uc);
365    const char *const printable = unsignedCharToPrintable(uc);
366    if (printable == NULL)
367      fail("unsignedCharToPrintable returned NULL");
368    else if (strlen(printable) < (size_t)1)
369      fail("unsignedCharToPrintable returned empty string");
370  }
371
372  // Two concrete samples
373  set_subtest("char 'A'");
374  if (strcmp(unsignedCharToPrintable('A'), "A") != 0)
375    fail("unsignedCharToPrintable result mistaken");
376  set_subtest("char '\\'");
377  if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0)
378    fail("unsignedCharToPrintable result mistaken");
379}
380END_TEST
381#endif // XML_GE == 1
382
383void
384make_accounting_test_case(Suite *s) {
385#if XML_GE == 1
386  TCase *tc_accounting = tcase_create("accounting tests");
387
388  suite_add_tcase(s, tc_accounting);
389
390  tcase_add_test(tc_accounting, test_accounting_precision);
391  tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api);
392  tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable);
393#else
394  UNUSED_P(s);
395#endif /* XML_GE == 1 */
396}
397