1/*
2 * HTMLtree.c : implementation of access function for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * daniel@veillard.com
7 */
8
9
10#define IN_LIBXML
11#include "libxml.h"
12#ifdef LIBXML_HTML_ENABLED
13
14#include <string.h> /* for memset() only ! */
15
16#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
30#include <libxml/globals.h>
31#include <libxml/uri.h>
32
33#include "buf.h"
34
35/************************************************************************
36 *									*
37 *		Getting/Setting encoding meta tags			*
38 *									*
39 ************************************************************************/
40
41/**
42 * htmlGetMetaEncoding:
43 * @doc:  the document
44 *
45 * Encoding definition lookup in the Meta tags
46 *
47 * Returns the current encoding as flagged in the HTML source
48 */
49const xmlChar *
50htmlGetMetaEncoding(htmlDocPtr doc) {
51    htmlNodePtr cur;
52    const xmlChar *content;
53    const xmlChar *encoding;
54
55    if (doc == NULL)
56	return(NULL);
57    cur = doc->children;
58
59    /*
60     * Search the html
61     */
62    while (cur != NULL) {
63	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
64	    if (xmlStrEqual(cur->name, BAD_CAST"html"))
65		break;
66	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
67		goto found_head;
68	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
69		goto found_meta;
70	}
71	cur = cur->next;
72    }
73    if (cur == NULL)
74	return(NULL);
75    cur = cur->children;
76
77    /*
78     * Search the head
79     */
80    while (cur != NULL) {
81	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
82	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
83		break;
84	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
85		goto found_meta;
86	}
87	cur = cur->next;
88    }
89    if (cur == NULL)
90	return(NULL);
91found_head:
92    cur = cur->children;
93
94    /*
95     * Search the meta elements
96     */
97found_meta:
98    while (cur != NULL) {
99	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
100	    if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
101		xmlAttrPtr attr = cur->properties;
102		int http;
103		const xmlChar *value;
104
105		content = NULL;
106		http = 0;
107		while (attr != NULL) {
108		    if ((attr->children != NULL) &&
109		        (attr->children->type == XML_TEXT_NODE) &&
110		        (attr->children->next == NULL)) {
111			value = attr->children->content;
112			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
113			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
114			    http = 1;
115			else if ((value != NULL)
116			 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
117			    content = value;
118			if ((http != 0) && (content != NULL))
119			    goto found_content;
120		    }
121		    attr = attr->next;
122		}
123	    }
124	}
125	cur = cur->next;
126    }
127    return(NULL);
128
129found_content:
130    encoding = xmlStrstr(content, BAD_CAST"charset=");
131    if (encoding == NULL)
132	encoding = xmlStrstr(content, BAD_CAST"Charset=");
133    if (encoding == NULL)
134	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
135    if (encoding != NULL) {
136	encoding += 8;
137    } else {
138	encoding = xmlStrstr(content, BAD_CAST"charset =");
139	if (encoding == NULL)
140	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
141	if (encoding == NULL)
142	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
143	if (encoding != NULL)
144	    encoding += 9;
145    }
146    if (encoding != NULL) {
147	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
148    }
149    return(encoding);
150}
151
152/**
153 * htmlSetMetaEncoding:
154 * @doc:  the document
155 * @encoding:  the encoding string
156 *
157 * Sets the current encoding in the Meta tags
158 * NOTE: this will not change the document content encoding, just
159 * the META flag associated.
160 *
161 * Returns 0 in case of success and -1 in case of error
162 */
163int
164htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
165    htmlNodePtr cur, meta = NULL, head = NULL;
166    const xmlChar *content = NULL;
167    char newcontent[100];
168
169    newcontent[0] = 0;
170
171    if (doc == NULL)
172	return(-1);
173
174    /* html isn't a real encoding it's just libxml2 way to get entities */
175    if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
176        return(-1);
177
178    if (encoding != NULL) {
179	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
180                (char *)encoding);
181	newcontent[sizeof(newcontent) - 1] = 0;
182    }
183
184    cur = doc->children;
185
186    /*
187     * Search the html
188     */
189    while (cur != NULL) {
190	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
191	    if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
192		break;
193	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
194		goto found_head;
195	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
196		goto found_meta;
197	}
198	cur = cur->next;
199    }
200    if (cur == NULL)
201	return(-1);
202    cur = cur->children;
203
204    /*
205     * Search the head
206     */
207    while (cur != NULL) {
208	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
209	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
210		break;
211	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
212                head = cur->parent;
213		goto found_meta;
214            }
215	}
216	cur = cur->next;
217    }
218    if (cur == NULL)
219	return(-1);
220found_head:
221    head = cur;
222    if (cur->children == NULL)
223        goto create;
224    cur = cur->children;
225
226found_meta:
227    /*
228     * Search and update all the remaining the meta elements carrying
229     * encoding informations
230     */
231    while (cur != NULL) {
232	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
233	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
234		xmlAttrPtr attr = cur->properties;
235		int http;
236		const xmlChar *value;
237
238		content = NULL;
239		http = 0;
240		while (attr != NULL) {
241		    if ((attr->children != NULL) &&
242		        (attr->children->type == XML_TEXT_NODE) &&
243		        (attr->children->next == NULL)) {
244			value = attr->children->content;
245			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
246			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
247			    http = 1;
248			else
249                        {
250                           if ((value != NULL) &&
251                               (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
252			       content = value;
253                        }
254		        if ((http != 0) && (content != NULL))
255			    break;
256		    }
257		    attr = attr->next;
258		}
259		if ((http != 0) && (content != NULL)) {
260		    meta = cur;
261		    break;
262		}
263
264	    }
265	}
266	cur = cur->next;
267    }
268create:
269    if (meta == NULL) {
270        if ((encoding != NULL) && (head != NULL)) {
271            /*
272             * Create a new Meta element with the right attributes
273             */
274
275            meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
276            if (head->children == NULL)
277                xmlAddChild(head, meta);
278            else
279                xmlAddPrevSibling(head->children, meta);
280            xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
281            xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
282        }
283    } else {
284        /* remove the meta tag if NULL is passed */
285        if (encoding == NULL) {
286            xmlUnlinkNode(meta);
287            xmlFreeNode(meta);
288        }
289        /* change the document only if there is a real encoding change */
290        else if (xmlStrcasestr(content, encoding) == NULL) {
291            xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
292        }
293    }
294
295
296    return(0);
297}
298
/**
 * htmlBooleanAttrs:
 *
 * These are the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 * The list is terminated by a NULL entry.
 */
static const char* const htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
312
313
314/**
315 * htmlIsBooleanAttr:
316 * @name:  the name of the attribute to check
317 *
318 * Determine if a given attribute is a boolean attribute.
319 *
320 * returns: false if the attribute is not boolean, true otherwise.
321 */
322int
323htmlIsBooleanAttr(const xmlChar *name)
324{
325    int i = 0;
326
327    while (htmlBooleanAttrs[i] != NULL) {
328        if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
329            return 1;
330        i++;
331    }
332    return 0;
333}
334
335#ifdef LIBXML_OUTPUT_ENABLED
336/*
337 * private routine exported from xmlIO.c
338 */
339xmlOutputBufferPtr
340xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
341/************************************************************************
342 *									*
343 *			Output error handlers				*
344 *									*
345 ************************************************************************/
346/**
347 * htmlSaveErrMemory:
348 * @extra:  extra informations
349 *
350 * Handle an out of memory condition
351 */
352static void
353htmlSaveErrMemory(const char *extra)
354{
355    __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
356}
357
358/**
359 * htmlSaveErr:
360 * @code:  the error number
361 * @node:  the location of the error.
362 * @extra:  extra informations
363 *
364 * Handle an out of memory condition
365 */
366static void
367htmlSaveErr(int code, xmlNodePtr node, const char *extra)
368{
369    const char *msg = NULL;
370
371    switch(code) {
372        case XML_SAVE_NOT_UTF8:
373	    msg = "string is not in UTF-8\n";
374	    break;
375	case XML_SAVE_CHAR_INVALID:
376	    msg = "invalid character value\n";
377	    break;
378	case XML_SAVE_UNKNOWN_ENCODING:
379	    msg = "unknown encoding %s\n";
380	    break;
381	case XML_SAVE_NO_DOCTYPE:
382	    msg = "HTML has no DOCTYPE\n";
383	    break;
384	default:
385	    msg = "unexpected error number\n";
386    }
387    __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
388}
389
390/************************************************************************
391 *									*
392 *		Dumping HTML tree content to a simple buffer		*
393 *									*
394 ************************************************************************/
395
396/**
397 * htmlBufNodeDumpFormat:
398 * @buf:  the xmlBufPtr output
399 * @doc:  the document
400 * @cur:  the current node
401 * @format:  should formatting spaces been added
402 *
403 * Dump an HTML node, recursive behaviour,children are printed too.
404 *
405 * Returns the number of byte written or -1 in case of error
406 */
407static size_t
408htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
409	           int format) {
410    size_t use;
411    int ret;
412    xmlOutputBufferPtr outbuf;
413
414    if (cur == NULL) {
415	return (-1);
416    }
417    if (buf == NULL) {
418	return (-1);
419    }
420    outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
421    if (outbuf == NULL) {
422        htmlSaveErrMemory("allocating HTML output buffer");
423	return (-1);
424    }
425    memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
426    outbuf->buffer = buf;
427    outbuf->encoder = NULL;
428    outbuf->writecallback = NULL;
429    outbuf->closecallback = NULL;
430    outbuf->context = NULL;
431    outbuf->written = 0;
432
433    use = xmlBufUse(buf);
434    htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
435    xmlFree(outbuf);
436    ret = xmlBufUse(buf) - use;
437    return (ret);
438}
439
440/**
441 * htmlNodeDump:
442 * @buf:  the HTML buffer output
443 * @doc:  the document
444 * @cur:  the current node
445 *
446 * Dump an HTML node, recursive behaviour,children are printed too,
447 * and formatting returns are added.
448 *
449 * Returns the number of byte written or -1 in case of error
450 */
451int
452htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
453    xmlBufPtr buffer;
454    size_t ret;
455
456    if ((buf == NULL) || (cur == NULL))
457        return(-1);
458
459    xmlInitParser();
460    buffer = xmlBufFromBuffer(buf);
461    if (buffer == NULL)
462        return(-1);
463
464    ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
465
466    xmlBufBackToBuffer(buffer);
467
468    if (ret > INT_MAX)
469        return(-1);
470    return((int) ret);
471}
472
473/**
474 * htmlNodeDumpFileFormat:
475 * @out:  the FILE pointer
476 * @doc:  the document
477 * @cur:  the current node
478 * @encoding: the document encoding
479 * @format:  should formatting spaces been added
480 *
481 * Dump an HTML node, recursive behaviour,children are printed too.
482 *
483 * TODO: if encoding == NULL try to save in the doc encoding
484 *
485 * returns: the number of byte written or -1 in case of failure.
486 */
487int
488htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
489	               xmlNodePtr cur, const char *encoding, int format) {
490    xmlOutputBufferPtr buf;
491    xmlCharEncodingHandlerPtr handler = NULL;
492    int ret;
493
494    xmlInitParser();
495
496    if (encoding != NULL) {
497	xmlCharEncoding enc;
498
499	enc = xmlParseCharEncoding(encoding);
500	if (enc != XML_CHAR_ENCODING_UTF8) {
501	    handler = xmlFindCharEncodingHandler(encoding);
502	    if (handler == NULL)
503		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
504	}
505    }
506
507    /*
508     * Fallback to HTML or ASCII when the encoding is unspecified
509     */
510    if (handler == NULL)
511	handler = xmlFindCharEncodingHandler("HTML");
512    if (handler == NULL)
513	handler = xmlFindCharEncodingHandler("ascii");
514
515    /*
516     * save the content to a temp buffer.
517     */
518    buf = xmlOutputBufferCreateFile(out, handler);
519    if (buf == NULL) return(0);
520
521    htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
522
523    ret = xmlOutputBufferClose(buf);
524    return(ret);
525}
526
527/**
528 * htmlNodeDumpFile:
529 * @out:  the FILE pointer
530 * @doc:  the document
531 * @cur:  the current node
532 *
533 * Dump an HTML node, recursive behaviour,children are printed too,
534 * and formatting returns are added.
535 */
536void
537htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
538    htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
539}
540
541/**
542 * htmlDocDumpMemoryFormat:
543 * @cur:  the document
544 * @mem:  OUT: the memory pointer
545 * @size:  OUT: the memory length
546 * @format:  should formatting spaces been added
547 *
548 * Dump an HTML document in memory and return the xmlChar * and it's size.
549 * It's up to the caller to free the memory.
550 */
551void
552htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
553    xmlOutputBufferPtr buf;
554    xmlCharEncodingHandlerPtr handler = NULL;
555    const char *encoding;
556
557    xmlInitParser();
558
559    if ((mem == NULL) || (size == NULL))
560        return;
561    if (cur == NULL) {
562	*mem = NULL;
563	*size = 0;
564	return;
565    }
566
567    encoding = (const char *) htmlGetMetaEncoding(cur);
568
569    if (encoding != NULL) {
570	xmlCharEncoding enc;
571
572	enc = xmlParseCharEncoding(encoding);
573	if (enc != cur->charset) {
574	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
575		/*
576		 * Not supported yet
577		 */
578		*mem = NULL;
579		*size = 0;
580		return;
581	    }
582
583	    handler = xmlFindCharEncodingHandler(encoding);
584	    if (handler == NULL)
585                htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
586
587	} else {
588	    handler = xmlFindCharEncodingHandler(encoding);
589	}
590    }
591
592    /*
593     * Fallback to HTML or ASCII when the encoding is unspecified
594     */
595    if (handler == NULL)
596	handler = xmlFindCharEncodingHandler("HTML");
597    if (handler == NULL)
598	handler = xmlFindCharEncodingHandler("ascii");
599
600    buf = xmlAllocOutputBufferInternal(handler);
601    if (buf == NULL) {
602	*mem = NULL;
603	*size = 0;
604	return;
605    }
606
607    htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
608
609    xmlOutputBufferFlush(buf);
610    if (buf->conv != NULL) {
611	*size = xmlBufUse(buf->conv);
612	*mem = xmlStrndup(xmlBufContent(buf->conv), *size);
613    } else {
614	*size = xmlBufUse(buf->buffer);
615	*mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
616    }
617    (void)xmlOutputBufferClose(buf);
618}
619
620/**
621 * htmlDocDumpMemory:
622 * @cur:  the document
623 * @mem:  OUT: the memory pointer
624 * @size:  OUT: the memory length
625 *
626 * Dump an HTML document in memory and return the xmlChar * and it's size.
627 * It's up to the caller to free the memory.
628 */
629void
630htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
631	htmlDocDumpMemoryFormat(cur, mem, size, 1);
632}
633
634
635/************************************************************************
636 *									*
637 *		Dumping HTML tree content to an I/O output buffer	*
638 *									*
639 ************************************************************************/
640
641void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
642
643/**
644 * htmlDtdDumpOutput:
645 * @buf:  the HTML buffer output
646 * @doc:  the document
647 * @encoding:  the encoding string
648 *
649 * TODO: check whether encoding is needed
650 *
651 * Dump the HTML document DTD, if any.
652 */
653static void
654htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
655	          const char *encoding ATTRIBUTE_UNUSED) {
656    xmlDtdPtr cur = doc->intSubset;
657
658    if (cur == NULL) {
659	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
660	return;
661    }
662    xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
663    xmlOutputBufferWriteString(buf, (const char *)cur->name);
664    if (cur->ExternalID != NULL) {
665	xmlOutputBufferWriteString(buf, " PUBLIC ");
666	xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
667	if (cur->SystemID != NULL) {
668	    xmlOutputBufferWriteString(buf, " ");
669	    xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
670	}
671    }  else if (cur->SystemID != NULL) {
672	xmlOutputBufferWriteString(buf, " SYSTEM ");
673	xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
674    }
675    xmlOutputBufferWriteString(buf, ">\n");
676}
677
678/**
679 * htmlAttrDumpOutput:
680 * @buf:  the HTML buffer output
681 * @doc:  the document
682 * @cur:  the attribute pointer
683 * @encoding:  the encoding string
684 *
685 * Dump an HTML attribute
686 */
687static void
688htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
689	           const char *encoding ATTRIBUTE_UNUSED) {
690    xmlChar *value;
691
692    /*
693     * The html output method should not escape a & character
694     * occurring in an attribute value immediately followed by
695     * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
696     * This is implemented in xmlEncodeEntitiesReentrant
697     */
698
699    if (cur == NULL) {
700	return;
701    }
702    xmlOutputBufferWriteString(buf, " ");
703    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
704        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
705	xmlOutputBufferWriteString(buf, ":");
706    }
707    xmlOutputBufferWriteString(buf, (const char *)cur->name);
708    if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
709	value = xmlNodeListGetString(doc, cur->children, 0);
710	if (value) {
711	    xmlOutputBufferWriteString(buf, "=");
712	    if ((cur->ns == NULL) && (cur->parent != NULL) &&
713		(cur->parent->ns == NULL) &&
714		((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
715	         (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
716		 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
717		 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
718		  (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
719		xmlChar *escaped;
720		xmlChar *tmp = value;
721
722		while (IS_BLANK_CH(*tmp)) tmp++;
723
724		/*
725		 * the < and > have already been escaped at the entity level
726		 * And doing so here breaks server side includes
727		 */
728		escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
729		if (escaped != NULL) {
730		    xmlBufWriteQuotedString(buf->buffer, escaped);
731		    xmlFree(escaped);
732		} else {
733		    xmlBufWriteQuotedString(buf->buffer, value);
734		}
735	    } else {
736		xmlBufWriteQuotedString(buf->buffer, value);
737	    }
738	    xmlFree(value);
739	} else  {
740	    xmlOutputBufferWriteString(buf, "=\"\"");
741	}
742    }
743}
744
745/**
746 * htmlAttrListDumpOutput:
747 * @buf:  the HTML buffer output
748 * @doc:  the document
749 * @cur:  the first attribute pointer
750 * @encoding:  the encoding string
751 *
752 * Dump a list of HTML attributes
753 */
754static void
755htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
756    if (cur == NULL) {
757	return;
758    }
759    while (cur != NULL) {
760        htmlAttrDumpOutput(buf, doc, cur, encoding);
761	cur = cur->next;
762    }
763}
764
765
766
767/**
768 * htmlNodeListDumpOutput:
769 * @buf:  the HTML buffer output
770 * @doc:  the document
771 * @cur:  the first node
772 * @encoding:  the encoding string
773 * @format:  should formatting spaces been added
774 *
775 * Dump an HTML node list, recursive behaviour,children are printed too.
776 */
777static void
778htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
779	               xmlNodePtr cur, const char *encoding, int format) {
780    if (cur == NULL) {
781	return;
782    }
783    while (cur != NULL) {
784        htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
785	cur = cur->next;
786    }
787}
788
789/**
790 * htmlNodeDumpFormatOutput:
791 * @buf:  the HTML buffer output
792 * @doc:  the document
793 * @cur:  the current node
794 * @encoding:  the encoding string
795 * @format:  should formatting spaces been added
796 *
797 * Dump an HTML node, recursive behaviour,children are printed too.
798 */
799void
800htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
801	                 xmlNodePtr cur, const char *encoding, int format) {
802    const htmlElemDesc * info;
803
804    xmlInitParser();
805
806    if ((cur == NULL) || (buf == NULL)) {
807	return;
808    }
809    /*
810     * Special cases.
811     */
812    if (cur->type == XML_DTD_NODE)
813	return;
814    if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
815        (cur->type == XML_DOCUMENT_NODE)){
816	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
817	return;
818    }
819    if (cur->type == XML_ATTRIBUTE_NODE) {
820        htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
821	return;
822    }
823    if (cur->type == HTML_TEXT_NODE) {
824	if (cur->content != NULL) {
825	    if (((cur->name == (const xmlChar *)xmlStringText) ||
826		 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
827		((cur->parent == NULL) ||
828		 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
829		  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
830		xmlChar *buffer;
831
832		buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
833		if (buffer != NULL) {
834		    xmlOutputBufferWriteString(buf, (const char *)buffer);
835		    xmlFree(buffer);
836		}
837	    } else {
838		xmlOutputBufferWriteString(buf, (const char *)cur->content);
839	    }
840	}
841	return;
842    }
843    if (cur->type == HTML_COMMENT_NODE) {
844	if (cur->content != NULL) {
845	    xmlOutputBufferWriteString(buf, "<!--");
846	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
847	    xmlOutputBufferWriteString(buf, "-->");
848	}
849	return;
850    }
851    if (cur->type == HTML_PI_NODE) {
852	if (cur->name == NULL)
853	    return;
854	xmlOutputBufferWriteString(buf, "<?");
855	xmlOutputBufferWriteString(buf, (const char *)cur->name);
856	if (cur->content != NULL) {
857	    xmlOutputBufferWriteString(buf, " ");
858	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
859	}
860	xmlOutputBufferWriteString(buf, ">");
861	return;
862    }
863    if (cur->type == HTML_ENTITY_REF_NODE) {
864        xmlOutputBufferWriteString(buf, "&");
865	xmlOutputBufferWriteString(buf, (const char *)cur->name);
866        xmlOutputBufferWriteString(buf, ";");
867	return;
868    }
869    if (cur->type == HTML_PRESERVE_NODE) {
870	if (cur->content != NULL) {
871	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
872	}
873	return;
874    }
875
876    /*
877     * Get specific HTML info for that node.
878     */
879    if (cur->ns == NULL)
880	info = htmlTagLookup(cur->name);
881    else
882	info = NULL;
883
884    xmlOutputBufferWriteString(buf, "<");
885    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
886        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
887	xmlOutputBufferWriteString(buf, ":");
888    }
889    xmlOutputBufferWriteString(buf, (const char *)cur->name);
890    if (cur->nsDef)
891	xmlNsListDumpOutput(buf, cur->nsDef);
892    if (cur->properties != NULL)
893        htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
894
895    if ((info != NULL) && (info->empty)) {
896        xmlOutputBufferWriteString(buf, ">");
897	if ((format) && (!info->isinline) && (cur->next != NULL)) {
898	    if ((cur->next->type != HTML_TEXT_NODE) &&
899		(cur->next->type != HTML_ENTITY_REF_NODE) &&
900		(cur->parent != NULL) &&
901		(cur->parent->name != NULL) &&
902		(cur->parent->name[0] != 'p')) /* p, pre, param */
903		xmlOutputBufferWriteString(buf, "\n");
904	}
905	return;
906    }
907    if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
908	(cur->children == NULL)) {
909        if ((info != NULL) && (info->saveEndTag != 0) &&
910	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
911	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
912	    xmlOutputBufferWriteString(buf, ">");
913	} else {
914	    xmlOutputBufferWriteString(buf, "></");
915            if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
916                xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
917                xmlOutputBufferWriteString(buf, ":");
918            }
919	    xmlOutputBufferWriteString(buf, (const char *)cur->name);
920	    xmlOutputBufferWriteString(buf, ">");
921	}
922	if ((format) && (cur->next != NULL) &&
923            (info != NULL) && (!info->isinline)) {
924	    if ((cur->next->type != HTML_TEXT_NODE) &&
925		(cur->next->type != HTML_ENTITY_REF_NODE) &&
926		(cur->parent != NULL) &&
927		(cur->parent->name != NULL) &&
928		(cur->parent->name[0] != 'p')) /* p, pre, param */
929		xmlOutputBufferWriteString(buf, "\n");
930	}
931	return;
932    }
933    xmlOutputBufferWriteString(buf, ">");
934    if ((cur->type != XML_ELEMENT_NODE) &&
935	(cur->content != NULL)) {
936	    /*
937	     * Uses the OutputBuffer property to automatically convert
938	     * invalids to charrefs
939	     */
940
941            xmlOutputBufferWriteString(buf, (const char *) cur->content);
942    }
943    if (cur->children != NULL) {
944        if ((format) && (info != NULL) && (!info->isinline) &&
945	    (cur->children->type != HTML_TEXT_NODE) &&
946	    (cur->children->type != HTML_ENTITY_REF_NODE) &&
947	    (cur->children != cur->last) &&
948	    (cur->name != NULL) &&
949	    (cur->name[0] != 'p')) /* p, pre, param */
950	    xmlOutputBufferWriteString(buf, "\n");
951	htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
952        if ((format) && (info != NULL) && (!info->isinline) &&
953	    (cur->last->type != HTML_TEXT_NODE) &&
954	    (cur->last->type != HTML_ENTITY_REF_NODE) &&
955	    (cur->children != cur->last) &&
956	    (cur->name != NULL) &&
957	    (cur->name[0] != 'p')) /* p, pre, param */
958	    xmlOutputBufferWriteString(buf, "\n");
959    }
960    xmlOutputBufferWriteString(buf, "</");
961    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
962        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
963	xmlOutputBufferWriteString(buf, ":");
964    }
965    xmlOutputBufferWriteString(buf, (const char *)cur->name);
966    xmlOutputBufferWriteString(buf, ">");
967    if ((format) && (info != NULL) && (!info->isinline) &&
968	(cur->next != NULL)) {
969        if ((cur->next->type != HTML_TEXT_NODE) &&
970	    (cur->next->type != HTML_ENTITY_REF_NODE) &&
971	    (cur->parent != NULL) &&
972	    (cur->parent->name != NULL) &&
973	    (cur->parent->name[0] != 'p')) /* p, pre, param */
974	    xmlOutputBufferWriteString(buf, "\n");
975    }
976}
977
978/**
979 * htmlNodeDumpOutput:
980 * @buf:  the HTML buffer output
981 * @doc:  the document
982 * @cur:  the current node
983 * @encoding:  the encoding string
984 *
985 * Dump an HTML node, recursive behaviour,children are printed too,
986 * and formatting returns/spaces are added.
987 */
988void
989htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
990	           xmlNodePtr cur, const char *encoding) {
991    htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
992}
993
994/**
995 * htmlDocContentDumpFormatOutput:
996 * @buf:  the HTML buffer output
997 * @cur:  the document
998 * @encoding:  the encoding string
999 * @format:  should formatting spaces been added
1000 *
1001 * Dump an HTML document.
1002 */
1003void
1004htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1005	                       const char *encoding, int format) {
1006    int type;
1007
1008    xmlInitParser();
1009
1010    if ((buf == NULL) || (cur == NULL))
1011        return;
1012
1013    /*
1014     * force to output the stuff as HTML, especially for entities
1015     */
1016    type = cur->type;
1017    cur->type = XML_HTML_DOCUMENT_NODE;
1018    if (cur->intSubset != NULL) {
1019        htmlDtdDumpOutput(buf, cur, NULL);
1020    }
1021    if (cur->children != NULL) {
1022        htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
1023    }
1024    xmlOutputBufferWriteString(buf, "\n");
1025    cur->type = (xmlElementType) type;
1026}
1027
1028/**
1029 * htmlDocContentDumpOutput:
1030 * @buf:  the HTML buffer output
1031 * @cur:  the document
1032 * @encoding:  the encoding string
1033 *
1034 * Dump an HTML document. Formating return/spaces are added.
1035 */
1036void
1037htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1038	                 const char *encoding) {
1039    htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1040}
1041
1042/************************************************************************
1043 *									*
1044 *		Saving functions front-ends				*
1045 *									*
1046 ************************************************************************/
1047
1048/**
1049 * htmlDocDump:
1050 * @f:  the FILE*
1051 * @cur:  the document
1052 *
1053 * Dump an HTML document to an open FILE.
1054 *
1055 * returns: the number of byte written or -1 in case of failure.
1056 */
1057int
1058htmlDocDump(FILE *f, xmlDocPtr cur) {
1059    xmlOutputBufferPtr buf;
1060    xmlCharEncodingHandlerPtr handler = NULL;
1061    const char *encoding;
1062    int ret;
1063
1064    xmlInitParser();
1065
1066    if ((cur == NULL) || (f == NULL)) {
1067	return(-1);
1068    }
1069
1070    encoding = (const char *) htmlGetMetaEncoding(cur);
1071
1072    if (encoding != NULL) {
1073	xmlCharEncoding enc;
1074
1075	enc = xmlParseCharEncoding(encoding);
1076	if (enc != cur->charset) {
1077	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1078		/*
1079		 * Not supported yet
1080		 */
1081		return(-1);
1082	    }
1083
1084	    handler = xmlFindCharEncodingHandler(encoding);
1085	    if (handler == NULL)
1086		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1087	} else {
1088	    handler = xmlFindCharEncodingHandler(encoding);
1089	}
1090    }
1091
1092    /*
1093     * Fallback to HTML or ASCII when the encoding is unspecified
1094     */
1095    if (handler == NULL)
1096	handler = xmlFindCharEncodingHandler("HTML");
1097    if (handler == NULL)
1098	handler = xmlFindCharEncodingHandler("ascii");
1099
1100    buf = xmlOutputBufferCreateFile(f, handler);
1101    if (buf == NULL) return(-1);
1102    htmlDocContentDumpOutput(buf, cur, NULL);
1103
1104    ret = xmlOutputBufferClose(buf);
1105    return(ret);
1106}
1107
1108/**
1109 * htmlSaveFile:
1110 * @filename:  the filename (or URL)
1111 * @cur:  the document
1112 *
1113 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1114 * used.
1115 * returns: the number of byte written or -1 in case of failure.
1116 */
1117int
1118htmlSaveFile(const char *filename, xmlDocPtr cur) {
1119    xmlOutputBufferPtr buf;
1120    xmlCharEncodingHandlerPtr handler = NULL;
1121    const char *encoding;
1122    int ret;
1123
1124    if ((cur == NULL) || (filename == NULL))
1125        return(-1);
1126
1127    xmlInitParser();
1128
1129    encoding = (const char *) htmlGetMetaEncoding(cur);
1130
1131    if (encoding != NULL) {
1132	xmlCharEncoding enc;
1133
1134	enc = xmlParseCharEncoding(encoding);
1135	if (enc != cur->charset) {
1136	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1137		/*
1138		 * Not supported yet
1139		 */
1140		return(-1);
1141	    }
1142
1143	    handler = xmlFindCharEncodingHandler(encoding);
1144	    if (handler == NULL)
1145		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1146	}
1147    }
1148
1149    /*
1150     * Fallback to HTML or ASCII when the encoding is unspecified
1151     */
1152    if (handler == NULL)
1153	handler = xmlFindCharEncodingHandler("HTML");
1154    if (handler == NULL)
1155	handler = xmlFindCharEncodingHandler("ascii");
1156
1157    /*
1158     * save the content to a temp buffer.
1159     */
1160    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1161    if (buf == NULL) return(0);
1162
1163    htmlDocContentDumpOutput(buf, cur, NULL);
1164
1165    ret = xmlOutputBufferClose(buf);
1166    return(ret);
1167}
1168
1169/**
1170 * htmlSaveFileFormat:
1171 * @filename:  the filename
1172 * @cur:  the document
1173 * @format:  should formatting spaces been added
1174 * @encoding: the document encoding
1175 *
1176 * Dump an HTML document to a file using a given encoding.
1177 *
1178 * returns: the number of byte written or -1 in case of failure.
1179 */
1180int
1181htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1182	           const char *encoding, int format) {
1183    xmlOutputBufferPtr buf;
1184    xmlCharEncodingHandlerPtr handler = NULL;
1185    int ret;
1186
1187    if ((cur == NULL) || (filename == NULL))
1188        return(-1);
1189
1190    xmlInitParser();
1191
1192    if (encoding != NULL) {
1193	xmlCharEncoding enc;
1194
1195	enc = xmlParseCharEncoding(encoding);
1196	if (enc != cur->charset) {
1197	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1198		/*
1199		 * Not supported yet
1200		 */
1201		return(-1);
1202	    }
1203
1204	    handler = xmlFindCharEncodingHandler(encoding);
1205	    if (handler == NULL)
1206		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1207	}
1208        htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1209    } else {
1210	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1211    }
1212
1213    /*
1214     * Fallback to HTML or ASCII when the encoding is unspecified
1215     */
1216    if (handler == NULL)
1217	handler = xmlFindCharEncodingHandler("HTML");
1218    if (handler == NULL)
1219	handler = xmlFindCharEncodingHandler("ascii");
1220
1221    /*
1222     * save the content to a temp buffer.
1223     */
1224    buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1225    if (buf == NULL) return(0);
1226
1227    htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1228
1229    ret = xmlOutputBufferClose(buf);
1230    return(ret);
1231}
1232
1233/**
1234 * htmlSaveFileEnc:
1235 * @filename:  the filename
1236 * @cur:  the document
1237 * @encoding: the document encoding
1238 *
1239 * Dump an HTML document to a file using a given encoding
1240 * and formatting returns/spaces are added.
1241 *
1242 * returns: the number of byte written or -1 in case of failure.
1243 */
1244int
1245htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1246    return(htmlSaveFileFormat(filename, cur, encoding, 1));
1247}
1248
1249#endif /* LIBXML_OUTPUT_ENABLED */
1250
1251#define bottom_HTMLtree
1252#include "elfgcchack.h"
1253#endif /* LIBXML_HTML_ENABLED */
1254