1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2013 David Chisnall
5 * All rights reserved.
6 *
7 * This software was developed by SRI International and the University of
8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9 * ("CTSRD"), as part of the DARPA CRASH research programme.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include "input_buffer.hh"
34#include <ctype.h>
35#include <errno.h>
36#include <stdint.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40#include <functional>
41#ifndef NDEBUG
42#include <iostream>
43#endif
44#include <limits>
45
46
47#include <sys/stat.h>
48#include <sys/mman.h>
49#include <assert.h>
50#include <fcntl.h>
51#include <unistd.h>
52
53#ifndef MAP_PREFAULT_READ
54#define MAP_PREFAULT_READ 0
55#endif
56
57using std::string;
58
59namespace
60{
61/**
62 * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
63 * When this object is destroyed, the memory is unmapped.
64 */
65struct mmap_input_buffer : public dtc::input_buffer
66{
67	string fn;
68	const string &filename() const override
69	{
70		return fn;
71	}
72	/**
73	 * Constructs a new buffer from the file passed in as a file
74	 * descriptor.
75	 */
76	mmap_input_buffer(int fd, string &&filename);
77	/**
78	 * Unmaps the buffer, if one exists.
79	 */
80	virtual ~mmap_input_buffer();
81};
82/**
83 * Input buffer read from standard input.  This is used for reading device tree
84 * blobs and source from standard input.  It reads the entire input into
85 * malloc'd memory, so will be very slow for large inputs.  DTS and DTB files
86 * are very rarely more than 10KB though, so this is probably not a problem.
87 */
88struct stream_input_buffer : public dtc::input_buffer
89{
90	const string &filename() const override
91	{
92		static string n = "<standard input>";
93		return n;
94	}
95	/**
96	 * The buffer that will store the data read from the standard input.
97	 */
98	std::vector<char> b;
99	/**
100	 * Constructs a new buffer from the standard input.
101	 */
102	stream_input_buffer();
103};
104
105mmap_input_buffer::mmap_input_buffer(int fd, string &&filename)
106	: input_buffer(0, 0), fn(filename)
107{
108	struct stat sb;
109	if (fstat(fd, &sb))
110	{
111		perror("Failed to stat file");
112	}
113	size = sb.st_size;
114	buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE |
115			MAP_PREFAULT_READ, fd, 0);
116	if (buffer == MAP_FAILED)
117	{
118		perror("Failed to mmap file");
119		exit(EXIT_FAILURE);
120	}
121}
122
123mmap_input_buffer::~mmap_input_buffer()
124{
125	if (buffer != 0)
126	{
127		munmap(const_cast<char*>(buffer), size);
128	}
129}
130
131stream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
132{
133	int c;
134	while ((c = fgetc(stdin)) != EOF)
135	{
136		b.push_back(c);
137	}
138	buffer = b.data();
139	size = b.size();
140}
141
142} // Anonymous namespace
143
144
145namespace dtc
146{
147
148void
149input_buffer::skip_to(char c)
150{
151	while ((cursor < size) && (buffer[cursor] != c))
152	{
153		cursor++;
154	}
155}
156
157void
158text_input_buffer::skip_to(char c)
159{
160	while (!finished() && (*(*this) != c))
161	{
162		++(*this);
163	}
164}
165
166void
167text_input_buffer::skip_spaces()
168{
169	if (finished()) { return; }
170	char c = *(*this);
171	bool last_nl = false;
172	while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
173	       || (c == '\v') || (c == '\r'))
174	{
175		last_nl = ((c == '\n') || (c == '\r'));
176		++(*this);
177		if (finished())
178		{
179			c = '\0';
180		}
181		else
182		{
183			c = *(*this);
184		}
185	}
186	// Skip C preprocessor leftovers
187	if ((c == '#') && ((cursor == 0) || last_nl))
188	{
189		skip_to('\n');
190		skip_spaces();
191	}
192	if (consume("/include/"))
193	{
194		handle_include();
195		skip_spaces();
196	}
197}
198
199void
200text_input_buffer::handle_include()
201{
202	bool reallyInclude = true;
203	if (consume("if "))
204	{
205		next_token();
206		string name = parse_property_name();
207		if (defines.count(name) == 0)
208		{
209			reallyInclude = false;
210		}
211		consume('/');
212	}
213	next_token();
214	if (!consume('"'))
215	{
216		parse_error("Expected quoted filename");
217		return;
218	}
219	auto loc = location();
220	string file = parse_to('"');
221	consume('"');
222	if (!reallyInclude)
223	{
224		return;
225	}
226	string include_file = dir + '/' + file;
227	auto include_buffer = input_buffer::buffer_for_file(include_file, false);
228	if (include_buffer == 0)
229	{
230		for (auto i : include_paths)
231		{
232			include_file = i + '/' + file;
233			include_buffer = input_buffer::buffer_for_file(include_file, false);
234			if (include_buffer != 0)
235			{
236				break;
237			}
238		}
239	}
240	if (depfile)
241	{
242		putc(' ', depfile);
243		fputs(include_file.c_str(), depfile);
244	}
245	if (!include_buffer)
246	{
247		loc.report_error("Unable to locate input file");
248		return;
249	}
250	input_stack.push(std::move(include_buffer));
251}
252
253bool text_input_buffer::read_binary_file(const std::string &filename, byte_buffer &b)
254{
255	bool try_include_paths = true;
256	string include_file;
257	if (filename[0] == '/')
258	{
259		include_file = filename;
260		// Don't try include paths if we're given an absolute path.
261		// Failing is better so that we don't accidentally do the wrong thing,
262		// but make it seem like everything is alright.
263		try_include_paths = false;
264	}
265	else
266	{
267		include_file = dir + '/' + filename;
268	}
269	auto include_buffer = input_buffer::buffer_for_file(include_file, false);
270	if (include_buffer == 0 && try_include_paths)
271	{
272		for (auto i : include_paths)
273		{
274			include_file = i + '/' + filename;
275			include_buffer = input_buffer::buffer_for_file(include_file, false);
276			if (include_buffer != 0)
277			{
278				break;
279			}
280		}
281	}
282	if (!include_buffer)
283	{
284		return false;
285	}
286	if (depfile)
287	{
288		putc(' ', depfile);
289		fputs(include_file.c_str(), depfile);
290	}
291	b.insert(b.begin(), include_buffer->begin(), include_buffer->end());
292	return true;
293}
294
295input_buffer
296input_buffer::buffer_from_offset(int offset, int s)
297{
298	if (offset < 0)
299	{
300		return input_buffer();
301	}
302	if (s == 0)
303	{
304		s = size - offset;
305	}
306	if (offset > size)
307	{
308		return input_buffer();
309	}
310	if (s > (size-offset))
311	{
312		return input_buffer();
313	}
314	return input_buffer(&buffer[offset], s);
315}
316
317bool
318input_buffer::consume(const char *str)
319{
320	int len = strlen(str);
321	if (len > size - cursor)
322	{
323		return false;
324	}
325	else
326	{
327		for (int i=0 ; i<len ; ++i)
328		{
329			if (str[i] != (*this)[i])
330			{
331				return false;
332			}
333		}
334		cursor += len;
335		return true;
336	}
337	return false;
338}
339
340bool
341input_buffer::consume_integer(unsigned long long &outInt)
342{
343	// The first character must be a digit.  Hex and octal strings
344	// are prefixed by 0 and 0x, respectively.
345	if (!isdigit((*this)[0]))
346	{
347		return false;
348	}
349	char *end= const_cast<char*>(&buffer[size]);
350	errno = 0;
351	outInt = strtoull(&buffer[cursor], &end, 0);
352	if (end == &buffer[cursor] ||
353	    (outInt == std::numeric_limits<unsigned long long>::max() &&
354	     errno == ERANGE))
355	{
356		return false;
357	}
358	cursor = end - buffer;
359	return true;
360}
361
362namespace {
363
364/**
365 * Convenience typedef for the type that we use for all values.
366 */
367typedef unsigned long long valty;
368
369/**
370 * Expression tree currently being parsed.
371 */
372struct expression
373{
374	typedef text_input_buffer::source_location source_location;
375	/**
376	 * The type that is returned when computing the result.  The boolean value
377	 * indicates whether this is a valid expression.
378	 *
379	 * FIXME: Once we can use C++17, this should be `std::optional`.
380	 */
381	typedef std::pair<valty, bool> result;
382	/**
383	 * Evaluate this node, taking into account operator precedence.
384	 */
385	virtual result operator()() = 0;
386	/**
387	 * Returns the precedence of this node.  Lower values indicate higher
388	 * precedence.
389	 */
390	virtual int precedence() = 0;
391	/**
392	 * Constructs an expression, storing the location where it was created.
393	 */
394	expression(source_location l) : loc(l) {}
395	virtual ~expression() {}
396#ifndef NDEBUG
397	/**
398	 * Dumps this expression to `std::cerr`, appending a newline if `nl` is
399	 * `true`.
400	 */
401	void dump(bool nl=false)
402	{
403		void *ptr = this;
404		if (ptr == nullptr)
405		{
406			std::cerr << "{nullptr}\n";
407			return;
408		}
409		dump_impl();
410		if (nl)
411		{
412			std::cerr << '\n';
413		}
414	}
415	private:
416	/**
417	 * Method that sublcasses override to implement the behaviour of `dump()`.
418	 */
419	virtual void dump_impl() = 0;
420#endif
421	protected:
422	source_location loc;
423};
424
425/**
426 * Expression wrapping a single integer.  Leaf nodes in the expression tree.
427 */
428class terminal_expr : public expression
429{
430	/**
431	 * The value that this wraps.
432	 */
433	valty val;
434	/**
435	 * Evaluate.  Trivially returns the value that this class wraps.
436	 */
437	result operator()() override
438	{
439		return {val, true};
440	}
441	int precedence() override
442	{
443		return 0;
444	}
445	public:
446	/**
447	 * Constructor.
448	 */
449	terminal_expr(source_location l, valty v) : expression(l), val(v) {}
450#ifndef NDEBUG
451	void dump_impl() override { std::cerr << val; }
452#endif
453};
454
455/**
456 * Parenthetical expression.  Exists to make the contents opaque.
457 */
458struct paren_expression : public expression
459{
460	/**
461	 * The expression within the parentheses.
462	 */
463	expression_ptr subexpr;
464	/**
465	 * Constructor.  Takes the child expression as the only argument.
466	 */
467	paren_expression(source_location l, expression_ptr p) : expression(l),
468	subexpr(std::move(p)) {}
469	int precedence() override
470	{
471		return 0;
472	}
473	/**
474	 * Evaluate - just forwards to the underlying expression.
475	 */
476	result operator()() override
477	{
478		return (*subexpr)();
479	}
480#ifndef NDEBUG
481	void dump_impl() override
482	{
483		std::cerr << " (";
484		subexpr->dump();
485		std::cerr << ") ";
486	}
487#endif
488};
489
490/**
491 * Template class for unary operators.  The `OpChar` template parameter is
492 * solely for debugging and makes it easy to print the expression.  The `Op`
493 * template parameter is a function object that implements the operator that
494 * this class provides.  Most of these are provided by the `<functional>`
495 * header.
496 */
497template<char OpChar, class Op>
498class unary_operator : public expression
499{
500	/**
501	 * The subexpression for this unary operator.
502	 */
503	expression_ptr subexpr;
504	result operator()() override
505	{
506		Op op;
507		result s = (*subexpr)();
508		if (!s.second)
509		{
510			return s;
511		}
512		return {op(s.first), true};
513	}
514	/**
515	 * All unary operators have the same precedence.  They are all evaluated
516	 * before binary expressions, but after parentheses.
517	 */
518	int precedence() override
519	{
520		return 3;
521	}
522	public:
523	unary_operator(source_location l, expression_ptr p) :
524		expression(l), subexpr(std::move(p)) {}
525#ifndef NDEBUG
526	void dump_impl() override
527	{
528		std::cerr << OpChar;
529		subexpr->dump();
530	}
531#endif
532};
533
534/**
535 * Abstract base class for binary operators.  Allows the tree to be modified
536 * without knowing what the operations actually are.
537 */
538struct binary_operator_base : public expression
539{
540	using expression::expression;
541	/**
542	 * The left side of the expression.
543	 */
544	expression_ptr lhs;
545	/**
546	 * The right side of the expression.
547	 */
548	expression_ptr rhs;
549	/**
550	 * Insert a node somewhere down the path of left children, until it would
551	 * be preempting something that should execute first.
552	 */
553	void insert_left(binary_operator_base *new_left)
554	{
555		if (lhs->precedence() < new_left->precedence())
556		{
557			new_left->rhs = std::move(lhs);
558			lhs.reset(new_left);
559		}
560		else
561		{
562			static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left);
563		}
564	}
565};
566
567/**
568 * Template class for binary operators.  The precedence and the operation are
569 * provided as template parameters.
570 */
571template<int Precedence, class Op>
572struct binary_operator : public binary_operator_base
573{
574	result operator()() override
575	{
576		Op op;
577		result l = (*lhs)();
578		result r = (*rhs)();
579		if (!(l.second && r.second))
580		{
581			return {0, false};
582		}
583		return {op(l.first, r.first), true};
584	}
585	int precedence() override
586	{
587		return Precedence;
588	}
589#ifdef NDEBUG
590	/**
591	 * Constructor.  Takes the name of the operator as an argument, for
592	 * debugging.  Only stores it in debug mode.
593	 */
594	binary_operator(source_location l, const char *) :
595		binary_operator_base(l) {}
596#else
597	const char *opName;
598	binary_operator(source_location l, const char *o) :
599		binary_operator_base(l), opName(o) {}
600	void dump_impl() override
601	{
602		lhs->dump();
603		std::cerr << opName;
604		rhs->dump();
605	}
606#endif
607};
608
609/**
610 * Ternary conditional operators (`cond ? true : false`) are a special case -
611 * there are no other ternary operators.
612 */
613class ternary_conditional_operator : public expression
614{
615	/**
616	 * The condition for the clause.
617	 */
618	expression_ptr cond;
619	/**
620	 * The expression that this evaluates to if the condition is true.
621	 */
622	expression_ptr lhs;
623	/**
624	 * The expression that this evaluates to if the condition is false.
625	 */
626	expression_ptr rhs;
627	result operator()() override
628	{
629		result c = (*cond)();
630		result l = (*lhs)();
631		result r = (*rhs)();
632		if (!(l.second && r.second && c.second))
633		{
634			return {0, false};
635		}
636		return c.first ? l : r;
637	}
638	int precedence() override
639	{
640		// The actual precedence of a ternary conditional operator is 15, but
641		// its associativity is the opposite way around to the other operators,
642		// so we fudge it slightly.
643		return 3;
644	}
645#ifndef NDEBUG
646	void dump_impl() override
647	{
648		cond->dump();
649		std::cerr << " ? ";
650		lhs->dump();
651		std::cerr << " : ";
652		rhs->dump();
653	}
654#endif
655	public:
656	ternary_conditional_operator(source_location sl,
657	                             expression_ptr c,
658	                             expression_ptr l,
659	                             expression_ptr r) :
660		expression(sl), cond(std::move(c)), lhs(std::move(l)),
661		rhs(std::move(r)) {}
662};
663
664template<typename T>
665struct lshift
666{
667	constexpr T operator()(const T &lhs, const T &rhs) const
668	{
669		return lhs << rhs;
670	}
671};
672template<typename T>
673struct rshift
674{
675	constexpr T operator()(const T &lhs, const T &rhs) const
676	{
677		return lhs >> rhs;
678	}
679};
680template<typename T>
681struct unary_plus
682{
683	constexpr T operator()(const T &val) const
684	{
685		return +val;
686	}
687};
688// TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline.
689template<typename T>
690struct bit_not
691{
692	constexpr T operator()(const T &val) const
693	{
694		return ~val;
695	}
696};
697
698template<typename T>
699struct divmod : public binary_operator<5, T>
700{
701	using binary_operator<5, T>::binary_operator;
702	using typename binary_operator_base::result;
703	result operator()() override
704	{
705		result r = (*binary_operator_base::rhs)();
706		if (r.second && (r.first == 0))
707		{
708			expression::loc.report_error("Division by zero");
709			return {0, false};
710		}
711		return binary_operator<5, T>::operator()();
712	}
713};
714
715} // anonymous namespace
716
717
718expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs)
719{
720	next_token();
721	binary_operator_base *expr = nullptr;
722	char op = *(*this);
723	source_location l = location();
724	switch (op)
725	{
726		default:
727			return lhs;
728		case '+':
729			expr = new binary_operator<6, std::plus<valty>>(l, "+");
730			break;
731		case '-':
732			expr = new binary_operator<6, std::minus<valty>>(l, "-");
733			break;
734		case '%':
735			expr = new divmod<std::modulus<valty>>(l, "/");
736			break;
737		case '*':
738			expr = new binary_operator<5, std::multiplies<valty>>(l, "*");
739			break;
740		case '/':
741			expr = new divmod<std::divides<valty>>(l, "/");
742			break;
743		case '<':
744			switch (peek())
745			{
746				default:
747					parse_error("Invalid operator");
748					return nullptr;
749				case ' ':
750				case '(':
751				case '0'...'9':
752					expr = new binary_operator<8, std::less<valty>>(l, "<");
753					break;
754				case '=':
755					++(*this);
756					expr = new binary_operator<8, std::less_equal<valty>>(l, "<=");
757					break;
758				case '<':
759					++(*this);
760					expr = new binary_operator<7, lshift<valty>>(l, "<<");
761					break;
762			}
763			break;
764		case '>':
765			switch (peek())
766			{
767				default:
768					parse_error("Invalid operator");
769					return nullptr;
770				case '(':
771				case ' ':
772				case '0'...'9':
773					expr = new binary_operator<8, std::greater<valty>>(l, ">");
774					break;
775				case '=':
776					++(*this);
777					expr = new binary_operator<8, std::greater_equal<valty>>(l, ">=");
778					break;
779				case '>':
780					++(*this);
781					expr = new binary_operator<7, rshift<valty>>(l, ">>");
782					break;
783					return lhs;
784			}
785			break;
786		case '=':
787			if (peek() != '=')
788			{
789				parse_error("Invalid operator");
790				return nullptr;
791			}
792			expr = new binary_operator<9, std::equal_to<valty>>(l, "==");
793			break;
794		case '!':
795			if (peek() != '=')
796			{
797				parse_error("Invalid operator");
798				return nullptr;
799			}
800			cursor++;
801			expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!=");
802			break;
803		case '&':
804			if (peek() == '&')
805			{
806				expr = new binary_operator<13, std::logical_and<valty>>(l, "&&");
807			}
808			else
809			{
810				expr = new binary_operator<10, std::bit_and<valty>>(l, "&");
811			}
812			break;
813		case '|':
814			if (peek() == '|')
815			{
816				expr = new binary_operator<12, std::logical_or<valty>>(l, "||");
817			}
818			else
819			{
820				expr = new binary_operator<14, std::bit_or<valty>>(l, "|");
821			}
822			break;
823		case '?':
824		{
825			consume('?');
826			expression_ptr true_case = parse_expression();
827			next_token();
828			if (!true_case || !consume(':'))
829			{
830				parse_error("Expected : in ternary conditional operator");
831				return nullptr;
832			}
833			expression_ptr false_case = parse_expression();
834			if (!false_case)
835			{
836				parse_error("Expected false condition for ternary operator");
837				return nullptr;
838			}
839			return expression_ptr(new ternary_conditional_operator(l, std::move(lhs),
840						std::move(true_case), std::move(false_case)));
841		}
842	}
843	++(*this);
844	next_token();
845	expression_ptr e(expr);
846	expression_ptr rhs(parse_expression());
847	if (!rhs)
848	{
849		return nullptr;
850	}
851	expr->lhs = std::move(lhs);
852	if (rhs->precedence() < expr->precedence())
853	{
854		expr->rhs = std::move(rhs);
855	}
856	else
857	{
858		// If we're a normal left-to-right expression, then we need to insert
859		// this as the far-left child node of the rhs expression
860		binary_operator_base *rhs_op =
861			static_cast<binary_operator_base*>(rhs.get());
862		rhs_op->insert_left(expr);
863		e.release();
864		return rhs;
865	}
866	return e;
867}
868
869expression_ptr text_input_buffer::parse_expression(bool stopAtParen)
870{
871	next_token();
872	unsigned long long leftVal;
873	expression_ptr lhs;
874	source_location l = location();
875	switch (*(*this))
876	{
877		case '0'...'9':
878			if (!consume_integer(leftVal))
879			{
880				return nullptr;
881			}
882			lhs.reset(new terminal_expr(l, leftVal));
883			break;
884		case '(':
885		{
886			consume('(');
887			expression_ptr &&subexpr = parse_expression();
888			if (!subexpr)
889			{
890				return nullptr;
891			}
892			lhs.reset(new paren_expression(l, std::move(subexpr)));
893			if (!consume(')'))
894			{
895				return nullptr;
896			}
897			if (stopAtParen)
898			{
899				return lhs;
900			}
901			break;
902		}
903		case '+':
904		{
905			consume('+');
906			expression_ptr &&subexpr = parse_expression();
907			if (!subexpr)
908			{
909				return nullptr;
910			}
911			lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr)));
912			break;
913		}
914		case '-':
915		{
916			consume('-');
917			expression_ptr &&subexpr = parse_expression();
918			if (!subexpr)
919			{
920				return nullptr;
921			}
922			lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr)));
923			break;
924		}
925		case '!':
926		{
927			consume('!');
928			expression_ptr &&subexpr = parse_expression();
929			if (!subexpr)
930			{
931				return nullptr;
932			}
933			lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr)));
934			break;
935		}
936		case '~':
937		{
938			consume('~');
939			expression_ptr &&subexpr = parse_expression();
940			if (!subexpr)
941			{
942				return nullptr;
943			}
944			lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr)));
945			break;
946		}
947	}
948	if (!lhs)
949	{
950		return nullptr;
951	}
952	return parse_binary_expression(std::move(lhs));
953}
954
955bool
956text_input_buffer::consume_integer_expression(unsigned long long &outInt)
957{
958	switch (*(*this))
959	{
960		case '(':
961		{
962			expression_ptr e(parse_expression(true));
963			if (!e)
964			{
965				return false;
966			}
967			auto r = (*e)();
968			if (r.second)
969			{
970				outInt = r.first;
971				return true;
972			}
973			return false;
974		}
975		case '0'...'9':
976			return consume_integer(outInt);
977		default:
978			return false;
979	}
980}
981
982bool
983input_buffer::consume_hex_byte(uint8_t &outByte)
984{
985	if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
986	{
987		return false;
988	}
989	outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
990	cursor += 2;
991	return true;
992}
993
994text_input_buffer&
995text_input_buffer::next_token()
996{
997	auto &self = *this;
998	int start;
999	do {
1000		start = cursor;
1001		skip_spaces();
1002		if (finished())
1003		{
1004			return self;
1005		}
1006		// Parse /* comments
1007		if (*self == '/' && peek() == '*')
1008		{
1009			// eat the start of the comment
1010			++self;
1011			++self;
1012			do {
1013				// Find the ending * of */
1014				while ((*self != '\0') && (*self != '*') && !finished())
1015				{
1016					++self;
1017				}
1018				// Eat the *
1019				++self;
1020			} while ((*self != '\0') && (*self != '/') && !finished());
1021			// Eat the /
1022			++self;
1023		}
1024		// Parse // comments
1025		if ((*self == '/' && peek() == '/'))
1026		{
1027			// eat the start of the comment
1028			++self;
1029			++self;
1030			// Find the ending of the line
1031			while (*self != '\n' && !finished())
1032			{
1033				++self;
1034			}
1035			// Eat the \n
1036			++self;
1037		}
1038	} while (start != cursor);
1039	return self;
1040}
1041
1042void
1043text_input_buffer::parse_error(const char *msg)
1044{
1045	if (input_stack.empty())
1046	{
1047		fprintf(stderr, "Error: %s\n", msg);
1048		return;
1049	}
1050	input_buffer &b = *input_stack.top();
1051	parse_error(msg, b, b.cursor);
1052}
1053void
1054text_input_buffer::parse_error(const char *msg,
1055                               input_buffer &b,
1056                               int loc)
1057{
1058	int line_count = 1;
1059	int line_start = 0;
1060	int line_end = loc;
1061	if (loc < 0 || loc > b.size)
1062	{
1063		return;
1064	}
1065	for (int i=loc ; i>0 ; --i)
1066	{
1067		if (b.buffer[i] == '\n')
1068		{
1069			line_count++;
1070			if (line_start == 0)
1071			{
1072				line_start = i+1;
1073			}
1074		}
1075	}
1076	for (int i=loc+1 ; i<b.size ; ++i)
1077	{
1078		if (b.buffer[i] == '\n')
1079		{
1080			line_end = i;
1081			break;
1082		}
1083	}
1084	fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg);
1085	fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr);
1086	putc('\n', stderr);
1087	for (int i=0 ; i<(loc-line_start) ; ++i)
1088	{
1089		char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' ';
1090		putc(c, stderr);
1091	}
1092	putc('^', stderr);
1093	putc('\n', stderr);
1094}
1095#ifndef NDEBUG
1096void
1097input_buffer::dump()
1098{
1099	fprintf(stderr, "Current cursor: %d\n", cursor);
1100	fwrite(&buffer[cursor], size-cursor, 1, stderr);
1101}
1102#endif
1103
1104
1105namespace
1106{
1107/**
1108 * The source files are ASCII, so we provide a non-locale-aware version of
1109 * isalpha.  This is a class so that it can be used with a template function
1110 * for parsing strings.
1111 */
1112struct is_alpha
1113{
1114	static inline bool check(const char c)
1115	{
1116		return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') &&
1117			(c <= 'Z'));
1118	}
1119};
1120/**
1121 * Check whether a character is in the set allowed for node names.  This is a
1122 * class so that it can be used with a template function for parsing strings.
1123 */
1124struct is_node_name_character
1125{
1126	static inline bool check(const char c)
1127	{
1128		switch(c)
1129		{
1130			default:
1131				return false;
1132			case 'a'...'z': case 'A'...'Z': case '0'...'9':
1133			case ',': case '.': case '+': case '-':
1134			case '_':
1135				return true;
1136		}
1137	}
1138};
1139/**
1140 * Check whether a character is in the set allowed for property names.  This is
1141 * a class so that it can be used with a template function for parsing strings.
1142 */
1143struct is_property_name_character
1144{
1145	static inline bool check(const char c)
1146	{
1147		switch(c)
1148		{
1149			default:
1150				return false;
1151			case 'a'...'z': case 'A'...'Z': case '0'...'9':
1152			case ',': case '.': case '+': case '-':
1153			case '_': case '#':
1154				return true;
1155		}
1156	}
1157};
1158
1159template<class T>
1160string parse(text_input_buffer &s)
1161{
1162	std::vector<char> bytes;
1163	for (char c=*s ; T::check(c) ; c=*(++s))
1164	{
1165		bytes.push_back(c);
1166	}
1167	return string(bytes.begin(), bytes.end());
1168}
1169
1170}
1171
1172string
1173text_input_buffer::parse_node_name()
1174{
1175	return parse<is_node_name_character>(*this);
1176}
1177
1178string
1179text_input_buffer::parse_property_name()
1180{
1181	return parse<is_property_name_character>(*this);
1182}
1183
1184string
1185text_input_buffer::parse_node_or_property_name(bool &is_property)
1186{
1187	if (is_property)
1188	{
1189		return parse_property_name();
1190	}
1191	std::vector<char> bytes;
1192	for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this)))
1193	{
1194		bytes.push_back(c);
1195	}
1196	for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this)))
1197	{
1198		bytes.push_back(c);
1199		is_property = true;
1200	}
1201	return string(bytes.begin(), bytes.end());
1202}
1203
1204string
1205input_buffer::parse_to(char stop)
1206{
1207	std::vector<char> bytes;
1208	for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1209	{
1210		bytes.push_back(c);
1211	}
1212	return string(bytes.begin(), bytes.end());
1213}
1214
1215string
1216text_input_buffer::parse_to(char stop)
1217{
1218	std::vector<char> bytes;
1219	for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1220	{
1221		if (finished())
1222		{
1223			break;
1224		}
1225		bytes.push_back(c);
1226	}
1227	return string(bytes.begin(), bytes.end());
1228}
1229
1230char
1231text_input_buffer::peek()
1232{
1233	return (*input_stack.top())[1];
1234}
1235
1236std::unique_ptr<input_buffer>
1237input_buffer::buffer_for_file(const string &path, bool warn)
1238{
1239	if (path == "-")
1240	{
1241		std::unique_ptr<input_buffer> b(new stream_input_buffer());
1242		return b;
1243	}
1244	int source = open(path.c_str(), O_RDONLY);
1245	if (source == -1)
1246	{
1247		if (warn)
1248		{
1249			fprintf(stderr, "Unable to open file '%s'.  %s\n", path.c_str(), strerror(errno));
1250		}
1251		return 0;
1252	}
1253	struct stat st;
1254	if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode))
1255	{
1256		if (warn)
1257		{
1258			fprintf(stderr, "File %s is a directory\n", path.c_str());
1259		}
1260		close(source);
1261		return 0;
1262	}
1263	std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, string(path)));
1264	close(source);
1265	return b;
1266}
1267
1268} // namespace dtc
1269
1270