1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2013 David Chisnall
5 * All rights reserved.
6 *
7 * This software was developed by SRI International and the University of
8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9 * ("CTSRD"), as part of the DARPA CRASH research programme.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#ifndef _INPUT_BUFFER_HH_
34#define _INPUT_BUFFER_HH_
35#include "util.hh"
36#include <assert.h>
37#include <stack>
38#include <string>
39#include <unordered_set>
40
41namespace dtc
42{
43
namespace
{
/**
 * Opaque expression type.  Only declared here; this header needs nothing
 * more than the name in order to spell the pointer type below.
 */
struct expression;
/**
 * Owning pointer to an expression.
 */
using expression_ptr = std::unique_ptr<expression>;
} // anonymous namespace
48
/**
 * Class encapsulating the input file.  Can be used as a const char*, but has
 * range checking.  Attempting to access anything out of range will return a 0
 * byte.  The input buffer can be cheaply copied, without copying the
 * underlying memory, however it is the user's responsibility to ensure that
 * such copies do not persist beyond the lifetime of the underlying memory.
 *
 * This also contains methods for reporting errors and for consuming the token
 * stream.
 */
class input_buffer
{
	friend class text_input_buffer;
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer, in bytes.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	public:
	/**
	 * Returns the file name associated with this buffer.  The base class
	 * has no file, so it returns a reference to a shared empty string.
	 */
	virtual const std::string &filename() const
	{
		static const std::string s;
		return s;
	}
	/**
	 * Creates an input buffer for the file at `path`.  `warn` defaults to
	 * true.
	 */
	static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
	                                                     bool warn=true);
	/**
	 * Skips all characters in the input until the specified character is
	 * encountered.
	 */
	void skip_to(char);
	/**
	 * Parses up to a specified character and returns the intervening
	 * characters as a string.
	 */
	std::string parse_to(char);
	/**
	 * Return whether all input has been consumed.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0) {}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	inline char operator*() const
	{
		if (cursor < 0 || cursor >= size) { return '\0'; }
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	inline char operator[](int offset) const
	{
		// Do the addition in a wider type: cursor + offset can exceed the
		// range of int for extreme offsets, and signed overflow is
		// undefined.
		const long long index = static_cast<long long>(cursor) + offset;
		if (index < 0 || index >= size) { return '\0'; }
		return buffer[index];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  No range
	 * check is done here; the accessors cope with a cursor past the end.
	 */
	inline input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Returns a pointer to the first byte of the underlying memory,
	 * irrespective of the cursor.
	 */
	const char *begin() const
	{
		return buffer;
	}
	/**
	 * Returns a pointer one past the last byte of the underlying memory.
	 */
	const char *end() const
	{
		return buffer + size;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * next character matches the argument, returning true.  If the current
	 * character does not match the argument, returns false.
	 */
	inline bool consume(char c)
	{
		if (*(*this) == c)
		{
			++(*this);
			return true;
		}
		return false;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the correct alignment before reading.
	 *
	 * On failure neither the cursor nor `out` is modified.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = static_cast<int>(sizeof(T));
		// Padding needed to bring the cursor up to a multiple of the
		// value's size.
		const int misalignment = cursor % type_size;
		const int align = (misalignment != 0) ? (type_size - misalignment) : 0;
		// Single up-front range check, done in a wider type so that the
		// sum cannot overflow.  Once it passes, every byte read by the
		// loop below is known to be inside the buffer.
		if (size < static_cast<long long>(cursor) + align + type_size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		for (int i=0 ; i<type_size ; ++i)
		{
			// Most significant byte first.  The mask discards the sign
			// extension that occurs when char is a signed type.
			out <<= 8;
			out |= (static_cast<T>(buffer[cursor++]) & 0xff);
		}
		return true;
	}
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
259/**
260 * Explicit specialisation for reading a single byte.
261 */
262template<>
263inline bool input_buffer::consume_binary(uint8_t &out)
264{
265	if (size < cursor + 1)
266	{
267		return false;
268	}
269	out = buffer[cursor++];
270	return true;
271}
272
273/**
274 * An input buffer subclass used for parsing DTS files.  This manages a stack
275 * of input buffers to handle /input/ operations.
276 */
277class text_input_buffer
278{
279	std::unordered_set<std::string> defines;
280	/**
281	 * The cursor is the input into the input stream where we are currently reading.
282	 */
283	int cursor = 0;
284	/**
285	 * The current stack of includes.  The current input is always from the top
286	 * of the stack.
287	 */
288	std::stack<std::shared_ptr<input_buffer>> input_stack;
289	/**
290	 *
291	 */
292	const std::vector<std::string> include_paths;
293	/**
294	 * Reads forward past any spaces.  The DTS format is not whitespace
295	 * sensitive and so we want to scan past whitespace when reading it.
296	 */
297	void skip_spaces();
298	/**
299	 * Returns the character immediately after the current one.
300	 *
301	 * This method does not look between files.
302	 */
303	char peek();
304	/**
305	 * If a /include/ token is encountered, then look up the corresponding
306	 * input file, push it onto the input stack, and continue.
307	 */
308	void handle_include();
309	/**
310	 * The base directory for this file.
311	 */
312	const std::string dir;
313	/**
314	 * The file where dependencies should be output.
315	 */
316	FILE *depfile;
317	public:
318	/**
319	 * Construct a new text input buffer with the specified buffer as the start
320	 * of parsing and the specified set of input paths for handling new
321	 * inclusions.
322	 */
323	text_input_buffer(std::unique_ptr<input_buffer> &&b,
324	                  std::unordered_set<std::string> &&d,
325	                  std::vector<std::string> &&i,
326	                  const std::string directory,
327	                  FILE *deps)
328		: defines(d), include_paths(i), dir(directory), depfile(deps)
329	{
330		input_stack.push(std::move(b));
331	}
332	/**
333	 * Skips all characters in the input until the specified character is
334	 * encountered.
335	 */
336	void skip_to(char);
337	/**
338	 * Parse an expression.  If `stopAtParen` is set, then only parse a number
339	 * or a parenthetical expression, otherwise assume that either is the
340	 * left-hand side of a binary expression and try to parse the right-hand
341	 * side.
342	 */
343	expression_ptr parse_expression(bool stopAtParen=false);
344	/**
345	 * Parse a binary expression, having already parsed the right-hand side.
346	 */
347	expression_ptr parse_binary_expression(expression_ptr lhs);
348	/**
349	 * Return whether all input has been consumed.
350	 */
351	bool finished()
352	{
353		return input_stack.empty() ||
354			((input_stack.size() == 1) && input_stack.top()->finished());
355	}
356	/**
357	 * Dereferencing operator.  Returns the current character in the top input buffer.
358	 */
359	inline char operator*()
360	{
361		if (input_stack.empty())
362		{
363			return 0;
364		}
365		return *(*input_stack.top());
366	}
367	/**
368	 * Increments the cursor, iterating forward in the buffer.
369	 */
370	inline text_input_buffer &operator++()
371	{
372		if (input_stack.empty())
373		{
374			return *this;
375		}
376		cursor++;
377		auto &top = *input_stack.top();
378		++top;
379		if (top.finished())
380		{
381			input_stack.pop();
382		}
383		return *this;
384	}
385	/**
386	 * Consumes a character.  Moves the cursor one character forward if the
387	 * next character matches the argument, returning true.  If the current
388	 * character does not match the argument, returns false.
389	 */
390	inline bool consume(char c)
391	{
392		if (*(*this) == c)
393		{
394			++(*this);
395			return true;
396		}
397		return false;
398	}
399	/**
400	 * Consumes a string.  If the (null-terminated) string passed as the
401	 * argument appears in the input, advances the cursor to the end and
402	 * returns true.  Returns false if the string does not appear at the
403	 * current point in the input.
404	 *
405	 * This method does not scan between files.
406	 */
407	bool consume(const char *str)
408	{
409		if (input_stack.empty())
410		{
411			return false;
412		}
413		return input_stack.top()->consume(str);
414	}
415	/**
416	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
417	 * the cursor to the end of the integer if the cursor points to an
418	 * integer, returns false and does not move the cursor otherwise.
419	 *
420	 * The parsed value is returned via the argument.
421	 *
422	 * This method does not scan between files.
423	 */
424	bool consume_integer(unsigned long long &outInt)
425	{
426		if (input_stack.empty())
427		{
428			return false;
429		}
430		return input_stack.top()->consume_integer(outInt);
431	}
432	/**
433	 * Reads an arithmetic expression (containing any of the normal C
434	 * operators), evaluates it, and returns the result.
435	 */
436	bool consume_integer_expression(unsigned long long &outInt);
437	/**
438	 * Consumes two hex digits and return the resulting byte via the first
439	 * argument.  If the next two characters are hex digits, returns true
440	 * and advances the cursor.  If not, then returns false and leaves the
441	 * cursor in place.
442	 *
443	 * This method does not scan between files.
444	 */
445	bool consume_hex_byte(uint8_t &outByte)
446	{
447		if (input_stack.empty())
448		{
449			return false;
450		}
451		return input_stack.top()->consume_hex_byte(outByte);
452	}
453	/**
454	 * Returns the longest string in the input buffer starting at the
455	 * current cursor and composed entirely of characters that are valid in
456	 * node names.
457	*/
458	std::string parse_node_name();
459	/**
460	 * Returns the longest string in the input buffer starting at the
461	 * current cursor and composed entirely of characters that are valid in
462	 * property names.
463	 */
464	std::string parse_property_name();
465	/**
466	 * Parses either a node or a property name.  If is_property is true on
467	 * entry, then only property names are parsed.  If it is false, then it
468	 * will be set, on return, to indicate whether the parsed name is only
469	 * valid as a property.
470	 */
471	std::string parse_node_or_property_name(bool &is_property);
472	/**
473	 * Parses up to a specified character and returns the intervening
474	 * characters as a string.
475	 */
476	std::string parse_to(char);
477	/**
478	 * Advances the cursor to the start of the next token, skipping
479	 * comments and whitespace.  If the cursor already points to the start
480	 * of a token, then this function does nothing.
481	 */
482	text_input_buffer &next_token();
483	/**
484	 * Location in the source file.  This should never be interpreted by
485	 * anything other than error reporting functions of this class.  It will
486	 * eventually become something more complex than an `int`.
487	 */
488	class source_location
489	{
490		friend class text_input_buffer;
491		/**
492		 * The text buffer object that included `b`.
493		 */
494		text_input_buffer &buffer;
495		/**
496		 * The underlying buffer that contains this location.
497		 */
498		std::shared_ptr<input_buffer> b;
499		/**
500		 * The offset within the current buffer of the source location.
501		 */
502		int cursor;
503		source_location(text_input_buffer &buf)
504			: buffer(buf),
505			  b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
506			  cursor(b ? b->cursor : 0) {}
507		public:
508		/**
509		 * Report an error at this location.
510		 */
511		void report_error(const char *msg)
512		{
513			if (b)
514			{
515				buffer.parse_error(msg, *b, cursor);
516			}
517			else
518			{
519				buffer.parse_error(msg);
520			}
521		}
522	};
523	/**
524	 * Returns the current source location.
525	 */
526	source_location location()
527	{
528		return { *this };
529	}
530	/**
531	 * Prints a message indicating the location of a parse error.
532	 */
533	void parse_error(const char *msg);
534	/**
535	 * Reads the contents of a binary file into `b`.  The file name is assumed
536	 * to be relative to one of the include paths.
537	 *
538	 * Returns true if the file exists and can be read, false otherwise.
539	 */
540	bool read_binary_file(const std::string &filename, byte_buffer &b);
541	private:
542	/**
543	 * Prints a message indicating the location of a parse error, given a
544	 * specified location.  This is used when input has already moved beyond
545	 * the location that caused the failure.
546	 */
547	void parse_error(const char *msg, input_buffer &b, int loc);
548};
549
550} // namespace dtc
551
552#endif // !_INPUT_BUFFER_HH_
553