1/*-
2 * Copyright (c) 2013 David Chisnall
3 * All rights reserved.
4 *
5 * This software was developed by SRI International and the University of
6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7 * ("CTSRD"), as part of the DARPA CRASH research programme.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * $FreeBSD$
31 */
32
33#ifndef _INPUT_BUFFER_HH_
34#define _INPUT_BUFFER_HH_
35#include "util.hh"
36#include <assert.h>
37
38namespace dtc
39{
40
41/**
42 * Class encapsulating the input file.  Can be used as a const char*, but has
43 * range checking.  Attempting to access anything out of range will return a 0
44 * byte.  The input buffer can be cheaply copied, without copying the
45 * underlying memory, however it is the user's responsibility to ensure that
46 * such copies do not persist beyond the lifetime of the underlying memory.
47 *
48 * This also contains methods for reporting errors and for consuming the token
49 * stream.
50 */
class input_buffer
{
	protected:
	/**
	 * Pointer to the first byte of the underlying memory.  This class only
	 * borrows the memory; the mmap_input_buffer subclass is the one that
	 * owns it.
	 */
	const char* buffer;
	/**
	 * Number of bytes available at buffer.
	 */
	int size;
	private:
	/**
	 * Index of the next byte to be read.  The size, pointer and cursor are
	 * kept separately so that the cursor can move in either direction
	 * while every access is still checked against both ends of the
	 * buffer.
	 */
	int cursor;
	/**
	 * Private constructor, used to create input buffers that refer to the
	 * same memory but start with a different cursor.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	/**
	 * Returns true if the byte at cursor + offset lies inside the buffer.
	 * The sum is formed in 64 bits so that an extreme offset cannot
	 * overflow int before the comparison is made.
	 */
	inline bool in_range(int offset) const
	{
		const long long index = static_cast<long long>(cursor) + offset;
		return (index >= 0) && (index < size);
	}
	/**
	 * Reads forward past any spaces.  The DTS format is not whitespace
	 * sensitive, so whitespace is skipped while reading it.
	 */
	void skip_spaces();
	public:
	/**
	 * Virtual destructor.  Does nothing here, but lets subclasses that own
	 * the memory run their cleanup code when destroyed through a base
	 * pointer.
	 */
	virtual ~input_buffer() {}
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(0), size(0), cursor(0) {}
	/**
	 * Constructs a buffer over the s bytes starting at b, with the cursor
	 * at the first byte.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Returns true if this buffer has no unconsumed space in it, i.e. the
	 * cursor is at or beyond the end.
	 */
	inline bool empty() const
	{
		return cursor >= size;
	}
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give the character under the cursor.  Returns a
	 * null byte if the cursor is out of range.
	 */
	inline char operator*() const
	{
		return in_range(0) ? buffer[cursor] : '\0';
	}
	/**
	 * Array subscripting operator, returns the character at the specified
	 * offset from the current cursor.  The offset may be negative, to
	 * reread characters that have already been read.  If the cursor plus
	 * offset is outside of the buffer, this returns a null byte.
	 */
	inline char operator[](int offset) const
	{
		// in_range() has already established that cursor + offset fits
		// in [0, size), so the int addition below cannot overflow.
		return in_range(offset) ? buffer[cursor + offset] : '\0';
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.
	 */
	inline input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Cast to char* operator.  Returns a pointer to the byte under the
	 * cursor that can be used for constructing strings, or a null pointer
	 * if the cursor is out of range.
	 */
	inline operator const char*() const
	{
		return in_range(0) ? &buffer[cursor] : 0;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward and
	 * returns true if the current character matches the argument;
	 * otherwise leaves the cursor alone and returns false.
	 */
	inline bool consume(char c)
	{
		if ((*this)[0] != c)
		{
			return false;
		}
		++(*this);
		return true;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(long long &outInt);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Values are assumed to be natively aligned,
	 * so the cursor is first advanced to the next multiple of sizeof(T).
	 *
	 * Returns true, stores the value in out and leaves the cursor just
	 * past it if sizeof(T) bytes are available after alignment.  Returns
	 * false without moving the cursor or touching out otherwise,
	 * including when the cursor is before the start of the buffer.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = sizeof(T);
		// The other accessors refuse to read from a negative cursor;
		// do the same here rather than index before the buffer.
		if (cursor < 0)
		{
			return false;
		}
		const int misalignment = cursor % type_size;
		const int align = (misalignment != 0) ?
			(type_size - misalignment) : 0;
		// Compare in 64 bits so that a cursor close to INT_MAX cannot
		// overflow while the end of the read is being computed.
		const long long end =
			static_cast<long long>(cursor) + align + type_size;
		if (end > size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		for (int i=0 ; i<type_size ; ++i)
		{
			// Most significant byte comes first.  The mask removes the
			// sign extension that happens when char is signed.
			out <<= 8;
			out |= (static_cast<T>(buffer[cursor++]) & 0xff);
		}
		return true;
	}
	/**
	 * Consumes two hex digits and returns the resulting byte via the
	 * first argument.  If the next two characters are hex digits, returns
	 * true and advances the cursor.  If not, returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Advances the cursor to the start of the next token, skipping
	 * comments and whitespace.  If the cursor already points to the start
	 * of a token, then this function does nothing.
	 */
	input_buffer &next_token();
	/**
	 * Prints a message indicating the location of a parse error.
	 */
	void parse_error(const char *msg);
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
};
239/**
240 * Explicit specialisation for reading a single byte.
241 */
242template<>
243inline bool input_buffer::consume_binary(uint8_t &out)
244{
245	if (size < cursor + 1)
246	{
247		return false;
248	}
249	out = buffer[cursor++];
250	return true;
251}
252
253/**
254 * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
255 * When this object is destroyed, the memory is unmapped.
256 */
257struct mmap_input_buffer : public input_buffer
258{
259	/**
260	 * Constructs a new buffer from the file passed in as a file
261	 * descriptor.
262	 */
263	mmap_input_buffer(int fd);
264	/**
265	 * Unmaps the buffer, if one exists.
266	 */
267	virtual ~mmap_input_buffer();
268};
269/**
270 * Input buffer read from standard input.  This is used for reading device tree
271 * blobs and source from standard input.  It reads the entire input into
272 * malloc'd memory, so will be very slow for large inputs.  DTS and DTB files
273 * are very rarely more than 10KB though, so this is probably not a problem.
274 */
275struct stream_input_buffer : public input_buffer
276{
277	/**
278	 * The buffer that will store the data read from the standard input.
279	 */
280	std::vector<char> b;
281	/**
282	 * Constructs a new buffer from the standard input.
283	 */
284	stream_input_buffer();
285};
286
287} // namespace dtc
288
289#endif // !_INPUT_BUFFER_HH_
290