1/*-
2 * Copyright (c) 2013 David Chisnall
3 * All rights reserved.
4 *
5 * This software was developed by SRI International and the University of
6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7 * ("CTSRD"), as part of the DARPA CRASH research programme.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * $FreeBSD$
31 */
32
33#include "string.hh"
34#include <ctype.h>
35#include <stdio.h>
36
37namespace
38{
/**
 * Non-locale-aware replacement for isalpha.  The source files are ASCII, so
 * only the letters 'a'-'z' and 'A'-'Z' count as alphabetic.  The predicate is
 * wrapped in a type with a static check() method so that it can be passed as
 * a template argument to the string parsing function.
 */
struct is_alpha
{
	/**
	 * Returns true if c is an ASCII letter and false otherwise.
	 */
	static inline bool check(const char c)
	{
		const bool is_lower = ('a' <= c) && (c <= 'z');
		const bool is_upper = ('A' <= c) && (c <= 'Z');
		return is_lower || is_upper;
	}
};
/**
 * Predicate for the characters that may appear in a node name: ASCII letters,
 * decimal digits, and the punctuation characters ',', '.', '+', '-' and '_'.
 * It is a type with a static check() method so that it can be passed as a
 * template argument to the string parsing function.
 */
struct is_node_name_character
{
	/**
	 * Returns true if c is permitted in a node name and false otherwise.
	 */
	static inline bool check(const char c)
	{
		const bool is_letter =
			((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
		const bool is_digit = (c >= '0') && (c <= '9');
		if (is_letter || is_digit)
		{
			return true;
		}
		return (c == ',') || (c == '.') || (c == '+') || (c == '-') ||
			(c == '_');
	}
};
/**
 * Predicate for the characters that may appear in a property name: ASCII
 * letters, decimal digits, and the punctuation characters ',', '.', '+', '-',
 * '_' and '#'.  It is a type with a static check() method so that it can be
 * passed as a template argument to the string parsing function.
 */
struct is_property_name_character
{
	/**
	 * Returns true if c is permitted in a property name and false otherwise.
	 */
	static inline bool check(const char c)
	{
		const bool is_letter =
			((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
		const bool is_digit = (c >= '0') && (c <= '9');
		if (is_letter || is_digit)
		{
			return true;
		}
		return (c == ',') || (c == '.') || (c == '+') || (c == '-') ||
			(c == '_') || (c == '#');
	}
};
90
91}
92
93namespace dtc
94{
95
96template<class T> string
97string::parse(input_buffer &s)
98{
99	const char *start = s;
100	int l=0;
101	while (T::check(*s)) { l++; ++s; }
102	return string(start, l);
103}
104
105string::string(input_buffer &s) : start((const char*)s), length(0)
106{
107	while(s[length] != '\0')
108	{
109		length++;
110	}
111}
112
113string
114string::parse_node_name(input_buffer &s)
115{
116	return parse<is_node_name_character>(s);
117}
118
119string
120string::parse_property_name(input_buffer &s)
121{
122	return parse<is_property_name_character>(s);
123}
124string
125string::parse_node_or_property_name(input_buffer &s, bool &is_property)
126{
127	if (is_property)
128	{
129		return parse_property_name(s);
130	}
131	const char *start = s;
132	int l=0;
133	while (is_node_name_character::check(*s))
134	{
135		l++;
136		++s;
137	}
138	while (is_property_name_character::check(*s))
139	{
140		l++;
141		++s;
142		is_property = true;
143	}
144	return string(start, l);
145}
146
147bool
148string::operator==(const string& other) const
149{
150	return (length == other.length) &&
151	       (memcmp(start, other.start, length) == 0);
152}
153
154bool
155string::operator==(const char *other) const
156{
157	return strncmp(other, start, length) == 0;
158}
159
160bool
161string::operator<(const string& other) const
162{
163	if (length < other.length) { return true; }
164	if (length > other.length) { return false; }
165	return memcmp(start, other.start, length) < 0;
166}
167
168void
169string::push_to_buffer(byte_buffer &buffer, bool escapes)
170{
171	for (int i=0 ; i<length ; ++i)
172	{
173		uint8_t c = start[i];
174		if (escapes && c == '\\' && i+1 < length)
175		{
176			c = start[++i];
177			switch (c)
178			{
179				// For now, we just ignore invalid escape sequences.
180				default:
181				case '"':
182				case '\'':
183				case '\\':
184					break;
185				case 'a':
186					c = '\a';
187					break;
188				case 'b':
189					c = '\b';
190					break;
191				case 't':
192					c = '\t';
193					break;
194				case 'n':
195					c = '\n';
196					break;
197				case 'v':
198					c = '\v';
199					break;
200				case 'f':
201					c = '\f';
202					break;
203				case 'r':
204					c = '\r';
205					break;
206				case '0'...'7':
207				{
208					int v = digittoint(c);
209					if (i+1 < length && start[i+1] <= '7' && start[i+1] >= '0')
210					{
211						v <<= 3;
212						v |= digittoint(start[i+1]);
213						i++;
214						if (i+1 < length && start[i+1] <= '7' && start[i+1] >= '0')
215						{
216							v <<= 3;
217							v |= digittoint(start[i+1]);
218						}
219					}
220					c = (uint8_t)v;
221					break;
222				}
223				case 'x':
224				{
225					++i;
226					if (i >= length)
227					{
228						break;
229					}
230					int v = digittoint(start[i]);
231					if (i+1 < length && ishexdigit(start[i+1]))
232					{
233						v <<= 4;
234						v |= digittoint(start[++i]);
235					}
236					c = (uint8_t)v;
237					break;
238				}
239			}
240		}
241		buffer.push_back(c);
242	}
243}
244
245void
246string::print(FILE *file)
247{
248	fwrite(start, length, 1, file);
249}
250
251void
252string::dump()
253{
254	print(stderr);
255}
256
257} // namespace dtc
258
259