1/*-
2 * Mach Operating System
3 * Copyright (c) 1991,1990 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
13 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
19 *  School of Computer Science
20 *  Carnegie Mellon University
21 *  Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie the
24 * rights to redistribute these changes.
25 */
26/*
27 *	Author: David B. Golub, Carnegie Mellon University
28 *	Date:	7/90
29 */
30/*
31 * Lexical analyzer.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD$");
36
37#include <sys/param.h>
38#include <sys/libkern.h>
39
40#include <ddb/ddb.h>
41#include <ddb/db_lex.h>
42
43static char	db_line[DB_MAXLINE];
44static char *	db_lp, *db_endlp;
45
46static int	db_lex(void);
47static void 	db_flush_line(void);
48static int 	db_read_char(void);
49static void 	db_unread_char(int);
50
51int
52db_read_line()
53{
54	int	i;
55
56	i = db_readline(db_line, sizeof(db_line));
57	if (i == 0)
58	    return (0);	/* EOI */
59	db_lp = db_line;
60	db_endlp = db_lp + i;
61	return (i);
62}
63
64/*
65 * Simulate a line of input into DDB.
66 */
67void
68db_inject_line(const char *command)
69{
70
71	strlcpy(db_line, command, sizeof(db_line));
72	db_lp = db_line;
73	db_endlp = db_lp + strlen(command);
74}
75
76/*
77 * In rare cases, we may want to pull the remainder of the line input
78 * verbatim, rather than lexing it.  For example, when assigning literal
79 * values associated with scripts.  In that case, return a static pointer to
80 * the current location in the input buffer.  The caller must be aware that
81 * the contents are not stable if other lex/input calls are made.
82 */
83char *
84db_get_line(void)
85{
86
87	return (db_lp);
88}
89
90static void
91db_flush_line()
92{
93	db_lp = db_line;
94	db_endlp = db_line;
95}
96
97static int	db_look_char = 0;
98
99static int
100db_read_char()
101{
102	int	c;
103
104	if (db_look_char != 0) {
105	    c = db_look_char;
106	    db_look_char = 0;
107	}
108	else if (db_lp >= db_endlp)
109	    c = -1;
110	else
111	    c = *db_lp++;
112	return (c);
113}
114
115static void
116db_unread_char(c)
117	int c;
118{
119	db_look_char = c;
120}
121
/* One-token push-back slot; 0 means the slot is empty. */
static int	db_look_token = 0;

/*
 * Push a token back so that the next db_read_token() returns it instead
 * of scanning new input.  Only one token can be held at a time.
 */
void
db_unread_token(int t)
{
	db_look_token = t;
}
130
131int
132db_read_token()
133{
134	int	t;
135
136	if (db_look_token) {
137	    t = db_look_token;
138	    db_look_token = 0;
139	}
140	else
141	    t = db_lex();
142	return (t);
143}
144
145db_expr_t	db_tok_number;
146char	db_tok_string[TOK_STRING_SIZE];
147
148db_expr_t	db_radix = 16;
149
150void
151db_flush_lex()
152{
153	db_flush_line();
154	db_look_char = 0;
155	db_look_token = 0;
156}
157
158static int
159db_lex()
160{
161	int	c;
162
163	c = db_read_char();
164	while (c <= ' ' || c > '~') {
165	    if (c == '\n' || c == -1)
166		return (tEOL);
167	    c = db_read_char();
168	}
169
170	if (c >= '0' && c <= '9') {
171	    /* number */
172	    int	r, digit = 0;
173
174	    if (c > '0')
175		r = db_radix;
176	    else {
177		c = db_read_char();
178		if (c == 'O' || c == 'o')
179		    r = 8;
180		else if (c == 'T' || c == 't')
181		    r = 10;
182		else if (c == 'X' || c == 'x')
183		    r = 16;
184		else {
185		    r = db_radix;
186		    db_unread_char(c);
187		}
188		c = db_read_char();
189	    }
190	    db_tok_number = 0;
191	    for (;;) {
192		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
193		    digit = c - '0';
194		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
195				     (c >= 'a' && c <= 'f'))) {
196		    if (c >= 'a')
197			digit = c - 'a' + 10;
198		    else if (c >= 'A')
199			digit = c - 'A' + 10;
200		}
201		else
202		    break;
203		db_tok_number = db_tok_number * r + digit;
204		c = db_read_char();
205	    }
206	    if ((c >= '0' && c <= '9') ||
207		(c >= 'A' && c <= 'Z') ||
208		(c >= 'a' && c <= 'z') ||
209		(c == '_'))
210	    {
211		db_error("Bad character in number\n");
212		db_flush_lex();
213		return (tEOF);
214	    }
215	    db_unread_char(c);
216	    return (tNUMBER);
217	}
218	if ((c >= 'A' && c <= 'Z') ||
219	    (c >= 'a' && c <= 'z') ||
220	    c == '_' || c == '\\')
221	{
222	    /* string */
223	    char *cp;
224
225	    cp = db_tok_string;
226	    if (c == '\\') {
227		c = db_read_char();
228		if (c == '\n' || c == -1)
229		    db_error("Bad escape\n");
230	    }
231	    *cp++ = c;
232	    while (1) {
233		c = db_read_char();
234		if ((c >= 'A' && c <= 'Z') ||
235		    (c >= 'a' && c <= 'z') ||
236		    (c >= '0' && c <= '9') ||
237		    c == '_' || c == '\\' || c == ':' || c == '.')
238		{
239		    if (c == '\\') {
240			c = db_read_char();
241			if (c == '\n' || c == -1)
242			    db_error("Bad escape\n");
243		    }
244		    *cp++ = c;
245		    if (cp == db_tok_string+sizeof(db_tok_string)) {
246			db_error("String too long\n");
247			db_flush_lex();
248			return (tEOF);
249		    }
250		    continue;
251		}
252		else {
253		    *cp = '\0';
254		    break;
255		}
256	    }
257	    db_unread_char(c);
258	    return (tIDENT);
259	}
260
261	switch (c) {
262	    case '+':
263		return (tPLUS);
264	    case '-':
265		return (tMINUS);
266	    case '.':
267		c = db_read_char();
268		if (c == '.')
269		    return (tDOTDOT);
270		db_unread_char(c);
271		return (tDOT);
272	    case '*':
273		return (tSTAR);
274	    case '/':
275		return (tSLASH);
276	    case '=':
277		return (tEQ);
278	    case '%':
279		return (tPCT);
280	    case '#':
281		return (tHASH);
282	    case '(':
283		return (tLPAREN);
284	    case ')':
285		return (tRPAREN);
286	    case ',':
287		return (tCOMMA);
288	    case '"':
289		return (tDITTO);
290	    case '$':
291		return (tDOLLAR);
292	    case '!':
293		return (tEXCL);
294	    case ';':
295		return (tSEMI);
296	    case '<':
297		c = db_read_char();
298		if (c == '<')
299		    return (tSHIFT_L);
300		db_unread_char(c);
301		break;
302	    case '>':
303		c = db_read_char();
304		if (c == '>')
305		    return (tSHIFT_R);
306		db_unread_char(c);
307		break;
308	    case -1:
309		return (tEOF);
310	}
311	db_printf("Bad character\n");
312	db_flush_lex();
313	return (tEOF);
314}
315