1/*-
2 * SPDX-License-Identifier: MIT-CMU
3 *
4 * Mach Operating System
5 * Copyright (c) 1991,1990 Carnegie Mellon University
6 * All Rights Reserved.
7 *
8 * Permission to use, copy, modify and distribute this software and its
9 * documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
15 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21 *  School of Computer Science
22 *  Carnegie Mellon University
23 *  Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28/*
29 *	Author: David B. Golub, Carnegie Mellon University
30 *	Date:	7/90
31 */
32/*
33 * Lexical analyzer.
34 */
35
36#include <sys/param.h>
37#include <sys/libkern.h>
38#include <sys/lock.h>
39
40#include <ddb/ddb.h>
41#include <ddb/db_lex.h>
42
43static char	db_line[DB_MAXLINE];
44static char *	db_lp, *db_endlp;
45
46static int	db_lex(int);
47static void 	db_flush_line(void);
48static int 	db_read_char(void);
49static void 	db_unread_char(int);
50
51int
52db_read_line(void)
53{
54	int	i;
55
56	i = db_readline(db_line, sizeof(db_line));
57	if (i == 0)
58	    return (0);	/* EOI */
59	db_lp = db_line;
60	db_endlp = db_lp + i;
61	return (i);
62}
63
64/*
65 * Simulate a line of input into DDB.
66 */
67void
68db_inject_line(const char *command)
69{
70
71	strlcpy(db_line, command, sizeof(db_line));
72	db_lp = db_line;
73	db_endlp = db_lp + strlen(command);
74}
75
76/*
77 * In rare cases, we may want to pull the remainder of the line input
78 * verbatim, rather than lexing it.  For example, when assigning literal
79 * values associated with scripts.  In that case, return a static pointer to
80 * the current location in the input buffer.  The caller must be aware that
81 * the contents are not stable if other lex/input calls are made.
82 */
83char *
84db_get_line(void)
85{
86
87	return (db_lp);
88}
89
90static void
91db_flush_line(void)
92{
93	db_lp = db_line;
94	db_endlp = db_line;
95}
96
97static int
98db_read_char(void)
99{
100	int	c;
101
102	if (db_lp >= db_endlp)
103	    c = -1;
104	else
105	    c = *db_lp++;
106	return (c);
107}
108
109static void
110db_unread_char(int c)
111{
112
113	if (c == -1) {
114		/* Unread EOL at EOL is okay. */
115		if (db_lp < db_endlp)
116			db_error("db_unread_char(-1) before end of line\n");
117	} else {
118		if (db_lp > db_line) {
119			db_lp--;
120			if (*db_lp != c)
121				db_error("db_unread_char() wrong char\n");
122		} else {
123			db_error("db_unread_char() at beginning of line\n");
124		}
125	}
126}
127
/* Single-token pushback slot; 0 means that no token is pending. */
static int	db_look_token;
129
130void
131db_unread_token(int t)
132{
133	db_look_token = t;
134}
135
136int
137db_read_token_flags(int flags)
138{
139	int	t;
140
141	MPASS((flags & ~(DRT_VALID_FLAGS_MASK)) == 0);
142
143	if (db_look_token) {
144	    t = db_look_token;
145	    db_look_token = 0;
146	}
147	else
148	    t = db_lex(flags);
149	return (t);
150}
151
/* Value of the most recently lexed tNUMBER token; written by db_lex(). */
db_expr_t	db_tok_number;
/* NUL-terminated text of the most recently lexed tIDENT token. */
char	db_tok_string[TOK_STRING_SIZE];

/* Radix db_lex() applies to numbers when none is forced or prefixed. */
db_expr_t	db_radix = 16;
156
157void
158db_flush_lex(void)
159{
160	db_flush_line();
161	db_look_token = 0;
162}
163
164static int
165db_lex(int flags)
166{
167	int	c, n, radix_mode;
168	bool	lex_wspace, lex_hex_numbers;
169
170	switch (flags & DRT_RADIX_MASK) {
171	case DRT_DEFAULT_RADIX:
172		radix_mode = -1;
173		break;
174	case DRT_OCTAL:
175		radix_mode = 8;
176		break;
177	case DRT_DECIMAL:
178		radix_mode = 10;
179		break;
180	case DRT_HEXADECIMAL:
181		radix_mode = 16;
182		break;
183	}
184
185	lex_wspace = ((flags & DRT_WSPACE) != 0);
186	lex_hex_numbers = ((flags & DRT_HEX) != 0);
187
188	c = db_read_char();
189	for (n = 0; c <= ' ' || c > '~'; n++) {
190	    if (c == '\n' || c == -1)
191		return (tEOL);
192	    c = db_read_char();
193	}
194	if (lex_wspace && n != 0) {
195	    db_unread_char(c);
196	    return (tWSPACE);
197	}
198
199	if ((c >= '0' && c <= '9') ||
200	   (lex_hex_numbers &&
201	   ((c >= 'a' && c <= 'f') ||
202	   (c >= 'A' && c <= 'F')))) {
203	    /* number */
204	    int	r, digit = 0;
205
206	    if (radix_mode != -1)
207		r = radix_mode;
208	    else if (c != '0')
209		r = db_radix;
210	    else {
211		c = db_read_char();
212		if (c == 'O' || c == 'o')
213		    r = 8;
214		else if (c == 'T' || c == 't')
215		    r = 10;
216		else if (c == 'X' || c == 'x')
217		    r = 16;
218		else {
219		    r = db_radix;
220		    db_unread_char(c);
221		}
222		c = db_read_char();
223	    }
224	    db_tok_number = 0;
225	    for (;;) {
226		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
227		    digit = c - '0';
228		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
229				     (c >= 'a' && c <= 'f'))) {
230		    if (c >= 'a')
231			digit = c - 'a' + 10;
232		    else if (c >= 'A')
233			digit = c - 'A' + 10;
234		}
235		else
236		    break;
237		db_tok_number = db_tok_number * r + digit;
238		c = db_read_char();
239	    }
240	    if ((c >= '0' && c <= '9') ||
241		(c >= 'A' && c <= 'Z') ||
242		(c >= 'a' && c <= 'z') ||
243		(c == '_'))
244	    {
245		db_error("Bad character in number\n");
246		db_flush_lex();
247		return (tEOF);
248	    }
249	    db_unread_char(c);
250	    return (tNUMBER);
251	}
252	if ((c >= 'A' && c <= 'Z') ||
253	    (c >= 'a' && c <= 'z') ||
254	    c == '_' || c == '\\')
255	{
256	    /* string */
257	    char *cp;
258
259	    cp = db_tok_string;
260	    if (c == '\\') {
261		c = db_read_char();
262		if (c == '\n' || c == -1)
263		    db_error("Bad escape\n");
264	    }
265	    *cp++ = c;
266	    while (1) {
267		c = db_read_char();
268		if ((c >= 'A' && c <= 'Z') ||
269		    (c >= 'a' && c <= 'z') ||
270		    (c >= '0' && c <= '9') ||
271		    c == '_' || c == '\\' || c == ':' || c == '.')
272		{
273		    if (c == '\\') {
274			c = db_read_char();
275			if (c == '\n' || c == -1)
276			    db_error("Bad escape\n");
277		    }
278		    *cp++ = c;
279		    if (cp == db_tok_string+sizeof(db_tok_string)) {
280			db_error("String too long\n");
281			db_flush_lex();
282			return (tEOF);
283		    }
284		    continue;
285		}
286		else {
287		    *cp = '\0';
288		    break;
289		}
290	    }
291	    db_unread_char(c);
292	    return (tIDENT);
293	}
294
295	switch (c) {
296	    case '+':
297		return (tPLUS);
298	    case '-':
299		return (tMINUS);
300	    case '.':
301		c = db_read_char();
302		if (c == '.')
303		    return (tDOTDOT);
304		db_unread_char(c);
305		return (tDOT);
306	    case '*':
307		return (tSTAR);
308	    case '/':
309		return (tSLASH);
310	    case '=':
311		c = db_read_char();
312		if (c == '=')
313		    return (tLOG_EQ);
314		db_unread_char(c);
315		return (tEQ);
316	    case '%':
317		return (tPCT);
318	    case '#':
319		return (tHASH);
320	    case '(':
321		return (tLPAREN);
322	    case ')':
323		return (tRPAREN);
324	    case ',':
325		return (tCOMMA);
326	    case '"':
327		return (tDITTO);
328	    case '$':
329		return (tDOLLAR);
330	    case '!':
331		c = db_read_char();
332		if (c == '='){
333			return (tLOG_NOT_EQ);
334		}
335		db_unread_char(c);
336		return (tEXCL);
337	    case ':':
338		c = db_read_char();
339		if (c == ':')
340			return (tCOLONCOLON);
341		db_unread_char(c);
342		return (tCOLON);
343	    case ';':
344		return (tSEMI);
345	    case '&':
346		c = db_read_char();
347		if (c == '&')
348		    return (tLOG_AND);
349		db_unread_char(c);
350		return (tBIT_AND);
351	    case '|':
352		c = db_read_char();
353		if (c == '|')
354		    return (tLOG_OR);
355		db_unread_char(c);
356		return (tBIT_OR);
357	    case '<':
358		c = db_read_char();
359		if (c == '<')
360		    return (tSHIFT_L);
361		if (c == '=')
362		    return (tLESS_EQ);
363		db_unread_char(c);
364		return (tLESS);
365	    case '>':
366		c = db_read_char();
367		if (c == '>')
368		    return (tSHIFT_R);
369		if (c == '=')
370		    return (tGREATER_EQ);
371		db_unread_char(c);
372		return (tGREATER);
373	    case '?':
374		return (tQUESTION);
375	    case '~':
376		return (tBIT_NOT);
377	    case -1:
378		return (tEOF);
379	}
380	db_printf("Bad character\n");
381	db_flush_lex();
382	return (tEOF);
383}
384