lex.h revision 290001
1117632Sharti/*
2117632Sharti * Copyright (C) 2004, 2005, 2007, 2008  Internet Systems Consortium, Inc. ("ISC")
3117632Sharti * Copyright (C) 1998-2002  Internet Software Consortium.
4117632Sharti *
5117632Sharti * Permission to use, copy, modify, and/or distribute this software for any
6117632Sharti * purpose with or without fee is hereby granted, provided that the above
7117632Sharti * copyright notice and this permission notice appear in all copies.
8117632Sharti *
9117632Sharti * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10117632Sharti * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11117632Sharti * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12117632Sharti * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13117632Sharti * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14117632Sharti * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15117632Sharti * PERFORMANCE OF THIS SOFTWARE.
16117632Sharti */
17117632Sharti
18117632Sharti/* $Id: lex.h,v 1.37 2008/05/30 23:47:01 tbox Exp $ */
19117632Sharti
20117632Sharti#ifndef ISC_LEX_H
21117632Sharti#define ISC_LEX_H 1
22117632Sharti
23117632Sharti/*****
24117632Sharti ***** Module Info
25117632Sharti *****/
26117632Sharti
27117632Sharti/*! \file isc/lex.h
28117632Sharti * \brief The "lex" module provides a lightweight tokenizer.  It can operate
29117632Sharti * on files or buffers, and can handle "include".  It is designed for
30117632Sharti * parsing of DNS master files and the BIND configuration file, but
31117632Sharti * should be general enough to tokenize other things, e.g. HTTP.
32117632Sharti *
33117632Sharti * \li MP:
34117632Sharti *	No synchronization is provided.  Clients must ensure exclusive
35117632Sharti *	access.
36117632Sharti *
37117632Sharti * \li Reliability:
38117632Sharti *	No anticipated impact.
39117632Sharti *
40117632Sharti * \li Resources:
41117632Sharti *	TBS
42117632Sharti *
43117632Sharti * \li Security:
44117632Sharti *	No anticipated impact.
45117632Sharti *
46117632Sharti * \li Standards:
47117632Sharti * 	None.
48117632Sharti */
49117632Sharti
50117632Sharti/***
51117632Sharti *** Imports
52117632Sharti ***/
53117632Sharti
54117632Sharti#include <stdio.h>
55117632Sharti
56117632Sharti#include <isc/lang.h>
57117632Sharti#include <isc/region.h>
58117632Sharti#include <isc/types.h>
59117632Sharti
60117632ShartiISC_LANG_BEGINDECLS
61117632Sharti
62117632Sharti/***
63117632Sharti *** Options
64117632Sharti ***/
65117632Sharti
66117632Sharti/*@{*/
67117632Sharti/*!
68117632Sharti * Various options for isc_lex_gettoken().  Each option is a distinct bit.
69117632Sharti */
70117632Sharti
71117632Sharti#define ISC_LEXOPT_EOL			0x01	/*%< Want end-of-line token. */
72117632Sharti#define ISC_LEXOPT_EOF			0x02	/*%< Want end-of-file token. */
73117632Sharti#define ISC_LEXOPT_INITIALWS		0x04	/*%< Want initial whitespace. */
74117632Sharti#define ISC_LEXOPT_NUMBER		0x08	/*%< Recognize numbers. */
75117632Sharti#define ISC_LEXOPT_QSTRING		0x10	/*%< Recognize quoted strings (qstrings). */
76117632Sharti/*@}*/
77117632Sharti
78117632Sharti/*@{*/
79117632Sharti/*!
80117632Sharti * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in
81117632Sharti * the DNS master file format.  If this option is set, then the
82117632Sharti * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when
83117632Sharti * the paren count is > 0.  To use this option, '(' and ')' must be special
84117632Sharti * characters (see isc_lex_setspecials()).
85117632Sharti */
86117632Sharti#define ISC_LEXOPT_DNSMULTILINE		0x20	/*%< Handle '(' and ')'. */
87117632Sharti#define ISC_LEXOPT_NOMORE		0x40	/*%< Want "no more" token. */
88117632Sharti
89117632Sharti#define ISC_LEXOPT_CNUMBER		0x80    /*%< Recognize octal and hex. */
90117632Sharti#define ISC_LEXOPT_ESCAPE		0x100	/*%< Recognize escapes. */
91117632Sharti#define ISC_LEXOPT_QSTRINGMULTILINE	0x200	/*%< Allow multiline "" strings. */
92117632Sharti#define ISC_LEXOPT_OCTAL		0x400	/*%< Expect an octal number. */
93117632Sharti/*@}*/
94117632Sharti/*@{*/
95117632Sharti/*!
96117632Sharti * Various commenting styles, which may be changed at any time with
97117632Sharti * isc_lex_setcomments().  Each style below is a distinct bit.
98117632Sharti */
99117632Sharti
100117632Sharti#define ISC_LEXCOMMENT_C		0x01
101117632Sharti#define ISC_LEXCOMMENT_CPLUSPLUS	0x02
102117632Sharti#define ISC_LEXCOMMENT_SHELL		0x04
103117632Sharti#define ISC_LEXCOMMENT_DNSMASTERFILE	0x08
104117632Sharti/*@}*/
105117632Sharti
106117632Sharti/***
107117632Sharti *** Types
108117632Sharti ***/
109117632Sharti
110117632Sharti/*! Lex: table of 256 chars describing the special characters (presumably one entry per character code -- confirm). */
111117632Sharti
112117632Shartitypedef char isc_lexspecials_t[256];
113117632Sharti
114117632Sharti/* Tokens: the kinds of token, as stored in the 'type' member of isc_token_t. */
115117632Sharti
116117632Shartitypedef enum {
117117632Sharti	isc_tokentype_unknown = 0,
118117632Sharti	isc_tokentype_string = 1,
119117632Sharti	isc_tokentype_number = 2,
120117632Sharti	isc_tokentype_qstring = 3,
121117632Sharti	isc_tokentype_eol = 4,
122117632Sharti	isc_tokentype_eof = 5,
123117632Sharti	isc_tokentype_initialws = 6,
124117632Sharti	isc_tokentype_special = 7,
125117632Sharti	isc_tokentype_nomore = 8
126117632Sharti} isc_tokentype_t;
127117632Sharti
128117632Shartitypedef union {	/* Value carried by an isc_token_t.  NOTE(review): which member goes with which token type is not stated in this header. */
129117632Sharti	char				as_char;
130117632Sharti	unsigned long			as_ulong;
131117632Sharti	isc_region_t			as_region;
132117632Sharti	isc_textregion_t		as_textregion;
133117632Sharti	void *				as_pointer;
134117632Sharti} isc_tokenvalue_t;
135117632Sharti
136117632Shartitypedef struct isc_token {
137117632Sharti	isc_tokentype_t			type;	/*%< Kind of token. */
138117632Sharti	isc_tokenvalue_t		value;	/*%< Token value; the member to read presumably depends on 'type' -- confirm. */
139117632Sharti} isc_token_t;
140117632Sharti
141117632Sharti/***
142117632Sharti *** Functions
143117632Sharti ***/
144117632Sharti
145117632Shartiisc_result_t
146117632Shartiisc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp);
147117632Sharti/*%<
148117632Sharti * Create a lexer.
149117632Sharti *
150117632Sharti * 'max_token' is a hint of the number of bytes in the largest token.
151117632Sharti *
152117632Sharti * Requires:
153117632Sharti *\li	'lexp' is a valid pointer.  NOTE(review): this used to read "'*lexp' is a valid lexer", which cannot hold before creation; presumably '*lexp' must be NULL -- confirm against the implementation.
154117632Sharti *
155117632Sharti *\li	max_token > 0.
156117632Sharti *
157117632Sharti * Ensures:
158117632Sharti *\li	On success, *lexp is attached to the newly created lexer.
159117632Sharti *
160117632Sharti * Returns:
161117632Sharti *\li	#ISC_R_SUCCESS
162117632Sharti *\li	#ISC_R_NOMEMORY
163117632Sharti */
164117632Sharti
165117632Shartivoid
166117632Shartiisc_lex_destroy(isc_lex_t **lexp);
167117632Sharti/*%<
168117632Sharti * Destroy the lexer referenced by '*lexp'.
169117632Sharti *
170117632Sharti * Requires:
171117632Sharti *\li	'*lexp' is a valid lexer.
172117632Sharti *
173117632Sharti * Ensures:
174117632Sharti *\li	*lexp == NULL
175117632Sharti */
176117632Sharti
177117632Shartiunsigned int
178117632Shartiisc_lex_getcomments(isc_lex_t *lex);
179117632Sharti/*%<
180117632Sharti * Return the current lexer commenting styles.
181117632Sharti *
182117632Sharti * Requires:
183117632Sharti *\li	'lex' is a valid lexer.
184117632Sharti *
185117632Sharti * Returns:
186117632Sharti *\li	The commenting styles (ISC_LEXCOMMENT_* bits) which are currently allowed.
187117632Sharti */
188117632Sharti
189117632Shartivoid
190117632Shartiisc_lex_setcomments(isc_lex_t *lex, unsigned int comments);
191117632Sharti/*%<
192117632Sharti * Set allowed lexer commenting styles.
193117632Sharti *
194117632Sharti * Requires:
195117632Sharti *\li	'lex' is a valid lexer.
196117632Sharti *
197117632Sharti *\li	'comments' has meaningful values (ISC_LEXCOMMENT_* bits).
198117632Sharti */
199117632Sharti
200117632Shartivoid
201117632Shartiisc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials);
202117632Sharti/*%<
203117632Sharti * Put the current list of specials into the caller-supplied 'specials' array.
204117632Sharti *
205117632Sharti * Requires:
206117632Sharti *\li	'lex' is a valid lexer.
207117632Sharti */
208117632Sharti
209117632Shartivoid
210117632Shartiisc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials);
211117632Sharti/*%<
212117632Sharti * The characters in 'specials' are returned as tokens.  Along with
213117632Sharti * whitespace, they delimit strings and numbers.
214117632Sharti *
215117632Sharti * Note:
216117632Sharti *\li	Comment processing takes precedence over special character
217117632Sharti *	recognition.
218117632Sharti *
219117632Sharti * Requires:
220117632Sharti *\li	'lex' is a valid lexer.
221117632Sharti */
222117632Sharti
223117632Shartiisc_result_t
224117632Shartiisc_lex_openfile(isc_lex_t *lex, const char *filename);
225117632Sharti/*%<
226117632Sharti * Open 'filename' and make it the current input source for 'lex' (see isc_lex_close()).
227117632Sharti *
228117632Sharti * Requires:
229117632Sharti *\li	'lex' is a valid lexer.
230117632Sharti *
231117632Sharti *\li	'filename' is a valid C string.
232117632Sharti *
233117632Sharti * Returns:
234117632Sharti *\li	#ISC_R_SUCCESS
235117632Sharti *\li	#ISC_R_NOMEMORY			Out of memory
236117632Sharti *\li	#ISC_R_NOTFOUND			File not found
237117632Sharti *\li	#ISC_R_NOPERM			No permission to open file
238117632Sharti *\li	#ISC_R_FAILURE			Couldn't open file, not sure why
239117632Sharti *\li	#ISC_R_UNEXPECTED
240117632Sharti */
241117632Sharti
242117632Shartiisc_result_t
243117632Shartiisc_lex_openstream(isc_lex_t *lex, FILE *stream);
244117632Sharti/*%<
245117632Sharti * Make 'stream' the current input source for 'lex' (see isc_lex_close()).
246117632Sharti *
247117632Sharti * Requires:
248117632Sharti *\li	'lex' is a valid lexer.
249117632Sharti *
250117632Sharti *\li	'stream' is a valid C stream.
251117632Sharti *
252117632Sharti * Returns:
253117632Sharti *\li	#ISC_R_SUCCESS
254117632Sharti *\li	#ISC_R_NOMEMORY			Out of memory
255117632Sharti */
256117632Sharti
257117632Shartiisc_result_t
258117632Shartiisc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer);
259117632Sharti/*%<
260117632Sharti * Make 'buffer' the current input source for 'lex' (see isc_lex_close()).
261117632Sharti *
262117632Sharti * Requires:
263117632Sharti *\li	'lex' is a valid lexer.
264117632Sharti *
265117632Sharti *\li	'buffer' is a valid buffer.
266117632Sharti *
267117632Sharti * Returns:
268117632Sharti *\li	#ISC_R_SUCCESS
269117632Sharti *\li	#ISC_R_NOMEMORY			Out of memory
270117632Sharti */
271117632Sharti
272117632Shartiisc_result_t
273117632Shartiisc_lex_close(isc_lex_t *lex);
274117632Sharti/*%<
275117632Sharti * Close the most recently opened object (i.e. file, stream or buffer).
276117632Sharti *
277117632Sharti * Returns:
278117632Sharti *\li	#ISC_R_SUCCESS
279117632Sharti *\li	#ISC_R_NOMORE			No more input sources
280117632Sharti */
281117632Sharti
282117632Shartiisc_result_t
283117632Shartiisc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp);
284117632Sharti/*%<
285117632Sharti * Get the next token.
286117632Sharti *
287117632Sharti * Requires:
288117632Sharti *\li	'lex' is a valid lexer.
289117632Sharti *
290117632Sharti *\li	'lex' has an input source.
291117632Sharti *
292117632Sharti *\li	'options' contains valid options (ISC_LEXOPT_* bits).
293117632Sharti *
294117632Sharti *\li	'tokenp' is a valid pointer.
295117632Sharti *
296117632Sharti * Returns:
297117632Sharti *\li	#ISC_R_SUCCESS
298117632Sharti *\li	#ISC_R_UNEXPECTEDEND
299117632Sharti *\li	#ISC_R_NOMEMORY
300117632Sharti *
301117632Sharti *	The following two results are returned only if their corresponding
302117632Sharti *	lexer options (ISC_LEXOPT_EOF and ISC_LEXOPT_NOMORE) are not set.
303117632Sharti *
304117632Sharti *\li	#ISC_R_EOF			End of input source
305117632Sharti *\li	#ISC_R_NOMORE			No more input sources
306117632Sharti */
307117632Sharti
308117632Shartiisc_result_t
309117632Shartiisc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
310117632Sharti		       isc_tokentype_t expect, isc_boolean_t eol);
311117632Sharti/*%<
312117632Sharti * Get the next token from a DNS master file type stream.  This is a
313117632Sharti * convenience function that sets appropriate options and handles quoted
314117632Sharti * strings and end of line correctly for master files.  It also ungets
315117632Sharti * unexpected tokens.  NOTE(review): 'expect' and 'eol' are not described here; presumably the wanted token type and whether end of line is acceptable -- confirm.
316117632Sharti *
317117632Sharti * Requires:
318117632Sharti *\li	'lex' is a valid lexer.
319117632Sharti *
320117632Sharti *\li	'token' is a valid pointer.
321117632Sharti *
322117632Sharti * Returns:
323117632Sharti *
324117632Sharti *\li	Any return code from isc_lex_gettoken().
325117632Sharti */
326117632Sharti
327117632Shartiisc_result_t
328117632Shartiisc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, isc_boolean_t eol);
329117632Sharti/*%<
330117632Sharti * Get the next token from a DNS master file type stream.  This is a
331117632Sharti * convenience function that sets appropriate options (presumably including ISC_LEXOPT_OCTAL -- confirm) and handles end
332117632Sharti * of line correctly for master files.  It also ungets unexpected tokens.
333117632Sharti *
334117632Sharti * Requires:
335117632Sharti *\li	'lex' is a valid lexer.
336117632Sharti *
337117632Sharti *\li	'token' is a valid pointer.
338117632Sharti *
339117632Sharti * Returns:
340117632Sharti *
341117632Sharti *\li	Any return code from isc_lex_gettoken().
342117632Sharti */
343117632Sharti
344117632Shartivoid
345117632Shartiisc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp);
346117632Sharti/*%<
347117632Sharti * Unget the current token.  Only one token can be ungotten at a time.
348117632Sharti *
349117632Sharti * Requires:
350117632Sharti *\li	'lex' is a valid lexer.
351117632Sharti *
352117632Sharti *\li	'lex' has an input source.
353117632Sharti *
354117632Sharti *\li	'tokenp' points to a valid token.
355117632Sharti *
356117632Sharti *\li	There is no ungotten token already.
357117632Sharti */
358117632Sharti
359117632Shartivoid
360117632Shartiisc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r);
361117632Sharti/*%<
362117632Sharti * Sets '*r' to a region containing the text of the last token returned.
363117632Sharti *
364117632Sharti * Requires:
365117632Sharti *\li	'lex' is a valid lexer.
366117632Sharti *
367117632Sharti *\li	'lex' has an input source.
368117632Sharti *
369117632Sharti *\li	'tokenp' points to a valid token.
370117632Sharti *
371117632Sharti *\li	A token has been gotten and not ungotten.
372117632Sharti */
373117632Sharti
374117632Shartichar *
375117632Shartiisc_lex_getsourcename(isc_lex_t *lex);
376117632Sharti/*%<
377117632Sharti * Return the name of the current input source.
378117632Sharti *
379117632Sharti * Requires:
380117632Sharti *\li	'lex' is a valid lexer.
381117632Sharti *
382117632Sharti * Returns:
383117632Sharti *\li	Source name, or NULL if no current source.
384117632Sharti *\li	The result is valid while the current input source exists.
385117632Sharti */
386117632Sharti
387117632Sharti
388117632Shartiunsigned long
389117632Shartiisc_lex_getsourceline(isc_lex_t *lex);
390117632Sharti/*%<
391117632Sharti * Return the line number of the current input source.
392117632Sharti *
393117632Sharti * Requires:
394117632Sharti *\li	'lex' is a valid lexer.
395117632Sharti *
396117632Sharti * Returns:
397117632Sharti *\li	Current line number, or 0 if no current source.
398117632Sharti */
399117632Sharti
400117632Shartiisc_result_t
401117632Shartiisc_lex_setsourcename(isc_lex_t *lex, const char *name);
402117632Sharti/*%<
403117632Sharti * Assigns the new name 'name' to the input source.
404117632Sharti *
405117632Sharti * Requires:
406117632Sharti *
407117632Sharti *\li	'lex' is a valid lexer.
408117632Sharti *
409117632Sharti * Returns:
410117632Sharti *\li	#ISC_R_SUCCESS
411117632Sharti *\li	#ISC_R_NOMEMORY
412117632Sharti *\li	#ISC_R_NOTFOUND - there are no sources.
413117632Sharti */
414117632Sharti
415117632Shartiisc_boolean_t
416117632Shartiisc_lex_isfile(isc_lex_t *lex);
417117632Sharti/*%<
418117632Sharti * Return whether the current input source is a file.
419117632Sharti *
420117632Sharti * Requires:
421117632Sharti *\li	'lex' is a valid lexer.
422117632Sharti *
423117632Sharti * Returns:
424117872Sharti *\li	#ISC_TRUE if the current input is a file,
425117632Sharti *\li	#ISC_FALSE otherwise.
426117632Sharti */
427117632Sharti
428117632Sharti
429117632ShartiISC_LANG_ENDDECLS
430117632Sharti
431117632Sharti#endif /* ISC_LEX_H */
432117632Sharti