lex.h revision 290001
1117632Sharti/* 2117632Sharti * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") 3117632Sharti * Copyright (C) 1998-2002 Internet Software Consortium. 4117632Sharti * 5117632Sharti * Permission to use, copy, modify, and/or distribute this software for any 6117632Sharti * purpose with or without fee is hereby granted, provided that the above 7117632Sharti * copyright notice and this permission notice appear in all copies. 8117632Sharti * 9117632Sharti * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 10117632Sharti * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11117632Sharti * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 12117632Sharti * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13117632Sharti * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 14117632Sharti * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15117632Sharti * PERFORMANCE OF THIS SOFTWARE. 16117632Sharti */ 17117632Sharti 18117632Sharti/* $Id: lex.h,v 1.37 2008/05/30 23:47:01 tbox Exp $ */ 19117632Sharti 20117632Sharti#ifndef ISC_LEX_H 21117632Sharti#define ISC_LEX_H 1 22117632Sharti 23117632Sharti/***** 24117632Sharti ***** Module Info 25117632Sharti *****/ 26117632Sharti 27117632Sharti/*! \file isc/lex.h 28117632Sharti * \brief The "lex" module provides a lightweight tokenizer. It can operate 29117632Sharti * on files or buffers, and can handle "include". It is designed for 30117632Sharti * parsing of DNS master files and the BIND configuration file, but 31117632Sharti * should be general enough to tokenize other things, e.g. HTTP. 32117632Sharti * 33117632Sharti * \li MP: 34117632Sharti * No synchronization is provided. Clients must ensure exclusive 35117632Sharti * access. 36117632Sharti * 37117632Sharti * \li Reliability: 38117632Sharti * No anticipated impact. 39117632Sharti * 40117632Sharti * \li Resources: 41117632Sharti * TBS 42117632Sharti * 43117632Sharti * \li Security: 44117632Sharti * No anticipated impact. 45117632Sharti * 46117632Sharti * \li Standards: 47117632Sharti * None. 48117632Sharti */ 49117632Sharti 50117632Sharti/*** 51117632Sharti *** Imports 52117632Sharti ***/ 53117632Sharti 54117632Sharti#include <stdio.h> 55117632Sharti 56117632Sharti#include <isc/lang.h> 57117632Sharti#include <isc/region.h> 58117632Sharti#include <isc/types.h> 59117632Sharti 60117632ShartiISC_LANG_BEGINDECLS 61117632Sharti 62117632Sharti/*** 63117632Sharti *** Options 64117632Sharti ***/ 65117632Sharti 66117632Sharti/*@{*/ 67117632Sharti/*! 68117632Sharti * Various options for isc_lex_gettoken(). 69117632Sharti */ 70117632Sharti 71117632Sharti#define ISC_LEXOPT_EOL 0x01 /*%< Want end-of-line token. */ 72117632Sharti#define ISC_LEXOPT_EOF 0x02 /*%< Want end-of-file token. */ 73117632Sharti#define ISC_LEXOPT_INITIALWS 0x04 /*%< Want initial whitespace. */ 74117632Sharti#define ISC_LEXOPT_NUMBER 0x08 /*%< Recognize numbers. */ 75117632Sharti#define ISC_LEXOPT_QSTRING 0x10 /*%< Recognize qstrings. */ 76117632Sharti/*@}*/ 77117632Sharti 78117632Sharti/*@{*/ 79117632Sharti/*! 80117632Sharti * The ISC_LEXOPT_DNSMULTILINE option handles the processing of '(' and ')' in 81117632Sharti * the DNS master file format. If this option is set, then the 82117632Sharti * ISC_LEXOPT_INITIALWS and ISC_LEXOPT_EOL options will be ignored when 83117632Sharti * the paren count is > 0. To use this option, '(' and ')' must be special 84117632Sharti * characters. 85117632Sharti */ 86117632Sharti#define ISC_LEXOPT_DNSMULTILINE 0x20 /*%< Handle '(' and ')'. */ 87117632Sharti#define ISC_LEXOPT_NOMORE 0x40 /*%< Want "no more" token. */ 88117632Sharti 89117632Sharti#define ISC_LEXOPT_CNUMBER 0x80 /*%< Recognize octal and hex. */ 90117632Sharti#define ISC_LEXOPT_ESCAPE 0x100 /*%< Recognize escapes. */ 91117632Sharti#define ISC_LEXOPT_QSTRINGMULTILINE 0x200 /*%< Allow multiline "" strings */ 92117632Sharti#define ISC_LEXOPT_OCTAL 0x400 /*%< Expect a octal number. */ 93117632Sharti/*@}*/ 94117632Sharti/*@{*/ 95117632Sharti/*! 96117632Sharti * Various commenting styles, which may be changed at any time with 97117632Sharti * isc_lex_setcomments(). 98117632Sharti */ 99117632Sharti 100117632Sharti#define ISC_LEXCOMMENT_C 0x01 101117632Sharti#define ISC_LEXCOMMENT_CPLUSPLUS 0x02 102117632Sharti#define ISC_LEXCOMMENT_SHELL 0x04 103117632Sharti#define ISC_LEXCOMMENT_DNSMASTERFILE 0x08 104117632Sharti/*@}*/ 105117632Sharti 106117632Sharti/*** 107117632Sharti *** Types 108117632Sharti ***/ 109117632Sharti 110117632Sharti/*! Lex */ 111117632Sharti 112117632Shartitypedef char isc_lexspecials_t[256]; 113117632Sharti 114117632Sharti/* Tokens */ 115117632Sharti 116117632Shartitypedef enum { 117117632Sharti isc_tokentype_unknown = 0, 118117632Sharti isc_tokentype_string = 1, 119117632Sharti isc_tokentype_number = 2, 120117632Sharti isc_tokentype_qstring = 3, 121117632Sharti isc_tokentype_eol = 4, 122117632Sharti isc_tokentype_eof = 5, 123117632Sharti isc_tokentype_initialws = 6, 124117632Sharti isc_tokentype_special = 7, 125117632Sharti isc_tokentype_nomore = 8 126117632Sharti} isc_tokentype_t; 127117632Sharti 128117632Shartitypedef union { 129117632Sharti char as_char; 130117632Sharti unsigned long as_ulong; 131117632Sharti isc_region_t as_region; 132117632Sharti isc_textregion_t as_textregion; 133117632Sharti void * as_pointer; 134117632Sharti} isc_tokenvalue_t; 135117632Sharti 136117632Shartitypedef struct isc_token { 137117632Sharti isc_tokentype_t type; 138117632Sharti isc_tokenvalue_t value; 139117632Sharti} isc_token_t; 140117632Sharti 141117632Sharti/*** 142117632Sharti *** Functions 143117632Sharti ***/ 144117632Sharti 145117632Shartiisc_result_t 146117632Shartiisc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp); 147117632Sharti/*%< 148117632Sharti * Create a lexer. 149117632Sharti * 150117632Sharti * 'max_token' is a hint of the number of bytes in the largest token. 151117632Sharti * 152117632Sharti * Requires: 153117632Sharti *\li '*lexp' is a valid lexer. 154117632Sharti * 155117632Sharti *\li max_token > 0. 156117632Sharti * 157117632Sharti * Ensures: 158117632Sharti *\li On success, *lexp is attached to the newly created lexer. 159117632Sharti * 160117632Sharti * Returns: 161117632Sharti *\li #ISC_R_SUCCESS 162117632Sharti *\li #ISC_R_NOMEMORY 163117632Sharti */ 164117632Sharti 165117632Shartivoid 166117632Shartiisc_lex_destroy(isc_lex_t **lexp); 167117632Sharti/*%< 168117632Sharti * Destroy the lexer. 169117632Sharti * 170117632Sharti * Requires: 171117632Sharti *\li '*lexp' is a valid lexer. 172117632Sharti * 173117632Sharti * Ensures: 174117632Sharti *\li *lexp == NULL 175117632Sharti */ 176117632Sharti 177117632Shartiunsigned int 178117632Shartiisc_lex_getcomments(isc_lex_t *lex); 179117632Sharti/*%< 180117632Sharti * Return the current lexer commenting styles. 181117632Sharti * 182117632Sharti * Requires: 183117632Sharti *\li 'lex' is a valid lexer. 184117632Sharti * 185117632Sharti * Returns: 186117632Sharti *\li The commenting sytles which are currently allowed. 187117632Sharti */ 188117632Sharti 189117632Shartivoid 190117632Shartiisc_lex_setcomments(isc_lex_t *lex, unsigned int comments); 191117632Sharti/*%< 192117632Sharti * Set allowed lexer commenting styles. 193117632Sharti * 194117632Sharti * Requires: 195117632Sharti *\li 'lex' is a valid lexer. 196117632Sharti * 197117632Sharti *\li 'comments' has meaningful values. 198117632Sharti */ 199117632Sharti 200117632Shartivoid 201117632Shartiisc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials); 202117632Sharti/*%< 203117632Sharti * Put the current list of specials into 'specials'. 204117632Sharti * 205117632Sharti * Requires: 206117632Sharti *\li 'lex' is a valid lexer. 207117632Sharti */ 208117632Sharti 209117632Shartivoid 210117632Shartiisc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials); 211117632Sharti/*!< 212117632Sharti * The characters in 'specials' are returned as tokens. Along with 213117632Sharti * whitespace, they delimit strings and numbers. 214117632Sharti * 215117632Sharti * Note: 216117632Sharti *\li Comment processing takes precedence over special character 217117632Sharti * recognition. 218117632Sharti * 219117632Sharti * Requires: 220117632Sharti *\li 'lex' is a valid lexer. 221117632Sharti */ 222117632Sharti 223117632Shartiisc_result_t 224117632Shartiisc_lex_openfile(isc_lex_t *lex, const char *filename); 225117632Sharti/*%< 226117632Sharti * Open 'filename' and make it the current input source for 'lex'. 227117632Sharti * 228117632Sharti * Requires: 229117632Sharti *\li 'lex' is a valid lexer. 230117632Sharti * 231117632Sharti *\li filename is a valid C string. 232117632Sharti * 233117632Sharti * Returns: 234117632Sharti *\li #ISC_R_SUCCESS 235117632Sharti *\li #ISC_R_NOMEMORY Out of memory 236117632Sharti *\li #ISC_R_NOTFOUND File not found 237117632Sharti *\li #ISC_R_NOPERM No permission to open file 238117632Sharti *\li #ISC_R_FAILURE Couldn't open file, not sure why 239117632Sharti *\li #ISC_R_UNEXPECTED 240117632Sharti */ 241117632Sharti 242117632Shartiisc_result_t 243117632Shartiisc_lex_openstream(isc_lex_t *lex, FILE *stream); 244117632Sharti/*%< 245117632Sharti * Make 'stream' the current input source for 'lex'. 246117632Sharti * 247117632Sharti * Requires: 248117632Sharti *\li 'lex' is a valid lexer. 249117632Sharti * 250117632Sharti *\li 'stream' is a valid C stream. 251117632Sharti * 252117632Sharti * Returns: 253117632Sharti *\li #ISC_R_SUCCESS 254117632Sharti *\li #ISC_R_NOMEMORY Out of memory 255117632Sharti */ 256117632Sharti 257117632Shartiisc_result_t 258117632Shartiisc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer); 259117632Sharti/*%< 260117632Sharti * Make 'buffer' the current input source for 'lex'. 261117632Sharti * 262117632Sharti * Requires: 263117632Sharti *\li 'lex' is a valid lexer. 264117632Sharti * 265117632Sharti *\li 'buffer' is a valid buffer. 266117632Sharti * 267117632Sharti * Returns: 268117632Sharti *\li #ISC_R_SUCCESS 269117632Sharti *\li #ISC_R_NOMEMORY Out of memory 270117632Sharti */ 271117632Sharti 272117632Shartiisc_result_t 273117632Shartiisc_lex_close(isc_lex_t *lex); 274117632Sharti/*%< 275117632Sharti * Close the most recently opened object (i.e. file or buffer). 276117632Sharti * 277117632Sharti * Returns: 278117632Sharti *\li #ISC_R_SUCCESS 279117632Sharti *\li #ISC_R_NOMORE No more input sources 280117632Sharti */ 281117632Sharti 282117632Shartiisc_result_t 283117632Shartiisc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp); 284117632Sharti/*%< 285117632Sharti * Get the next token. 286117632Sharti * 287117632Sharti * Requires: 288117632Sharti *\li 'lex' is a valid lexer. 289117632Sharti * 290117632Sharti *\li 'lex' has an input source. 291117632Sharti * 292117632Sharti *\li 'options' contains valid options. 293117632Sharti * 294117632Sharti *\li '*tokenp' is a valid pointer. 295117632Sharti * 296117632Sharti * Returns: 297117632Sharti *\li #ISC_R_SUCCESS 298117632Sharti *\li #ISC_R_UNEXPECTEDEND 299117632Sharti *\li #ISC_R_NOMEMORY 300117632Sharti * 301117632Sharti * These two results are returned only if their corresponding lexer 302117632Sharti * options are not set. 303117632Sharti * 304117632Sharti *\li #ISC_R_EOF End of input source 305117632Sharti *\li #ISC_R_NOMORE No more input sources 306117632Sharti */ 307117632Sharti 308117632Shartiisc_result_t 309117632Shartiisc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token, 310117632Sharti isc_tokentype_t expect, isc_boolean_t eol); 311117632Sharti/*%< 312117632Sharti * Get the next token from a DNS master file type stream. This is a 313117632Sharti * convenience function that sets appropriate options and handles quoted 314117632Sharti * strings and end of line correctly for master files. It also ungets 315117632Sharti * unexpected tokens. 316117632Sharti * 317117632Sharti * Requires: 318117632Sharti *\li 'lex' is a valid lexer. 319117632Sharti * 320117632Sharti *\li 'token' is a valid pointer 321117632Sharti * 322117632Sharti * Returns: 323117632Sharti * 324117632Sharti * \li any return code from isc_lex_gettoken(). 325117632Sharti */ 326117632Sharti 327117632Shartiisc_result_t 328117632Shartiisc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, isc_boolean_t eol); 329117632Sharti/*%< 330117632Sharti * Get the next token from a DNS master file type stream. This is a 331117632Sharti * convenience function that sets appropriate options and handles end 332117632Sharti * of line correctly for master files. It also ungets unexpected tokens. 333117632Sharti * 334117632Sharti * Requires: 335117632Sharti *\li 'lex' is a valid lexer. 336117632Sharti * 337117632Sharti *\li 'token' is a valid pointer 338117632Sharti * 339117632Sharti * Returns: 340117632Sharti * 341117632Sharti * \li any return code from isc_lex_gettoken(). 342117632Sharti */ 343117632Sharti 344117632Shartivoid 345117632Shartiisc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp); 346117632Sharti/*%< 347117632Sharti * Unget the current token. 348117632Sharti * 349117632Sharti * Requires: 350117632Sharti *\li 'lex' is a valid lexer. 351117632Sharti * 352117632Sharti *\li 'lex' has an input source. 353117632Sharti * 354117632Sharti *\li 'tokenp' points to a valid token. 355117632Sharti * 356117632Sharti *\li There is no ungotten token already. 357117632Sharti */ 358117632Sharti 359117632Shartivoid 360117632Shartiisc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r); 361117632Sharti/*%< 362117632Sharti * Returns a region containing the text of the last token returned. 363117632Sharti * 364117632Sharti * Requires: 365117632Sharti *\li 'lex' is a valid lexer. 366117632Sharti * 367117632Sharti *\li 'lex' has an input source. 368117632Sharti * 369117632Sharti *\li 'tokenp' points to a valid token. 370117632Sharti * 371117632Sharti *\li A token has been gotten and not ungotten. 372117632Sharti */ 373117632Sharti 374117632Shartichar * 375117632Shartiisc_lex_getsourcename(isc_lex_t *lex); 376117632Sharti/*%< 377117632Sharti * Return the input source name. 378117632Sharti * 379117632Sharti * Requires: 380117632Sharti *\li 'lex' is a valid lexer. 381117632Sharti * 382117632Sharti * Returns: 383117632Sharti * \li source name or NULL if no current source. 384117632Sharti *\li result valid while current input source exists. 385117632Sharti */ 386117632Sharti 387117632Sharti 388117632Shartiunsigned long 389117632Shartiisc_lex_getsourceline(isc_lex_t *lex); 390117632Sharti/*%< 391117632Sharti * Return the input source line number. 392117632Sharti * 393117632Sharti * Requires: 394117632Sharti *\li 'lex' is a valid lexer. 395117632Sharti * 396117632Sharti * Returns: 397117632Sharti *\li Current line number or 0 if no current source. 398117632Sharti */ 399117632Sharti 400117632Shartiisc_result_t 401117632Shartiisc_lex_setsourcename(isc_lex_t *lex, const char *name); 402117632Sharti/*%< 403117632Sharti * Assigns a new name to the input source. 404117632Sharti * 405117632Sharti * Requires: 406117632Sharti * 407117632Sharti * \li 'lex' is a valid lexer. 408117632Sharti * 409117632Sharti * Returns: 410117632Sharti * \li #ISC_R_SUCCESS 411117632Sharti * \li #ISC_R_NOMEMORY 412117632Sharti * \li #ISC_R_NOTFOUND - there are no sources. 413117632Sharti */ 414117632Sharti 415117632Shartiisc_boolean_t 416117632Shartiisc_lex_isfile(isc_lex_t *lex); 417117632Sharti/*%< 418117632Sharti * Return whether the current input source is a file. 419117632Sharti * 420117632Sharti * Requires: 421117632Sharti *\li 'lex' is a valid lexer. 422117632Sharti * 423117632Sharti * Returns: 424117872Sharti * \li #ISC_TRUE if the current input is a file, 425117632Sharti *\li #ISC_FALSE otherwise. 426117632Sharti */ 427117632Sharti 428117632Sharti 429117632ShartiISC_LANG_ENDDECLS 430117632Sharti 431117632Sharti#endif /* ISC_LEX_H */ 432117632Sharti