parse.c revision 269257
144743Smarkm/*
244743Smarkm * a generic (simple) parser. Use to parse rr's, private key
344743Smarkm * information and /etc/resolv.conf files
444743Smarkm *
556977Sshin * a Net::DNS like library for C
656977Sshin * LibDNS Team @ NLnet Labs
744743Smarkm * (c) NLnet Labs, 2005-2006
844743Smarkm * See the file LICENSE for the license
944743Smarkm */
1044743Smarkm#include <ldns/config.h>
11277281Spfg#include <ldns/ldns.h>
1244743Smarkm
1344743Smarkm#include <limits.h>
1444743Smarkm#include <strings.h>
1544743Smarkm
1656977Sshinldns_lookup_table ldns_directive_types[] = {
1756977Sshin        { LDNS_DIR_TTL, "$TTL" },
1856977Sshin        { LDNS_DIR_ORIGIN, "$ORIGIN" },
1944743Smarkm        { LDNS_DIR_INCLUDE, "$INCLUDE" },
2056977Sshin        { 0, NULL }
2144743Smarkm};
2244743Smarkm
2344743Smarkm/* add max_limit here? */
/*
 * Read one token from f into token without tracking line numbers.
 * Forwards to ldns_fget_token_l() with a NULL line counter and returns
 * whatever that call returns.
 */
ssize_t
ldns_fget_token(FILE *f, char *token, const char *delim, size_t limit)
{
	int *no_line_counter = NULL;

	return ldns_fget_token_l(f, token, delim, limit, no_line_counter);
}
2944743Smarkm
3044743Smarkmssize_t
3144743Smarkmldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr)
3244743Smarkm{
3344743Smarkm	int c, prev_c;
34277281Spfg	int p; /* 0 -> no parenthese seen, >0 nr of ( seen */
35277281Spfg	int com, quoted;
36277281Spfg	char *t;
37277281Spfg	size_t i;
3844743Smarkm	const char *d;
3944743Smarkm	const char *del;
4044743Smarkm
4144743Smarkm	/* standard delimeters */
4244743Smarkm	if (!delim) {
43277281Spfg		/* from isspace(3) */
4444743Smarkm		del = LDNS_PARSE_NORMAL;
45277281Spfg	} else {
46277281Spfg		del = delim;
47277281Spfg	}
48277281Spfg
4944743Smarkm	p = 0;
5044743Smarkm	i = 0;
5144743Smarkm	com = 0;
5244743Smarkm	quoted = 0;
5344743Smarkm	prev_c = 0;
5444743Smarkm	t = token;
5544743Smarkm	if (del[0] == '"') {
5644743Smarkm		quoted = 1;
57277281Spfg	}
58277281Spfg	while ((c = getc(f)) != EOF) {
5944743Smarkm		if (c == '\r') /* carriage return */
6044743Smarkm			c = ' ';
6144743Smarkm		if (c == '(' && prev_c != '\\' && !quoted) {
6244743Smarkm			/* this only counts for non-comments */
63277281Spfg			if (com == 0) {
6444743Smarkm				p++;
65277281Spfg			}
6644743Smarkm			prev_c = c;
6744743Smarkm			continue;
6844743Smarkm		}
6944743Smarkm
7044743Smarkm		if (c == ')' && prev_c != '\\' && !quoted) {
7144743Smarkm			/* this only counts for non-comments */
72277281Spfg			if (com == 0) {
7344743Smarkm				p--;
7444743Smarkm			}
7544743Smarkm			prev_c = c;
76277281Spfg			continue;
7744743Smarkm		}
7844743Smarkm
7944743Smarkm		if (p < 0) {
8044743Smarkm			/* more ) then ( - close off the string */
8144743Smarkm			*t = '\0';
8244743Smarkm			return 0;
8344743Smarkm		}
8444743Smarkm
8544743Smarkm		/* do something with comments ; */
8644743Smarkm		if (c == ';' && quoted == 0) {
8744743Smarkm			if (prev_c != '\\') {
8844743Smarkm				com = 1;
8944743Smarkm			}
9044743Smarkm		}
9144743Smarkm		if (c == '\"' && com == 0 && prev_c != '\\') {
9244743Smarkm			quoted = 1 - quoted;
9344743Smarkm		}
9444743Smarkm
9544743Smarkm		if (c == '\n' && com != 0) {
9644743Smarkm			/* comments */
9744743Smarkm			com = 0;
9844743Smarkm			*t = ' ';
9944743Smarkm			if (line_nr) {
10044743Smarkm				*line_nr = *line_nr + 1;
10144743Smarkm			}
10244743Smarkm			if (p == 0 && i > 0) {
10344743Smarkm				goto tokenread;
10444743Smarkm			} else {
10544743Smarkm				prev_c = c;
10644743Smarkm				continue;
10744743Smarkm			}
10844743Smarkm		}
109277281Spfg
110277281Spfg		if (com == 1) {
111277281Spfg			*t = ' ';
112277281Spfg			prev_c = c;
113277281Spfg			continue;
114277281Spfg		}
115277281Spfg
116277281Spfg		if (c == '\n' && p != 0 && t > token) {
117277281Spfg			/* in parentheses */
11844743Smarkm			if (line_nr) {
11944743Smarkm				*line_nr = *line_nr + 1;
12044743Smarkm			}
12144743Smarkm			*t++ = ' ';
12244743Smarkm			prev_c = c;
12344743Smarkm			continue;
12444743Smarkm		}
12544743Smarkm
12644743Smarkm		/* check if we hit the delim */
12744743Smarkm		for (d = del; *d; d++) {
12844743Smarkm			if (c == *d && i > 0 && prev_c != '\\' && p == 0) {
12944743Smarkm				if (c == '\n' && line_nr) {
13044743Smarkm					*line_nr = *line_nr + 1;
13144743Smarkm				}
13244743Smarkm				goto tokenread;
133277281Spfg			}
134277281Spfg		}
13544743Smarkm		if (c != '\0' && c != '\n') {
13644743Smarkm			i++;
13744743Smarkm		}
13844743Smarkm		if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
13944743Smarkm			*t = '\0';
14044743Smarkm			return -1;
141277281Spfg		}
14244743Smarkm		if (c != '\0' && c != '\n') {
14344743Smarkm			*t++ = c;
14444743Smarkm		}
14544743Smarkm		if (c == '\\' && prev_c == '\\')
14644743Smarkm			prev_c = 0;
14744743Smarkm		else	prev_c = c;
14844743Smarkm	}
14944743Smarkm	*t = '\0';
15044743Smarkm	if (c == EOF) {
15144743Smarkm		return (ssize_t)i;
15244743Smarkm	}
15344743Smarkm
15444743Smarkm	if (i == 0) {
15544743Smarkm		/* nothing read */
15644743Smarkm		return -1;
15744743Smarkm	}
15844743Smarkm	if (p != 0) {
15944743Smarkm		return -1;
16044743Smarkm	}
16144743Smarkm	return (ssize_t)i;
16244743Smarkm
16344743Smarkmtokenread:
16444743Smarkm	ldns_fskipcs_l(f, del, line_nr);
16544743Smarkm	*t = '\0';
16644743Smarkm	if (p != 0) {
16744743Smarkm		return -1;
16844743Smarkm	}
16944743Smarkm
17044743Smarkm	return (ssize_t)i;
17144743Smarkm}
17244743Smarkm
/*
 * Read a "keyword<sep>data" pair from f without tracking line numbers.
 * Forwards to ldns_fget_keyword_data_l() with a NULL line counter and
 * returns whatever that call returns.
 */
ssize_t
ldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data,
               const char *d_del, size_t data_limit)
{
	int *no_line_counter = NULL;

	return ldns_fget_keyword_data_l(f, keyword, k_del, data, d_del,
			data_limit, no_line_counter);
}
18044743Smarkm
18144743Smarkmssize_t
18244743Smarkmldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data,
18344743Smarkm               const char *d_del, size_t data_limit, int *line_nr)
18444743Smarkm{
18544743Smarkm       /* we assume: keyword|sep|data */
18644743Smarkm       char *fkeyword;
18744743Smarkm       ssize_t i;
18844743Smarkm
18944743Smarkm       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
19044743Smarkm               return -1;
19144743Smarkm       fkeyword = LDNS_XMALLOC(char, LDNS_MAX_KEYWORDLEN);
19244743Smarkm       if(!fkeyword)
19344743Smarkm               return -1;
194277281Spfg
19544743Smarkm       i = ldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN);
19644743Smarkm       if(i==0 || i==-1) {
19744743Smarkm               LDNS_FREE(fkeyword);
19844743Smarkm               return -1;
199277281Spfg       }
20044743Smarkm
20144743Smarkm       /* case??? i instead of strlen? */
20244743Smarkm       if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) {
20344743Smarkm               /* whee! */
204277281Spfg               /* printf("%s\n%s\n", "Matching keyword", fkeyword); */
20544743Smarkm               i = ldns_fget_token_l(f, data, d_del, data_limit, line_nr);
20644743Smarkm               LDNS_FREE(fkeyword);
20744743Smarkm               return i;
20844743Smarkm       } else {
209277281Spfg               /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/
21044743Smarkm               LDNS_FREE(fkeyword);
21144743Smarkm               return -1;
21244743Smarkm       }
21344743Smarkm}
214277281Spfg
21544743Smarkm
21644743Smarkmssize_t
21744743Smarkmldns_bget_token(ldns_buffer *b, char *token, const char *delim, size_t limit)
21844743Smarkm{
219277281Spfg	int c, lc;
22044743Smarkm	int p; /* 0 -> no parenthese seen, >0 nr of ( seen */
22144743Smarkm	int com, quoted;
22244743Smarkm	char *t;
22344743Smarkm	size_t i;
224277281Spfg	const char *d;
22544743Smarkm	const char *del;
22644743Smarkm
227	/* standard delimiters */
228	if (!delim) {
229		/* from isspace(3) */
230		del = LDNS_PARSE_NORMAL;
231	} else {
232		del = delim;
233	}
234
235	p = 0;
236	i = 0;
237	com = 0;
238	quoted = 0;
239	t = token;
240	lc = 0;
241	if (del[0] == '"') {
242		quoted = 1;
243	}
244
245	while ((c = ldns_bgetc(b)) != EOF) {
246		if (c == '\r') /* carriage return */
247			c = ' ';
248		if (c == '(' && lc != '\\' && !quoted) {
249			/* this only counts for non-comments */
250			if (com == 0) {
251				p++;
252			}
253			lc = c;
254			continue;
255		}
256
257		if (c == ')' && lc != '\\' && !quoted) {
258			/* this only counts for non-comments */
259			if (com == 0) {
260				p--;
261			}
262			lc = c;
263			continue;
264		}
265
266		if (p < 0) {
267			/* more ) then ( */
268			*t = '\0';
269			return 0;
270		}
271
272		/* do something with comments ; */
273		if (c == ';' && quoted == 0) {
274			if (lc != '\\') {
275				com = 1;
276			}
277		}
278		if (c == '"' && com == 0 && lc != '\\') {
279			quoted = 1 - quoted;
280		}
281
282		if (c == '\n' && com != 0) {
283			/* comments */
284			com = 0;
285			*t = ' ';
286			lc = c;
287			continue;
288		}
289
290		if (com == 1) {
291			*t = ' ';
292			lc = c;
293			continue;
294		}
295
296		if (c == '\n' && p != 0) {
297			/* in parentheses */
298			*t++ = ' ';
299			lc = c;
300			continue;
301		}
302
303		/* check if we hit the delim */
304		for (d = del; *d; d++) {
305                        if (c == *d && lc != '\\' && p == 0) {
306				goto tokenread;
307                        }
308		}
309
310		i++;
311		if (limit > 0 && (i >= limit || (size_t)(t-token) >= limit)) {
312			*t = '\0';
313			return -1;
314		}
315		*t++ = c;
316
317		if (c == '\\' && lc == '\\') {
318			lc = 0;
319		} else {
320			lc = c;
321		}
322	}
323	*t = '\0';
324	if (i == 0) {
325		/* nothing read */
326		return -1;
327	}
328	if (p != 0) {
329		return -1;
330	}
331	return (ssize_t)i;
332
333tokenread:
334	ldns_bskipcs(b, del);
335	*t = '\0';
336
337	if (p != 0) {
338		return -1;
339	}
340	return (ssize_t)i;
341}
342
343
344void
345ldns_bskipcs(ldns_buffer *buffer, const char *s)
346{
347        bool found;
348        char c;
349        const char *d;
350
351        while(ldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) {
352                c = (char) ldns_buffer_read_u8_at(buffer, buffer->_position);
353                found = false;
354                for (d = s; *d; d++) {
355                        if (*d == c) {
356                                found = true;
357                        }
358                }
359                if (found && buffer->_limit > buffer->_position) {
360                        buffer->_position += sizeof(char);
361                } else {
362                        return;
363                }
364        }
365}
366
/*
 * Skip leading characters from the set s on the stream fp without
 * tracking line numbers. Forwards to ldns_fskipcs_l() with a NULL
 * line counter.
 */
void
ldns_fskipcs(FILE *fp, const char *s)
{
	int *no_line_counter = NULL;

	ldns_fskipcs_l(fp, s, no_line_counter);
}
372
/*
 * Consume characters from fp for as long as they occur in the
 * NUL-terminated set s. The first character that is not in s is pushed
 * back with ungetc() so the next read sees it again.
 *
 * When line_nr is not NULL it is incremented for every '\n' read. Note
 * that this includes a '\n' that is not in s and is therefore pushed back.
 */
void
ldns_fskipcs_l(FILE *fp, const char *s, int *line_nr)
{
	int ch;

	for (ch = fgetc(fp); ch != EOF; ch = fgetc(fp)) {
		const char *member = s;

		if (ch == '\n' && line_nr) {
			*line_nr += 1;
		}

		/* walk the set until we find ch or run out of members */
		while (*member != '\0' && *member != ch) {
			member++;
		}

		if (*member == '\0') {
			/* not a skippable character: we have read one too far */
			ungetc(ch, fp);
			return;
		}
	}
}
397
398ssize_t
399ldns_bget_keyword_data(ldns_buffer *b, const char *keyword, const char *k_del, char
400*data, const char *d_del, size_t data_limit)
401{
402       /* we assume: keyword|sep|data */
403       char *fkeyword;
404       ssize_t i;
405
406       if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
407               return -1;
408       fkeyword = LDNS_XMALLOC(char, LDNS_MAX_KEYWORDLEN);
409       if(!fkeyword)
410               return -1; /* out of memory */
411
412       i = ldns_bget_token(b, fkeyword, k_del, data_limit);
413       if(i==0 || i==-1) {
414               LDNS_FREE(fkeyword);
415               return -1; /* nothing read */
416       }
417
418       /* case??? */
419       if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) {
420               LDNS_FREE(fkeyword);
421               /* whee, the match! */
422               /* retrieve it's data */
423               i = ldns_bget_token(b, data, d_del, 0);
424               return i;
425       } else {
426               LDNS_FREE(fkeyword);
427               return -1;
428       }
429}
430
431