1241675Suqs/*	$Id: mandoc.c,v 1.62 2011/12/03 16:08:51 schwarze Exp $ */
2241675Suqs/*
3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4241675Suqs * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5241675Suqs *
6241675Suqs * Permission to use, copy, modify, and distribute this software for any
7241675Suqs * purpose with or without fee is hereby granted, provided that the above
8241675Suqs * copyright notice and this permission notice appear in all copies.
9241675Suqs *
10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17241675Suqs */
18241675Suqs#ifdef HAVE_CONFIG_H
19241675Suqs#include "config.h"
20241675Suqs#endif
21241675Suqs
22241675Suqs#include <sys/types.h>
23241675Suqs
24241675Suqs#include <assert.h>
25241675Suqs#include <ctype.h>
26241675Suqs#include <errno.h>
27241675Suqs#include <limits.h>
28241675Suqs#include <stdlib.h>
29241675Suqs#include <stdio.h>
30241675Suqs#include <string.h>
31241675Suqs#include <time.h>
32241675Suqs
33241675Suqs#include "mandoc.h"
34241675Suqs#include "libmandoc.h"
35241675Suqs
36241675Suqs#define DATESIZE 32
37241675Suqs
38241675Suqsstatic	int	 a2time(time_t *, const char *, const char *);
39241675Suqsstatic	char	*time2a(time_t);
40241675Suqsstatic	int	 numescape(const char *);
41241675Suqs
42241675Suqs/*
43241675Suqs * Pass over recursive numerical expressions.  This context of this
44241675Suqs * function is important: it's only called within character-terminating
45241675Suqs * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
46241675Suqs * recursion: we don't care about what's in these blocks.
47241675Suqs * This returns the number of characters skipped or -1 if an error
48241675Suqs * occurs (the caller should bail).
49241675Suqs */
50241675Suqsstatic int
51241675Suqsnumescape(const char *start)
52241675Suqs{
53241675Suqs	int		 i;
54241675Suqs	size_t		 sz;
55241675Suqs	const char	*cp;
56241675Suqs
57241675Suqs	i = 0;
58241675Suqs
59241675Suqs	/* The expression consists of a subexpression. */
60241675Suqs
61241675Suqs	if ('\\' == start[i]) {
62241675Suqs		cp = &start[++i];
63241675Suqs		/*
64241675Suqs		 * Read past the end of the subexpression.
65241675Suqs		 * Bail immediately on errors.
66241675Suqs		 */
67241675Suqs		if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
68241675Suqs			return(-1);
69241675Suqs		return(i + cp - &start[i]);
70241675Suqs	}
71241675Suqs
72241675Suqs	if ('(' != start[i++])
73241675Suqs		return(0);
74241675Suqs
75241675Suqs	/*
76241675Suqs	 * A parenthesised subexpression.  Read until the closing
77241675Suqs	 * parenthesis, making sure to handle any nested subexpressions
78241675Suqs	 * that might ruin our parse.
79241675Suqs	 */
80241675Suqs
81241675Suqs	while (')' != start[i]) {
82241675Suqs		sz = strcspn(&start[i], ")\\");
83241675Suqs		i += (int)sz;
84241675Suqs
85241675Suqs		if ('\0' == start[i])
86241675Suqs			return(-1);
87241675Suqs		else if ('\\' != start[i])
88241675Suqs			continue;
89241675Suqs
90241675Suqs		cp = &start[++i];
91241675Suqs		if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
92241675Suqs			return(-1);
93241675Suqs		i += cp - &start[i];
94241675Suqs	}
95241675Suqs
96241675Suqs	/* Read past the terminating ')'. */
97241675Suqs	return(++i);
98241675Suqs}
99241675Suqs
100241675Suqsenum mandoc_esc
101241675Suqsmandoc_escape(const char **end, const char **start, int *sz)
102241675Suqs{
103241675Suqs	char		 c, term, numeric;
104241675Suqs	int		 i, lim, ssz, rlim;
105241675Suqs	const char	*cp, *rstart;
106241675Suqs	enum mandoc_esc	 gly;
107241675Suqs
108241675Suqs	cp = *end;
109241675Suqs	rstart = cp;
110241675Suqs	if (start)
111241675Suqs		*start = rstart;
112241675Suqs	i = lim = 0;
113241675Suqs	gly = ESCAPE_ERROR;
114241675Suqs	term = numeric = '\0';
115241675Suqs
116241675Suqs	switch ((c = cp[i++])) {
117241675Suqs	/*
118241675Suqs	 * First the glyphs.  There are several different forms of
119241675Suqs	 * these, but each eventually returns a substring of the glyph
120241675Suqs	 * name.
121241675Suqs	 */
122241675Suqs	case ('('):
123241675Suqs		gly = ESCAPE_SPECIAL;
124241675Suqs		lim = 2;
125241675Suqs		break;
126241675Suqs	case ('['):
127241675Suqs		gly = ESCAPE_SPECIAL;
128241675Suqs		/*
129241675Suqs		 * Unicode escapes are defined in groff as \[uXXXX] to
130241675Suqs		 * \[u10FFFF], where the contained value must be a valid
131241675Suqs		 * Unicode codepoint.  Here, however, only check whether
132241675Suqs		 * it's not a zero-width escape.
133241675Suqs		 */
134241675Suqs		if ('u' == cp[i] && ']' != cp[i + 1])
135241675Suqs			gly = ESCAPE_UNICODE;
136241675Suqs		term = ']';
137241675Suqs		break;
138241675Suqs	case ('C'):
139241675Suqs		if ('\'' != cp[i])
140241675Suqs			return(ESCAPE_ERROR);
141241675Suqs		gly = ESCAPE_SPECIAL;
142241675Suqs		term = '\'';
143241675Suqs		break;
144241675Suqs
145241675Suqs	/*
146241675Suqs	 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
147241675Suqs	 * 'X' is the trigger.  These have opaque sub-strings.
148241675Suqs	 */
149241675Suqs	case ('F'):
150241675Suqs		/* FALLTHROUGH */
151241675Suqs	case ('g'):
152241675Suqs		/* FALLTHROUGH */
153241675Suqs	case ('k'):
154241675Suqs		/* FALLTHROUGH */
155241675Suqs	case ('M'):
156241675Suqs		/* FALLTHROUGH */
157241675Suqs	case ('m'):
158241675Suqs		/* FALLTHROUGH */
159241675Suqs	case ('n'):
160241675Suqs		/* FALLTHROUGH */
161241675Suqs	case ('V'):
162241675Suqs		/* FALLTHROUGH */
163241675Suqs	case ('Y'):
164241675Suqs		gly = ESCAPE_IGNORE;
165241675Suqs		/* FALLTHROUGH */
166241675Suqs	case ('f'):
167241675Suqs		if (ESCAPE_ERROR == gly)
168241675Suqs			gly = ESCAPE_FONT;
169241675Suqs
170241675Suqs		rstart= &cp[i];
171241675Suqs		if (start)
172241675Suqs			*start = rstart;
173241675Suqs
174241675Suqs		switch (cp[i++]) {
175241675Suqs		case ('('):
176241675Suqs			lim = 2;
177241675Suqs			break;
178241675Suqs		case ('['):
179241675Suqs			term = ']';
180241675Suqs			break;
181241675Suqs		default:
182241675Suqs			lim = 1;
183241675Suqs			i--;
184241675Suqs			break;
185241675Suqs		}
186241675Suqs		break;
187241675Suqs
188241675Suqs	/*
189241675Suqs	 * These escapes are of the form \X'Y', where 'X' is the trigger
190241675Suqs	 * and 'Y' is any string.  These have opaque sub-strings.
191241675Suqs	 */
192241675Suqs	case ('A'):
193241675Suqs		/* FALLTHROUGH */
194241675Suqs	case ('b'):
195241675Suqs		/* FALLTHROUGH */
196241675Suqs	case ('D'):
197241675Suqs		/* FALLTHROUGH */
198241675Suqs	case ('o'):
199241675Suqs		/* FALLTHROUGH */
200241675Suqs	case ('R'):
201241675Suqs		/* FALLTHROUGH */
202241675Suqs	case ('X'):
203241675Suqs		/* FALLTHROUGH */
204241675Suqs	case ('Z'):
205241675Suqs		if ('\'' != cp[i++])
206241675Suqs			return(ESCAPE_ERROR);
207241675Suqs		gly = ESCAPE_IGNORE;
208241675Suqs		term = '\'';
209241675Suqs		break;
210241675Suqs
211241675Suqs	/*
212241675Suqs	 * These escapes are of the form \X'N', where 'X' is the trigger
213241675Suqs	 * and 'N' resolves to a numerical expression.
214241675Suqs	 */
215241675Suqs	case ('B'):
216241675Suqs		/* FALLTHROUGH */
217241675Suqs	case ('h'):
218241675Suqs		/* FALLTHROUGH */
219241675Suqs	case ('H'):
220241675Suqs		/* FALLTHROUGH */
221241675Suqs	case ('L'):
222241675Suqs		/* FALLTHROUGH */
223241675Suqs	case ('l'):
224241675Suqs		gly = ESCAPE_NUMBERED;
225241675Suqs		/* FALLTHROUGH */
226241675Suqs	case ('S'):
227241675Suqs		/* FALLTHROUGH */
228241675Suqs	case ('v'):
229241675Suqs		/* FALLTHROUGH */
230241675Suqs	case ('w'):
231241675Suqs		/* FALLTHROUGH */
232241675Suqs	case ('x'):
233241675Suqs		if (ESCAPE_ERROR == gly)
234241675Suqs			gly = ESCAPE_IGNORE;
235241675Suqs		if ('\'' != cp[i++])
236241675Suqs			return(ESCAPE_ERROR);
237241675Suqs		term = numeric = '\'';
238241675Suqs		break;
239241675Suqs
240241675Suqs	/*
241241675Suqs	 * Special handling for the numbered character escape.
242241675Suqs	 * XXX Do any other escapes need similar handling?
243241675Suqs	 */
244241675Suqs	case ('N'):
245241675Suqs		if ('\0' == cp[i])
246241675Suqs			return(ESCAPE_ERROR);
247241675Suqs		*end = &cp[++i];
248241675Suqs		if (isdigit((unsigned char)cp[i-1]))
249241675Suqs			return(ESCAPE_IGNORE);
250241675Suqs		while (isdigit((unsigned char)**end))
251241675Suqs			(*end)++;
252241675Suqs		if (start)
253241675Suqs			*start = &cp[i];
254241675Suqs		if (sz)
255241675Suqs			*sz = *end - &cp[i];
256241675Suqs		if ('\0' != **end)
257241675Suqs			(*end)++;
258241675Suqs		return(ESCAPE_NUMBERED);
259241675Suqs
260241675Suqs	/*
261241675Suqs	 * Sizes get a special category of their own.
262241675Suqs	 */
263241675Suqs	case ('s'):
264241675Suqs		gly = ESCAPE_IGNORE;
265241675Suqs
266241675Suqs		rstart = &cp[i];
267241675Suqs		if (start)
268241675Suqs			*start = rstart;
269241675Suqs
270241675Suqs		/* See +/- counts as a sign. */
271241675Suqs		c = cp[i];
272241675Suqs		if ('+' == c || '-' == c || ASCII_HYPH == c)
273241675Suqs			++i;
274241675Suqs
275241675Suqs		switch (cp[i++]) {
276241675Suqs		case ('('):
277241675Suqs			lim = 2;
278241675Suqs			break;
279241675Suqs		case ('['):
280241675Suqs			term = numeric = ']';
281241675Suqs			break;
282241675Suqs		case ('\''):
283241675Suqs			term = numeric = '\'';
284241675Suqs			break;
285241675Suqs		default:
286241675Suqs			lim = 1;
287241675Suqs			i--;
288241675Suqs			break;
289241675Suqs		}
290241675Suqs
291241675Suqs		/* See +/- counts as a sign. */
292241675Suqs		c = cp[i];
293241675Suqs		if ('+' == c || '-' == c || ASCII_HYPH == c)
294241675Suqs			++i;
295241675Suqs
296241675Suqs		break;
297241675Suqs
298241675Suqs	/*
299241675Suqs	 * Anything else is assumed to be a glyph.
300241675Suqs	 */
301241675Suqs	default:
302241675Suqs		gly = ESCAPE_SPECIAL;
303241675Suqs		lim = 1;
304241675Suqs		i--;
305241675Suqs		break;
306241675Suqs	}
307241675Suqs
308241675Suqs	assert(ESCAPE_ERROR != gly);
309241675Suqs
310241675Suqs	rstart = &cp[i];
311241675Suqs	if (start)
312241675Suqs		*start = rstart;
313241675Suqs
314241675Suqs	/*
315241675Suqs	 * If a terminating block has been specified, we need to
316241675Suqs	 * handle the case of recursion, which could have their
317241675Suqs	 * own terminating blocks that mess up our parse.  This, by the
318241675Suqs	 * way, means that the "start" and "size" values will be
319241675Suqs	 * effectively meaningless.
320241675Suqs	 */
321241675Suqs
322241675Suqs	ssz = 0;
323241675Suqs	if (numeric && -1 == (ssz = numescape(&cp[i])))
324241675Suqs		return(ESCAPE_ERROR);
325241675Suqs
326241675Suqs	i += ssz;
327241675Suqs	rlim = -1;
328241675Suqs
329241675Suqs	/*
330241675Suqs	 * We have a character terminator.  Try to read up to that
331241675Suqs	 * character.  If we can't (i.e., we hit the nil), then return
332241675Suqs	 * an error; if we can, calculate our length, read past the
333241675Suqs	 * terminating character, and exit.
334241675Suqs	 */
335241675Suqs
336241675Suqs	if ('\0' != term) {
337241675Suqs		*end = strchr(&cp[i], term);
338241675Suqs		if ('\0' == *end)
339241675Suqs			return(ESCAPE_ERROR);
340241675Suqs
341241675Suqs		rlim = *end - &cp[i];
342241675Suqs		if (sz)
343241675Suqs			*sz = rlim;
344241675Suqs		(*end)++;
345241675Suqs		goto out;
346241675Suqs	}
347241675Suqs
348241675Suqs	assert(lim > 0);
349241675Suqs
350241675Suqs	/*
351241675Suqs	 * We have a numeric limit.  If the string is shorter than that,
352241675Suqs	 * stop and return an error.  Else adjust our endpoint, length,
353241675Suqs	 * and return the current glyph.
354241675Suqs	 */
355241675Suqs
356241675Suqs	if ((size_t)lim > strlen(&cp[i]))
357241675Suqs		return(ESCAPE_ERROR);
358241675Suqs
359241675Suqs	rlim = lim;
360241675Suqs	if (sz)
361241675Suqs		*sz = rlim;
362241675Suqs
363241675Suqs	*end = &cp[i] + lim;
364241675Suqs
365241675Suqsout:
366241675Suqs	assert(rlim >= 0 && rstart);
367241675Suqs
368241675Suqs	/* Run post-processors. */
369241675Suqs
370241675Suqs	switch (gly) {
371241675Suqs	case (ESCAPE_FONT):
372241675Suqs		/*
373241675Suqs		 * Pretend that the constant-width font modes are the
374241675Suqs		 * same as the regular font modes.
375241675Suqs		 */
376241675Suqs		if (2 == rlim && 'C' == *rstart)
377241675Suqs			rstart++;
378241675Suqs		else if (1 != rlim)
379241675Suqs			break;
380241675Suqs
381241675Suqs		switch (*rstart) {
382241675Suqs		case ('3'):
383241675Suqs			/* FALLTHROUGH */
384241675Suqs		case ('B'):
385241675Suqs			gly = ESCAPE_FONTBOLD;
386241675Suqs			break;
387241675Suqs		case ('2'):
388241675Suqs			/* FALLTHROUGH */
389241675Suqs		case ('I'):
390241675Suqs			gly = ESCAPE_FONTITALIC;
391241675Suqs			break;
392241675Suqs		case ('P'):
393241675Suqs			gly = ESCAPE_FONTPREV;
394241675Suqs			break;
395241675Suqs		case ('1'):
396241675Suqs			/* FALLTHROUGH */
397241675Suqs		case ('R'):
398241675Suqs			gly = ESCAPE_FONTROMAN;
399241675Suqs			break;
400241675Suqs		}
401241675Suqs		break;
402241675Suqs	case (ESCAPE_SPECIAL):
403241675Suqs		if (1 != rlim)
404241675Suqs			break;
405241675Suqs		if ('c' == *rstart)
406241675Suqs			gly = ESCAPE_NOSPACE;
407241675Suqs		break;
408241675Suqs	default:
409241675Suqs		break;
410241675Suqs	}
411241675Suqs
412241675Suqs	return(gly);
413241675Suqs}
414241675Suqs
415241675Suqsvoid *
416241675Suqsmandoc_calloc(size_t num, size_t size)
417241675Suqs{
418241675Suqs	void		*ptr;
419241675Suqs
420241675Suqs	ptr = calloc(num, size);
421241675Suqs	if (NULL == ptr) {
422241675Suqs		perror(NULL);
423241675Suqs		exit((int)MANDOCLEVEL_SYSERR);
424241675Suqs	}
425241675Suqs
426241675Suqs	return(ptr);
427241675Suqs}
428241675Suqs
429241675Suqs
430241675Suqsvoid *
431241675Suqsmandoc_malloc(size_t size)
432241675Suqs{
433241675Suqs	void		*ptr;
434241675Suqs
435241675Suqs	ptr = malloc(size);
436241675Suqs	if (NULL == ptr) {
437241675Suqs		perror(NULL);
438241675Suqs		exit((int)MANDOCLEVEL_SYSERR);
439241675Suqs	}
440241675Suqs
441241675Suqs	return(ptr);
442241675Suqs}
443241675Suqs
444241675Suqs
445241675Suqsvoid *
446241675Suqsmandoc_realloc(void *ptr, size_t size)
447241675Suqs{
448241675Suqs
449241675Suqs	ptr = realloc(ptr, size);
450241675Suqs	if (NULL == ptr) {
451241675Suqs		perror(NULL);
452241675Suqs		exit((int)MANDOCLEVEL_SYSERR);
453241675Suqs	}
454241675Suqs
455241675Suqs	return(ptr);
456241675Suqs}
457241675Suqs
/*
 * Return a newly allocated, NUL-terminated copy of the first sz bytes
 * at ptr.  Exactly sz bytes are copied with memcpy(3); unlike
 * strndup(3), the copy does not stop early at a NUL byte, so ptr must
 * reference at least sz readable bytes.
 * The memory comes from mandoc_malloc().
 */
char *
mandoc_strndup(const char *ptr, size_t sz)
{
	char		*p;

	p = mandoc_malloc(sz + 1);
	memcpy(p, ptr, sz);
	/* Index with the size_t itself; a cast to int could truncate. */
	p[sz] = '\0';
	return(p);
}
468241675Suqs
469241675Suqschar *
470241675Suqsmandoc_strdup(const char *ptr)
471241675Suqs{
472241675Suqs	char		*p;
473241675Suqs
474241675Suqs	p = strdup(ptr);
475241675Suqs	if (NULL == p) {
476241675Suqs		perror(NULL);
477241675Suqs		exit((int)MANDOCLEVEL_SYSERR);
478241675Suqs	}
479241675Suqs
480241675Suqs	return(p);
481241675Suqs}
482241675Suqs
483241675Suqs/*
484241675Suqs * Parse a quoted or unquoted roff-style request or macro argument.
485241675Suqs * Return a pointer to the parsed argument, which is either the original
486241675Suqs * pointer or advanced by one byte in case the argument is quoted.
487241675Suqs * Null-terminate the argument in place.
488241675Suqs * Collapse pairs of quotes inside quoted arguments.
489241675Suqs * Advance the argument pointer to the next argument,
490241675Suqs * or to the null byte terminating the argument line.
491241675Suqs */
492241675Suqschar *
493241675Suqsmandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
494241675Suqs{
495241675Suqs	char	 *start, *cp;
496241675Suqs	int	  quoted, pairs, white;
497241675Suqs
498241675Suqs	/* Quoting can only start with a new word. */
499241675Suqs	start = *cpp;
500241675Suqs	quoted = 0;
501241675Suqs	if ('"' == *start) {
502241675Suqs		quoted = 1;
503241675Suqs		start++;
504241675Suqs	}
505241675Suqs
506241675Suqs	pairs = 0;
507241675Suqs	white = 0;
508241675Suqs	for (cp = start; '\0' != *cp; cp++) {
509241675Suqs		/* Move left after quoted quotes and escaped backslashes. */
510241675Suqs		if (pairs)
511241675Suqs			cp[-pairs] = cp[0];
512241675Suqs		if ('\\' == cp[0]) {
513241675Suqs			if ('\\' == cp[1]) {
514241675Suqs				/* Poor man's copy mode. */
515241675Suqs				pairs++;
516241675Suqs				cp++;
517241675Suqs			} else if (0 == quoted && ' ' == cp[1])
518241675Suqs				/* Skip escaped blanks. */
519241675Suqs				cp++;
520241675Suqs		} else if (0 == quoted) {
521241675Suqs			if (' ' == cp[0]) {
522241675Suqs				/* Unescaped blanks end unquoted args. */
523241675Suqs				white = 1;
524241675Suqs				break;
525241675Suqs			}
526241675Suqs		} else if ('"' == cp[0]) {
527241675Suqs			if ('"' == cp[1]) {
528241675Suqs				/* Quoted quotes collapse. */
529241675Suqs				pairs++;
530241675Suqs				cp++;
531241675Suqs			} else {
532241675Suqs				/* Unquoted quotes end quoted args. */
533241675Suqs				quoted = 2;
534241675Suqs				break;
535241675Suqs			}
536241675Suqs		}
537241675Suqs	}
538241675Suqs
539241675Suqs	/* Quoted argument without a closing quote. */
540241675Suqs	if (1 == quoted)
541241675Suqs		mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
542241675Suqs
543241675Suqs	/* Null-terminate this argument and move to the next one. */
544241675Suqs	if (pairs)
545241675Suqs		cp[-pairs] = '\0';
546241675Suqs	if ('\0' != *cp) {
547241675Suqs		*cp++ = '\0';
548241675Suqs		while (' ' == *cp)
549241675Suqs			cp++;
550241675Suqs	}
551241675Suqs	*pos += (int)(cp - start) + (quoted ? 1 : 0);
552241675Suqs	*cpp = cp;
553241675Suqs
554241675Suqs	if ('\0' == *cp && (white || ' ' == cp[-1]))
555241675Suqs		mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
556241675Suqs
557241675Suqs	return(start);
558241675Suqs}
559241675Suqs
/*
 * Convert the date string p to a time_t according to the strptime(3)
 * format fmt.  The whole string has to match the format.
 * On success, store the result of mktime(3) in *t and return 1.
 * Otherwise return 0 and leave *t untouched; this is always the case
 * when HAVE_STRPTIME is not defined.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	char		*rest;

	memset(&parsed, 0, sizeof(parsed));

	rest = NULL;
#ifdef	HAVE_STRPTIME
	rest = strptime(p, fmt, &parsed);
#endif
	/* Reject a failed parse as well as trailing garbage. */
	if (NULL == rest || '\0' != *rest)
		return(0);

	*t = mktime(&parsed);
	return(1);
}
579241675Suqs
/*
 * Format the time t in local time as "Month day, year",
 * for example "December 3, 2011".
 * Returns a buffer obtained from mandoc_malloc() that the caller must
 * free, or NULL if the time cannot be converted or any part of the
 * date does not fit into the space reserved for it.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* localtime(3) returns NULL if it cannot convert t. */
	tm = localtime(&t);
	if (NULL == tm)
		return(NULL);

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */
	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))
		goto fail;
	p += ssz;

	/*
	 * snprintf(3) returns the length it would have written,
	 * so a value above 4 means the day was truncated; advancing
	 * by it would move p beyond the space reserved above.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	if (0 == strftime(p, 4 + 1, "%Y", tm))
		goto fail;
	return(buf);

fail:
	free(buf);
	return(NULL);
}
614241675Suqs
615241675Suqschar *
616241675Suqsmandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
617241675Suqs{
618241675Suqs	char		*out;
619241675Suqs	time_t		 t;
620241675Suqs
621241675Suqs	if (NULL == in || '\0' == *in ||
622241675Suqs	    0 == strcmp(in, "$" "Mdocdate$")) {
623241675Suqs		mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
624241675Suqs		time(&t);
625241675Suqs	}
626241675Suqs	else if (a2time(&t, "%Y-%m-%d", in))
627241675Suqs		t = 0;
628241675Suqs	else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
629241675Suqs	    !a2time(&t, "%b %d, %Y", in)) {
630241675Suqs		mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
631241675Suqs		t = 0;
632241675Suqs	}
633241675Suqs	out = t ? time2a(t) : NULL;
634241675Suqs	return(out ? out : mandoc_strdup(in));
635241675Suqs}
636241675Suqs
/*
 * Decide whether the sz bytes at p end a sentence.
 * The buffer is scanned backwards from its last byte: closing
 * delimiters (double quote, single quote, `]', `)') seen before any
 * end-of-sentence punctuation mark the text as enclosed, and `.',
 * `!', `?' count as end-of-sentence punctuation.  The scan stops at
 * the first byte that is neither.
 * Returns 1 if punctuation was found and the text is either not
 * enclosed or the stopping byte is alphanumeric; returns 0 otherwise,
 * including for an empty buffer.  If the buffer holds nothing but
 * delimiters and punctuation, it must not be enclosed.
 * A non-zero `enclosed' argument treats the text as enclosed from
 * the start.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t	 i;
	int	 found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	found = 0;
	/*
	 * Count i down from sz and look at p[i - 1], so that no
	 * pointer before the start of the buffer is ever formed
	 * and sz is never narrowed to int.
	 */
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1])));
		}
	}

	return(found && !enclosed);
}
679241675Suqs
/*
 * Find out whether a line is a macro line or not.
 * A macro line has, at offset *ppos of cp, one of the control
 * sequences `.', `'' or `\.'.  If one is found, move *ppos past it
 * and past any blanks and tabs that follow, and return one.
 * Otherwise return zero and leave *ppos alone.
 */
int
mandoc_getcontrol(const char *cp, int *ppos)
{
	const char	*scan;

	scan = cp + *ppos;

	switch (*scan) {
	case ('\\'):
		/* Only an escaped dot counts as a control sequence. */
		if ('.' != scan[1])
			return(0);
		scan += 2;
		break;
	case ('.'):
		/* FALLTHROUGH */
	case ('\''):
		scan++;
		break;
	default:
		return(0);
	}

	/* Whitespace may separate the control character and the name. */
	while (' ' == *scan || '\t' == *scan)
		scan++;

	*ppos = (int)(scan - cp);
	return(1);
}
705241675Suqs
/*
 * Convert the first sz bytes of p to an int with strtol(3), using
 * the given base.
 * Return -1 if sz exceeds 31, if the text is empty, or if it is not
 * completely consumed by the conversion.  Results outside the range
 * of int are clamped to INT_MAX or INT_MIN.  Note that negative
 * numbers are converted like any others, so -1 can also be the
 * result of a successful conversion.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 digits[32];
	char		*stop;
	long		 val;

	/* The copy needs one byte for the terminating '\0'. */
	if (sz >= sizeof(digits))
		return(-1);

	memcpy(digits, p, sz);
	digits[sz] = '\0';

	errno = 0;
	val = strtol(digits, &stop, base);

	/* Nothing to convert, or trailing bytes left unconverted. */
	if ('\0' == digits[0] || '\0' != *stop)
		return(-1);

	if (val > INT_MAX)
		return(INT_MAX);
	if (val < INT_MIN)
		return(INT_MIN);

	return((int)val);
}
736