apprentice.c revision 169942
168349Sobrien/*
2133359Sobrien * Copyright (c) Ian F. Darwin 1986-1995.
3133359Sobrien * Software written by Ian F. Darwin and others;
4133359Sobrien * maintained 1995-present by Christos Zoulas and others.
5133359Sobrien *
6133359Sobrien * Redistribution and use in source and binary forms, with or without
7133359Sobrien * modification, are permitted provided that the following conditions
8133359Sobrien * are met:
9133359Sobrien * 1. Redistributions of source code must retain the above copyright
10133359Sobrien *    notice immediately at the beginning of the file, without modification,
11133359Sobrien *    this list of conditions, and the following disclaimer.
12133359Sobrien * 2. Redistributions in binary form must reproduce the above copyright
13133359Sobrien *    notice, this list of conditions and the following disclaimer in the
14133359Sobrien *    documentation and/or other materials provided with the distribution.
15133359Sobrien *
16133359Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17133359Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18133359Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19133359Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20133359Sobrien * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21133359Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22133359Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23133359Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24133359Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25133359Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26133359Sobrien * SUCH DAMAGE.
27133359Sobrien */
28133359Sobrien/*
2968349Sobrien * apprentice - make one pass through /etc/magic, learning its secrets.
3068349Sobrien */
3168349Sobrien
3280588Sobrien#include "file.h"
33133359Sobrien#include "magic.h"
3468349Sobrien#include <stdlib.h>
3584685Sobrien#ifdef HAVE_UNISTD_H
3684685Sobrien#include <unistd.h>
3784685Sobrien#endif
3868349Sobrien#include <string.h>
39169942Sobrien#include <assert.h>
4068349Sobrien#include <ctype.h>
41103373Sobrien#include <fcntl.h>
42133359Sobrien#include <sys/stat.h>
43133359Sobrien#include <sys/param.h>
4474784Sobrien#ifdef QUICK
4574784Sobrien#include <sys/mman.h>
4674784Sobrien#endif
4768349Sobrien
4868349Sobrien#ifndef	lint
49169942SobrienFILE_RCSID("@(#)$Id: apprentice.c,v 1.100 2006/12/11 21:48:49 christos Exp $")
5068349Sobrien#endif	/* lint */
5168349Sobrien
5268349Sobrien#define	EATAB {while (isascii((unsigned char) *l) && \
5368349Sobrien		      isspace((unsigned char) *l))  ++l;}
5468349Sobrien#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
5568349Sobrien			tolower((unsigned char) (l)) : (l))
5675937Sobrien/*
5775937Sobrien * Work around a bug in headers on Digital Unix.
5875937Sobrien * At least confirmed for: OSF1 V4.0 878
5975937Sobrien */
6075937Sobrien#if defined(__osf__) && defined(__DECC)
6175937Sobrien#ifdef MAP_FAILED
6275937Sobrien#undef MAP_FAILED
6375937Sobrien#endif
6475937Sobrien#endif
6568349Sobrien
6675937Sobrien#ifndef MAP_FAILED
6775937Sobrien#define MAP_FAILED (void *) -1
6875937Sobrien#endif
6968349Sobrien
7075937Sobrien#ifndef MAP_FILE
7175937Sobrien#define MAP_FILE 0
7275937Sobrien#endif
7375937Sobrien
74133359Sobrien#ifndef MAXPATHLEN
75133359Sobrien#define MAXPATHLEN	1024
7668349Sobrien#endif
7768349Sobrien
78159764Sobrien#define IS_PLAINSTRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \
79139368Sobrien    (t) == FILE_BESTRING16 || (t) == FILE_LESTRING16)
80159764Sobrien
81159764Sobrien#define IS_STRING(t) (IS_PLAINSTRING(t) || (t) == FILE_REGEX || \
82159764Sobrien    (t) == FILE_SEARCH)
83139368Sobrien
84159764Sobrienstruct magic_entry {
85159764Sobrien	struct magic *mp;
86159764Sobrien	uint32_t cont_count;
87159764Sobrien	uint32_t max_count;
88159764Sobrien};
89159764Sobrien
90169942Sobrienconst int file_formats[] = { FILE_FORMAT_STRING };
91169942Sobrienconst size_t file_nformats = sizeof(file_formats) / sizeof(file_formats[0]);
92169942Sobrienconst char *file_names[] = { FILE_FORMAT_NAME };
93169942Sobrienconst size_t file_nnames = sizeof(file_names) / sizeof(file_names[0]);
94169942Sobrien
95159764Sobrienprivate int getvalue(struct magic_set *ms, struct magic *, const char **);
96133359Sobrienprivate int hextoint(int);
97159764Sobrienprivate const char *getstr(struct magic_set *, const char *, char *, int,
98159764Sobrien    int *);
99159764Sobrienprivate int parse(struct magic_set *, struct magic_entry **, uint32_t *,
100169942Sobrien    const char *, size_t, int);
101159764Sobrienprivate void eatsize(const char **);
102133359Sobrienprivate int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
103159764Sobrienprivate size_t apprentice_magic_strength(const struct magic *);
104159764Sobrienprivate int apprentice_sort(const void *, const void *);
105133359Sobrienprivate int apprentice_file(struct magic_set *, struct magic **, uint32_t *,
106133359Sobrien    const char *, int);
107133359Sobrienprivate void byteswap(struct magic *, uint32_t);
108133359Sobrienprivate void bs1(struct magic *);
109133359Sobrienprivate uint16_t swap2(uint16_t);
110133359Sobrienprivate uint32_t swap4(uint32_t);
111169942Sobrienprivate uint64_t swap8(uint64_t);
112139368Sobrienprivate char *mkdbname(const char *, char *, size_t, int);
113133359Sobrienprivate int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
114133359Sobrien    const char *);
115133359Sobrienprivate int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
116133359Sobrien    const char *);
117169942Sobrienprivate int check_format_type(const char *, int);
118139368Sobrienprivate int check_format(struct magic_set *, struct magic *);
11968349Sobrien
120133359Sobrienprivate size_t maxmagic = 0;
121133359Sobrienprivate size_t magicsize = sizeof(struct magic);
12268349Sobrien
123159764Sobrien
12480588Sobrien#ifdef COMPILE_ONLY
12574784Sobrien
126103373Sobrienint main(int, char *[]);
12780588Sobrien
12880588Sobrienint
129103373Sobrienmain(int argc, char *argv[])
13080588Sobrien{
13180588Sobrien	int ret;
132133359Sobrien	struct magic_set *ms;
133133359Sobrien	char *progname;
13480588Sobrien
13580588Sobrien	if ((progname = strrchr(argv[0], '/')) != NULL)
13680588Sobrien		progname++;
13780588Sobrien	else
13880588Sobrien		progname = argv[0];
13980588Sobrien
14080588Sobrien	if (argc != 2) {
141133359Sobrien		(void)fprintf(stderr, "Usage: %s file\n", progname);
142133359Sobrien		return 1;
14380588Sobrien	}
14480588Sobrien
145133359Sobrien	if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
146133359Sobrien		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
147133359Sobrien		return 1;
148133359Sobrien	}
149133359Sobrien	ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0;
150133359Sobrien	if (ret == 1)
151133359Sobrien		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
152133359Sobrien	magic_close(ms);
153133359Sobrien	return ret;
15480588Sobrien}
15580588Sobrien#endif /* COMPILE_ONLY */
15680588Sobrien
15780588Sobrien
15874784Sobrien/*
15974784Sobrien * Handle one file.
16074784Sobrien */
161133359Sobrienprivate int
162133359Sobrienapprentice_1(struct magic_set *ms, const char *fn, int action,
163133359Sobrien    struct mlist *mlist)
16474784Sobrien{
16574784Sobrien	struct magic *magic = NULL;
166103373Sobrien	uint32_t nmagic = 0;
16774784Sobrien	struct mlist *ml;
16874784Sobrien	int rv = -1;
169133359Sobrien	int mapped;
17074784Sobrien
171133359Sobrien	if (magicsize != FILE_MAGICSIZE) {
172133359Sobrien		file_error(ms, 0, "magic element size %lu != %lu",
173133359Sobrien		    (unsigned long)sizeof(*magic),
174133359Sobrien		    (unsigned long)FILE_MAGICSIZE);
175133359Sobrien		return -1;
17674784Sobrien	}
177133359Sobrien
178133359Sobrien	if (action == FILE_COMPILE) {
179133359Sobrien		rv = apprentice_file(ms, &magic, &nmagic, fn, action);
180133359Sobrien		if (rv != 0)
181133359Sobrien			return -1;
182133359Sobrien		rv = apprentice_compile(ms, &magic, &nmagic, fn);
183133359Sobrien		free(magic);
184133359Sobrien		return rv;
185133359Sobrien	}
186159764Sobrien
18780588Sobrien#ifndef COMPILE_ONLY
188133359Sobrien	if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
189133359Sobrien		if (ms->flags & MAGIC_CHECK)
190139368Sobrien			file_magwarn(ms, "using regular magic file `%s'", fn);
191133359Sobrien		rv = apprentice_file(ms, &magic, &nmagic, fn, action);
192133359Sobrien		if (rv != 0)
193133359Sobrien			return -1;
194133359Sobrien		mapped = 0;
195133359Sobrien	}
19674784Sobrien
197133359Sobrien	mapped = rv;
19874784Sobrien
199133359Sobrien	if (magic == NULL || nmagic == 0) {
200133359Sobrien		file_delmagic(magic, mapped, nmagic);
201133359Sobrien		return -1;
202133359Sobrien	}
203133359Sobrien
20474784Sobrien	if ((ml = malloc(sizeof(*ml))) == NULL) {
205133359Sobrien		file_delmagic(magic, mapped, nmagic);
206169942Sobrien		file_oomem(ms, sizeof(*ml));
207133359Sobrien		return -1;
20874784Sobrien	}
20974784Sobrien
21074784Sobrien	ml->magic = magic;
21174784Sobrien	ml->nmagic = nmagic;
212133359Sobrien	ml->mapped = mapped;
21374784Sobrien
214133359Sobrien	mlist->prev->next = ml;
215133359Sobrien	ml->prev = mlist->prev;
216133359Sobrien	ml->next = mlist;
217133359Sobrien	mlist->prev = ml;
21874784Sobrien
219133359Sobrien	return 0;
22080588Sobrien#endif /* COMPILE_ONLY */
22174784Sobrien}
22274784Sobrien
223133359Sobrienprotected void
224133359Sobrienfile_delmagic(struct magic *p, int type, size_t entries)
225133359Sobrien{
226133359Sobrien	if (p == NULL)
227133359Sobrien		return;
228133359Sobrien	switch (type) {
229133359Sobrien	case 2:
230133359Sobrien		p--;
231133359Sobrien		(void)munmap((void *)p, sizeof(*p) * (entries + 1));
232133359Sobrien		break;
233133359Sobrien	case 1:
234133359Sobrien		p--;
235133359Sobrien		/*FALLTHROUGH*/
236133359Sobrien	case 0:
237133359Sobrien		free(p);
238133359Sobrien		break;
239133359Sobrien	default:
240133359Sobrien		abort();
241133359Sobrien	}
242133359Sobrien}
24374784Sobrien
244133359Sobrien
245103373Sobrien/* const char *fn: list of magic files */
246133359Sobrienprotected struct mlist *
247133359Sobrienfile_apprentice(struct magic_set *ms, const char *fn, int action)
24868349Sobrien{
249133359Sobrien	char *p, *mfn, *afn = NULL;
25068349Sobrien	int file_err, errs = -1;
251133359Sobrien	struct mlist *mlist;
252169942Sobrien	static const char mime[] = ".mime";
25368349Sobrien
254133359Sobrien	if (fn == NULL)
255133359Sobrien		fn = getenv("MAGIC");
256133359Sobrien	if (fn == NULL)
257133359Sobrien		fn = MAGIC;
258133359Sobrien
259133359Sobrien	if ((fn = mfn = strdup(fn)) == NULL) {
260169942Sobrien		file_oomem(ms, strlen(fn));
261133359Sobrien		return NULL;
26268349Sobrien	}
263133359Sobrien
264133359Sobrien	if ((mlist = malloc(sizeof(*mlist))) == NULL) {
265133359Sobrien		free(mfn);
266169942Sobrien		file_oomem(ms, sizeof(*mlist));
267133359Sobrien		return NULL;
268133359Sobrien	}
269133359Sobrien	mlist->next = mlist->prev = mlist;
270133359Sobrien
27168349Sobrien	while (fn) {
27268349Sobrien		p = strchr(fn, PATHSEP);
27368349Sobrien		if (p)
27468349Sobrien			*p++ = '\0';
275133359Sobrien		if (*fn == '\0')
276133359Sobrien			break;
277133359Sobrien		if (ms->flags & MAGIC_MIME) {
278169942Sobrien			size_t len = strlen(fn) + sizeof(mime);
279169942Sobrien			if ((afn = malloc(len)) == NULL) {
280133359Sobrien				free(mfn);
281133359Sobrien				free(mlist);
282169942Sobrien				file_oomem(ms, len);
283133359Sobrien				return NULL;
284133359Sobrien			}
285133359Sobrien			(void)strcpy(afn, fn);
286169942Sobrien			(void)strcat(afn, mime);
287133359Sobrien			fn = afn;
288133359Sobrien		}
289133359Sobrien		file_err = apprentice_1(ms, fn, action, mlist);
29068349Sobrien		if (file_err > errs)
29168349Sobrien			errs = file_err;
292133359Sobrien		if (afn) {
293133359Sobrien			free(afn);
294133359Sobrien			afn = NULL;
295133359Sobrien		}
29668349Sobrien		fn = p;
29768349Sobrien	}
298133359Sobrien	if (errs == -1) {
299133359Sobrien		free(mfn);
300133359Sobrien		free(mlist);
301133359Sobrien		mlist = NULL;
302133359Sobrien		file_error(ms, 0, "could not find any magic files!");
303133359Sobrien		return NULL;
304133359Sobrien	}
30568349Sobrien	free(mfn);
306133359Sobrien	return mlist;
30768349Sobrien}
30868349Sobrien
309169942Sobrien/*
310169942Sobrien * Get weight of this magic entry, for sorting purposes.
311169942Sobrien */
312159764Sobrienprivate size_t
313159764Sobrienapprentice_magic_strength(const struct magic *m)
314159764Sobrien{
315169942Sobrien#define MULT 10
316169942Sobrien	size_t val = 2 * MULT;	/* baseline strength */
317169942Sobrien
318159764Sobrien	switch (m->type) {
319159764Sobrien	case FILE_BYTE:
320169942Sobrien		val += 1 * MULT;
321169942Sobrien		break;
322159764Sobrien
323159764Sobrien	case FILE_SHORT:
324159764Sobrien	case FILE_LESHORT:
325159764Sobrien	case FILE_BESHORT:
326169942Sobrien		val += 2 * MULT;
327169942Sobrien		break;
328159764Sobrien
329159764Sobrien	case FILE_LONG:
330159764Sobrien	case FILE_LELONG:
331159764Sobrien	case FILE_BELONG:
332159764Sobrien	case FILE_MELONG:
333169942Sobrien		val += 4 * MULT;
334169942Sobrien		break;
335159764Sobrien
336159764Sobrien	case FILE_PSTRING:
337159764Sobrien	case FILE_STRING:
338169942Sobrien		val += m->vallen * MULT;
339169942Sobrien		break;
340169942Sobrien
341159764Sobrien	case FILE_BESTRING16:
342159764Sobrien	case FILE_LESTRING16:
343169942Sobrien		val += m->vallen * MULT / 2;
344169942Sobrien		break;
345169942Sobrien
346159764Sobrien	case FILE_SEARCH:
347169942Sobrien	case FILE_REGEX:
348169942Sobrien		val += m->vallen;
349169942Sobrien		break;
350159764Sobrien
351159764Sobrien	case FILE_DATE:
352159764Sobrien	case FILE_LEDATE:
353159764Sobrien	case FILE_BEDATE:
354159764Sobrien	case FILE_MEDATE:
355159764Sobrien	case FILE_LDATE:
356159764Sobrien	case FILE_LELDATE:
357159764Sobrien	case FILE_BELDATE:
358159764Sobrien	case FILE_MELDATE:
359169942Sobrien		val += 4 * MULT;
360169942Sobrien		break;
361159764Sobrien
362169942Sobrien	case FILE_QUAD:
363169942Sobrien	case FILE_BEQUAD:
364169942Sobrien	case FILE_LEQUAD:
365169942Sobrien	case FILE_QDATE:
366169942Sobrien	case FILE_LEQDATE:
367169942Sobrien	case FILE_BEQDATE:
368169942Sobrien	case FILE_QLDATE:
369169942Sobrien	case FILE_LEQLDATE:
370169942Sobrien	case FILE_BEQLDATE:
371169942Sobrien		val += 8 * MULT;
372169942Sobrien		break;
373169942Sobrien
374159764Sobrien	default:
375169942Sobrien		val = 0;
376169942Sobrien		(void)fprintf(stderr, "Bad type %d\n", m->type);
377169942Sobrien		abort();
378159764Sobrien	}
379169942Sobrien
380169942Sobrien	switch (m->reln) {
381169942Sobrien	case 'x':	/* matches anything penalize */
382169942Sobrien		val = 0;
383169942Sobrien		break;
384169942Sobrien
385169942Sobrien	case '!':
386169942Sobrien	case '=':	/* Exact match, prefer */
387169942Sobrien		val += MULT;
388169942Sobrien		break;
389169942Sobrien
390169942Sobrien	case '>':
391169942Sobrien	case '<':	/* comparison match reduce strength */
392169942Sobrien		val -= 2 * MULT;
393169942Sobrien		break;
394169942Sobrien
395169942Sobrien	case '^':
396169942Sobrien	case '&':	/* masking bits, we could count them too */
397169942Sobrien		val -= MULT;
398169942Sobrien		break;
399169942Sobrien
400169942Sobrien	default:
401169942Sobrien		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
402169942Sobrien		abort();
403169942Sobrien	}
404169942Sobrien	return val;
405159764Sobrien}
406159764Sobrien
407169942Sobrien/*
408169942Sobrien * Sort callback for sorting entries by "strength" (basically length)
409169942Sobrien */
410159764Sobrienprivate int
411159764Sobrienapprentice_sort(const void *a, const void *b)
412159764Sobrien{
413159764Sobrien	const struct magic_entry *ma = a;
414159764Sobrien	const struct magic_entry *mb = b;
415159764Sobrien	size_t sa = apprentice_magic_strength(ma->mp);
416159764Sobrien	size_t sb = apprentice_magic_strength(mb->mp);
417159764Sobrien	if (sa == sb)
418159764Sobrien		return 0;
419159764Sobrien	else if (sa > sb)
420159764Sobrien		return -1;
421159764Sobrien	else
422159764Sobrien		return 1;
423159764Sobrien}
424159764Sobrien
42574784Sobrien/*
42674784Sobrien * parse from a file
427103373Sobrien * const char *fn: name of magic file
42874784Sobrien */
429133359Sobrienprivate int
430133359Sobrienapprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
431133359Sobrien    const char *fn, int action)
43268349Sobrien{
433133359Sobrien	private const char hdr[] =
43468349Sobrien		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
43568349Sobrien	FILE *f;
43668349Sobrien	char line[BUFSIZ+1];
43768349Sobrien	int errs = 0;
438159764Sobrien	struct magic_entry *marray;
439169942Sobrien	uint32_t marraycount, i, mentrycount = 0;
440169942Sobrien	size_t lineno = 0;
44168349Sobrien
442169942Sobrien	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
443169942Sobrien
444139368Sobrien	f = fopen(ms->file = fn, "r");
44574784Sobrien	if (f == NULL) {
44668349Sobrien		if (errno != ENOENT)
447133359Sobrien			file_error(ms, errno, "cannot read magic file `%s'",
448133359Sobrien			    fn);
44968349Sobrien		return -1;
45068349Sobrien	}
45168349Sobrien
452169942Sobrien        maxmagic = MAXMAGIS;
453159825Sobrien	if ((marray = calloc(maxmagic, sizeof(*marray))) == NULL) {
454133359Sobrien		(void)fclose(f);
455169942Sobrien		file_oomem(ms, maxmagic * sizeof(*marray));
456133359Sobrien		return -1;
45774784Sobrien	}
458159764Sobrien	marraycount = 0;
45974784Sobrien
460133359Sobrien	/* print silly verbose header for USG compat. */
461133359Sobrien	if (action == FILE_CHECK)
462133359Sobrien		(void)fprintf(stderr, "%s\n", hdr);
463133359Sobrien
464169942Sobrien	/* read and parse this file */
465139368Sobrien	for (ms->line = 1; fgets(line, BUFSIZ, f) != NULL; ms->line++) {
466139368Sobrien		size_t len;
467139368Sobrien		len = strlen(line);
468169942Sobrien		if (len == 0) /* null line, garbage, etc */
46968349Sobrien			continue;
470169942Sobrien		if (line[len - 1] == '\n') {
471169942Sobrien			lineno++;
472159764Sobrien			line[len - 1] = '\0'; /* delete newline */
473169942Sobrien		}
474169942Sobrien		if (line[0] == '\0')	/* empty, do not parse */
475169942Sobrien			continue;
476169942Sobrien		if (line[0] == '#')	/* comment, do not parse */
477169942Sobrien			continue;
478169942Sobrien		if (parse(ms, &marray, &marraycount, line, lineno, action) != 0)
479159764Sobrien			errs++;
48068349Sobrien	}
48168349Sobrien
482133359Sobrien	(void)fclose(f);
483159764Sobrien	if (errs)
484159764Sobrien		goto out;
485159764Sobrien
486159764Sobrien#ifndef NOORDER
487159764Sobrien	qsort(marray, marraycount, sizeof(*marray), apprentice_sort);
488159764Sobrien#endif
489159764Sobrien
490169942Sobrien	for (i = 0; i < marraycount; i++)
491159764Sobrien		mentrycount += marray[i].cont_count;
492159764Sobrien
493159764Sobrien	if ((*magicp = malloc(sizeof(**magicp) * mentrycount)) == NULL) {
494169942Sobrien		file_oomem(ms, sizeof(**magicp) * mentrycount);
495159764Sobrien		errs++;
496159764Sobrien		goto out;
497159764Sobrien	}
498159764Sobrien
499159764Sobrien	mentrycount = 0;
500159764Sobrien	for (i = 0; i < marraycount; i++) {
501159764Sobrien		(void)memcpy(*magicp + mentrycount, marray[i].mp,
502159764Sobrien		    marray[i].cont_count * sizeof(**magicp));
503159764Sobrien		mentrycount += marray[i].cont_count;
504159764Sobrien	}
505159764Sobrienout:
506159764Sobrien	for (i = 0; i < marraycount; i++)
507159764Sobrien		free(marray[i].mp);
508159764Sobrien	free(marray);
50974784Sobrien	if (errs) {
51074784Sobrien		*magicp = NULL;
51174784Sobrien		*nmagicp = 0;
512159764Sobrien		return errs;
513159764Sobrien	} else {
514159764Sobrien		*nmagicp = mentrycount;
515159764Sobrien		return 0;
51674784Sobrien	}
517159764Sobrien
51868349Sobrien}
51968349Sobrien
52068349Sobrien/*
52168349Sobrien * extend the sign bit if the comparison is to be signed
52268349Sobrien */
523169942Sobrienprotected uint64_t
524169942Sobrienfile_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
52568349Sobrien{
52668349Sobrien	if (!(m->flag & UNSIGNED))
52768349Sobrien		switch(m->type) {
52868349Sobrien		/*
52968349Sobrien		 * Do not remove the casts below.  They are
53068349Sobrien		 * vital.  When later compared with the data,
53168349Sobrien		 * the sign extension must have happened.
53268349Sobrien		 */
533133359Sobrien		case FILE_BYTE:
53468349Sobrien			v = (char) v;
53568349Sobrien			break;
536133359Sobrien		case FILE_SHORT:
537133359Sobrien		case FILE_BESHORT:
538133359Sobrien		case FILE_LESHORT:
53968349Sobrien			v = (short) v;
54068349Sobrien			break;
541133359Sobrien		case FILE_DATE:
542133359Sobrien		case FILE_BEDATE:
543133359Sobrien		case FILE_LEDATE:
544159764Sobrien		case FILE_MEDATE:
545133359Sobrien		case FILE_LDATE:
546133359Sobrien		case FILE_BELDATE:
547133359Sobrien		case FILE_LELDATE:
548159764Sobrien		case FILE_MELDATE:
549133359Sobrien		case FILE_LONG:
550133359Sobrien		case FILE_BELONG:
551133359Sobrien		case FILE_LELONG:
552159764Sobrien		case FILE_MELONG:
553103373Sobrien			v = (int32_t) v;
55468349Sobrien			break;
555169942Sobrien		case FILE_QUAD:
556169942Sobrien		case FILE_BEQUAD:
557169942Sobrien		case FILE_LEQUAD:
558169942Sobrien		case FILE_QDATE:
559169942Sobrien		case FILE_QLDATE:
560169942Sobrien		case FILE_BEQDATE:
561169942Sobrien		case FILE_BEQLDATE:
562169942Sobrien		case FILE_LEQDATE:
563169942Sobrien		case FILE_LEQLDATE:
564169942Sobrien			v = (int64_t) v;
565169942Sobrien			break;
566133359Sobrien		case FILE_STRING:
567133359Sobrien		case FILE_PSTRING:
568139368Sobrien		case FILE_BESTRING16:
569139368Sobrien		case FILE_LESTRING16:
570133359Sobrien		case FILE_REGEX:
571159764Sobrien		case FILE_SEARCH:
572103373Sobrien			break;
57368349Sobrien		default:
574133359Sobrien			if (ms->flags & MAGIC_CHECK)
575139368Sobrien			    file_magwarn(ms, "cannot happen: m->type=%d\n",
576133359Sobrien				    m->type);
577133359Sobrien			return ~0U;
57868349Sobrien		}
57968349Sobrien	return v;
58068349Sobrien}
58168349Sobrien
58268349Sobrien/*
58368349Sobrien * parse one line from magic file, put into magic[index++] if valid
58468349Sobrien */
585133359Sobrienprivate int
586159764Sobrienparse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
587169942Sobrien    const char *line, size_t lineno, int action)
58868349Sobrien{
589169942Sobrien	size_t i;
590159764Sobrien	struct magic_entry *me;
59168349Sobrien	struct magic *m;
592159764Sobrien	const char *l = line;
59384685Sobrien	char *t;
594133359Sobrien	private const char *fops = FILE_OPS;
595169942Sobrien	uint64_t val;
596159825Sobrien	uint32_t cont_level;
59768349Sobrien
598159764Sobrien	cont_level = 0;
59968349Sobrien
60068349Sobrien	while (*l == '>') {
60168349Sobrien		++l;		/* step over */
602159764Sobrien		cont_level++;
60368349Sobrien	}
60468349Sobrien
605159764Sobrien#define ALLOC_CHUNK	(size_t)10
606159764Sobrien#define ALLOC_INCR	(size_t)200
607159764Sobrien
608159764Sobrien	if (cont_level != 0) {
609159764Sobrien		if (*nmentryp == 0) {
610159764Sobrien			file_error(ms, 0, "No current entry for continuation");
611159764Sobrien			return -1;
612159764Sobrien		}
613159764Sobrien		me = &(*mentryp)[*nmentryp - 1];
614159764Sobrien		if (me->cont_count == me->max_count) {
615159764Sobrien			struct magic *nm;
616159764Sobrien			size_t cnt = me->max_count + ALLOC_CHUNK;
617159764Sobrien			if ((nm = realloc(me->mp, sizeof(*nm) * cnt)) == NULL) {
618169942Sobrien				file_oomem(ms, sizeof(*nm) * cnt);
619159764Sobrien				return -1;
620159764Sobrien			}
621159764Sobrien			me->mp = m = nm;
622159764Sobrien			me->max_count = cnt;
623159764Sobrien		}
624159764Sobrien		m = &me->mp[me->cont_count++];
625159764Sobrien		memset(m, 0, sizeof(*m));
626159764Sobrien		m->cont_level = cont_level;
627159764Sobrien	} else {
628159764Sobrien		if (*nmentryp == maxmagic) {
629159764Sobrien			struct magic_entry *mp;
630159764Sobrien
631159764Sobrien			maxmagic += ALLOC_INCR;
632159764Sobrien			if ((mp = realloc(*mentryp, sizeof(*mp) * maxmagic)) ==
633159764Sobrien			    NULL) {
634169942Sobrien				file_oomem(ms, sizeof(*mp) * maxmagic);
635159764Sobrien				return -1;
636159764Sobrien			}
637159764Sobrien			(void)memset(&mp[*nmentryp], 0, sizeof(*mp) *
638159764Sobrien			    ALLOC_INCR);
639159764Sobrien			*mentryp = mp;
640159764Sobrien		}
641159764Sobrien		me = &(*mentryp)[*nmentryp];
642159764Sobrien		if (me->mp == NULL) {
643159764Sobrien			if ((m = malloc(sizeof(*m) * ALLOC_CHUNK)) == NULL) {
644169942Sobrien				file_oomem(ms, sizeof(*m) * ALLOC_CHUNK);
645159764Sobrien				return -1;
646159764Sobrien			}
647159764Sobrien			me->mp = m;
648159764Sobrien			me->max_count = ALLOC_CHUNK;
649159764Sobrien		} else
650159764Sobrien			m = me->mp;
651159764Sobrien		memset(m, 0, sizeof(*m));
652159764Sobrien		m->cont_level = 0;
653159764Sobrien		me->cont_count = 1;
654159764Sobrien	}
655169942Sobrien	m->lineno = lineno;
656159764Sobrien
657159764Sobrien	if (m->cont_level != 0 && *l == '&') {
658159764Sobrien                ++l;            /* step over */
659159764Sobrien                m->flag |= OFFADD;
660159764Sobrien        }
66168349Sobrien	if (m->cont_level != 0 && *l == '(') {
66268349Sobrien		++l;		/* step over */
66368349Sobrien		m->flag |= INDIR;
664159764Sobrien		if (m->flag & OFFADD)
665159764Sobrien			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
66668349Sobrien	}
66768349Sobrien	if (m->cont_level != 0 && *l == '&') {
66868349Sobrien                ++l;            /* step over */
66980588Sobrien                m->flag |= OFFADD;
67068349Sobrien        }
67168349Sobrien
67268349Sobrien	/* get offset, then skip over it */
673133359Sobrien	m->offset = (uint32_t)strtoul(l, &t, 0);
67468349Sobrien        if (l == t)
675133359Sobrien		if (ms->flags & MAGIC_CHECK)
676139368Sobrien			file_magwarn(ms, "offset `%s' invalid", l);
67768349Sobrien        l = t;
67868349Sobrien
67968349Sobrien	if (m->flag & INDIR) {
680133359Sobrien		m->in_type = FILE_LONG;
68174784Sobrien		m->in_offset = 0;
68268349Sobrien		/*
68368349Sobrien		 * read [.lbs][+-]nnnnn)
68468349Sobrien		 */
68568349Sobrien		if (*l == '.') {
68668349Sobrien			l++;
68768349Sobrien			switch (*l) {
68868349Sobrien			case 'l':
689133359Sobrien				m->in_type = FILE_LELONG;
69068349Sobrien				break;
69168349Sobrien			case 'L':
692133359Sobrien				m->in_type = FILE_BELONG;
69368349Sobrien				break;
694159764Sobrien			case 'm':
695159764Sobrien				m->in_type = FILE_MELONG;
696159764Sobrien				break;
69768349Sobrien			case 'h':
69868349Sobrien			case 's':
699133359Sobrien				m->in_type = FILE_LESHORT;
70068349Sobrien				break;
70168349Sobrien			case 'H':
70268349Sobrien			case 'S':
703133359Sobrien				m->in_type = FILE_BESHORT;
70468349Sobrien				break;
70568349Sobrien			case 'c':
70668349Sobrien			case 'b':
70768349Sobrien			case 'C':
70868349Sobrien			case 'B':
709133359Sobrien				m->in_type = FILE_BYTE;
71068349Sobrien				break;
71168349Sobrien			default:
712133359Sobrien				if (ms->flags & MAGIC_CHECK)
713139368Sobrien					file_magwarn(ms,
714139368Sobrien					    "indirect offset type `%c' invalid",
715133359Sobrien					    *l);
71668349Sobrien				break;
71768349Sobrien			}
71868349Sobrien			l++;
71968349Sobrien		}
72080588Sobrien		if (*l == '~') {
721159764Sobrien			m->in_op |= FILE_OPINVERSE;
72280588Sobrien			l++;
72380588Sobrien		}
72480588Sobrien		switch (*l) {
72580588Sobrien		case '&':
726133359Sobrien			m->in_op |= FILE_OPAND;
72780588Sobrien			l++;
72880588Sobrien			break;
72980588Sobrien		case '|':
730133359Sobrien			m->in_op |= FILE_OPOR;
73180588Sobrien			l++;
73280588Sobrien			break;
73380588Sobrien		case '^':
734133359Sobrien			m->in_op |= FILE_OPXOR;
73580588Sobrien			l++;
73680588Sobrien			break;
73780588Sobrien		case '+':
738133359Sobrien			m->in_op |= FILE_OPADD;
73980588Sobrien			l++;
74080588Sobrien			break;
74180588Sobrien		case '-':
742133359Sobrien			m->in_op |= FILE_OPMINUS;
74380588Sobrien			l++;
74480588Sobrien			break;
74580588Sobrien		case '*':
746133359Sobrien			m->in_op |= FILE_OPMULTIPLY;
74780588Sobrien			l++;
74880588Sobrien			break;
74980588Sobrien		case '/':
750133359Sobrien			m->in_op |= FILE_OPDIVIDE;
75180588Sobrien			l++;
75280588Sobrien			break;
75380588Sobrien		case '%':
754133359Sobrien			m->in_op |= FILE_OPMODULO;
75580588Sobrien			l++;
75680588Sobrien			break;
75780588Sobrien		}
758159764Sobrien		if (*l == '(') {
759159764Sobrien			m->in_op |= FILE_OPINDIRECT;
760159764Sobrien			l++;
761159764Sobrien		}
762159764Sobrien		if (isdigit((unsigned char)*l) || *l == '-') {
763159764Sobrien			m->in_offset = (int32_t)strtol(l, &t, 0);
764159764Sobrien			l = t;
765159764Sobrien		}
766159764Sobrien		if (*l++ != ')' ||
767159764Sobrien		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
768133359Sobrien			if (ms->flags & MAGIC_CHECK)
769139368Sobrien				file_magwarn(ms,
770139368Sobrien				    "missing ')' in indirect offset");
77168349Sobrien	}
77268349Sobrien
77368349Sobrien
77468349Sobrien	while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
77568349Sobrien		++l;
77668349Sobrien	EATAB;
77768349Sobrien
77868349Sobrien	if (*l == 'u') {
77968349Sobrien		++l;
78068349Sobrien		m->flag |= UNSIGNED;
78168349Sobrien	}
78268349Sobrien
78368349Sobrien	/* get type, skip it */
784169942Sobrien	for (i = 0; i < file_nnames; i++) {
785169942Sobrien		size_t len = strlen(file_names[i]);
786169942Sobrien		if (strncmp(l, file_names[i], len) == 0) {
787169942Sobrien			m->type = i;
788169942Sobrien			l+= len;
789169942Sobrien			break;
790169942Sobrien		}
791169942Sobrien	}
792169942Sobrien	if (i == file_nnames) {
793133359Sobrien		if (ms->flags & MAGIC_CHECK)
794139368Sobrien			file_magwarn(ms, "type `%s' invalid", l);
79568349Sobrien		return -1;
79668349Sobrien	}
79768349Sobrien	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
79880588Sobrien	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
79980588Sobrien	if (*l == '~') {
800139368Sobrien		if (!IS_STRING(m->type))
801159764Sobrien			m->mask_op |= FILE_OPINVERSE;
80268349Sobrien		++l;
80380588Sobrien	}
804133359Sobrien	if ((t = strchr(fops,  *l)) != NULL) {
805133359Sobrien		uint32_t op = (uint32_t)(t - fops);
806159764Sobrien		if (op != FILE_OPDIVIDE || !IS_PLAINSTRING(m->type)) {
80780588Sobrien			++l;
808133359Sobrien			m->mask_op |= op;
809169942Sobrien			val = (uint64_t)strtoull(l, &t, 0);
810159764Sobrien			l = t;
811133359Sobrien			m->mask = file_signextend(ms, m, val);
81280588Sobrien			eatsize(&l);
81380588Sobrien		} else {
81480588Sobrien			m->mask = 0L;
815133359Sobrien			while (!isspace((unsigned char)*++l)) {
81668349Sobrien				switch (*l) {
81768349Sobrien				case CHAR_IGNORE_LOWERCASE:
81868349Sobrien					m->mask |= STRING_IGNORE_LOWERCASE;
81968349Sobrien					break;
82068349Sobrien				case CHAR_COMPACT_BLANK:
82168349Sobrien					m->mask |= STRING_COMPACT_BLANK;
82268349Sobrien					break;
82368349Sobrien				case CHAR_COMPACT_OPTIONAL_BLANK:
82468349Sobrien					m->mask |=
82568349Sobrien					    STRING_COMPACT_OPTIONAL_BLANK;
82668349Sobrien					break;
82768349Sobrien				default:
828133359Sobrien					if (ms->flags & MAGIC_CHECK)
829139368Sobrien						file_magwarn(ms,
830139368Sobrien						"string extension `%c' invalid",
831133359Sobrien						*l);
83268349Sobrien					return -1;
83368349Sobrien				}
83468349Sobrien			}
835159764Sobrien			++l;
83668349Sobrien		}
83780588Sobrien	}
838133359Sobrien	/*
839133359Sobrien	 * We used to set mask to all 1's here, instead let's just not do
840133359Sobrien	 * anything if mask = 0 (unless you have a better idea)
841133359Sobrien	 */
84268349Sobrien	EATAB;
84368349Sobrien
84468349Sobrien	switch (*l) {
84568349Sobrien	case '>':
84668349Sobrien	case '<':
84768349Sobrien	/* Old-style anding: "0 byte &0x80 dynamically linked" */
84868349Sobrien	case '&':
84968349Sobrien	case '^':
85068349Sobrien	case '=':
85168349Sobrien  		m->reln = *l;
85268349Sobrien  		++l;
85368349Sobrien		if (*l == '=') {
85468349Sobrien		   /* HP compat: ignore &= etc. */
85568349Sobrien		   ++l;
85668349Sobrien		}
85768349Sobrien		break;
85868349Sobrien	case '!':
859159764Sobrien		m->reln = *l;
860159764Sobrien		++l;
861159764Sobrien		break;
86268349Sobrien	default:
863159764Sobrien		if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
864159764Sobrien		    isspace((unsigned char)l[1])) || !l[1])) {
86568349Sobrien			m->reln = *l;
86668349Sobrien			++l;
86768349Sobrien			goto GetDesc;	/* Bill The Cat */
86868349Sobrien		}
86968349Sobrien  		m->reln = '=';
87068349Sobrien		break;
87168349Sobrien	}
87268349Sobrien  	EATAB;
87368349Sobrien
874133359Sobrien	if (getvalue(ms, m, &l))
87568349Sobrien		return -1;
87668349Sobrien	/*
87768349Sobrien	 * TODO finish this macro and start using it!
87868349Sobrien	 * #define offsetcheck {if (offset > HOWMANY-1)
87968349Sobrien	 *	magwarn("offset too big"); }
88068349Sobrien	 */
88168349Sobrien
88268349Sobrien	/*
88368349Sobrien	 * now get last part - the description
88468349Sobrien	 */
88568349SobrienGetDesc:
88668349Sobrien	EATAB;
88768349Sobrien	if (l[0] == '\b') {
88868349Sobrien		++l;
88968349Sobrien		m->nospflag = 1;
89068349Sobrien	} else if ((l[0] == '\\') && (l[1] == 'b')) {
89168349Sobrien		++l;
89268349Sobrien		++l;
89368349Sobrien		m->nospflag = 1;
89468349Sobrien	} else
89568349Sobrien		m->nospflag = 0;
896169942Sobrien	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
897169942Sobrien		continue;
898169942Sobrien	if (i == sizeof(m->desc)) {
899169942Sobrien		m->desc[sizeof(m->desc) - 1] = '\0';
900169942Sobrien		if (ms->flags & MAGIC_CHECK)
901169942Sobrien			file_magwarn(ms, "description `%s' truncated", m->desc);
902169942Sobrien	}
90368349Sobrien
904169942Sobrien        /*
905169942Sobrien	 * We only do this check while compiling, or if any of the magic
906169942Sobrien	 * files were not compiled.
907169942Sobrien         */
908169942Sobrien        if (ms->flags & MAGIC_CHECK) {
909169942Sobrien		if (check_format(ms, m) == -1)
910133359Sobrien			return -1;
911133359Sobrien	}
912103373Sobrien#ifndef COMPILE_ONLY
913133359Sobrien	if (action == FILE_CHECK) {
914133359Sobrien		file_mdump(m);
91568349Sobrien	}
916103373Sobrien#endif
917159764Sobrien	if (m->cont_level == 0)
918159764Sobrien		++(*nmentryp);		/* make room for next */
91968349Sobrien	return 0;
92068349Sobrien}
92168349Sobrien
922169942Sobrienprivate int
923169942Sobriencheck_format_type(const char *ptr, int type)
924169942Sobrien{
925169942Sobrien	int quad = 0;
926169942Sobrien	if (*ptr == '\0') {
927169942Sobrien		/* Missing format string; bad */
928169942Sobrien		return -1;
929169942Sobrien	}
930169942Sobrien
931169942Sobrien	switch (type) {
932169942Sobrien	case FILE_FMT_QUAD:
933169942Sobrien		quad = 1;
934169942Sobrien		/*FALLTHROUGH*/
935169942Sobrien	case FILE_FMT_NUM:
936169942Sobrien		if (*ptr == '-')
937169942Sobrien			ptr++;
938169942Sobrien		if (*ptr == '.')
939169942Sobrien			ptr++;
940169942Sobrien		while (isdigit((unsigned char)*ptr)) ptr++;
941169942Sobrien		if (*ptr == '.')
942169942Sobrien			ptr++;
943169942Sobrien		while (isdigit((unsigned char)*ptr)) ptr++;
944169942Sobrien		if (quad) {
945169942Sobrien			if (*ptr++ != 'l')
946169942Sobrien				return -1;
947169942Sobrien			if (*ptr++ != 'l')
948169942Sobrien				return -1;
949169942Sobrien		}
950169942Sobrien
951169942Sobrien		switch (*ptr++) {
952169942Sobrien		case 'l':
953169942Sobrien			switch (*ptr++) {
954169942Sobrien			case 'i':
955169942Sobrien			case 'd':
956169942Sobrien			case 'u':
957169942Sobrien			case 'x':
958169942Sobrien			case 'X':
959169942Sobrien				return 0;
960169942Sobrien			default:
961169942Sobrien				return -1;
962169942Sobrien			}
963169942Sobrien
964169942Sobrien		case 'h':
965169942Sobrien			switch (*ptr++) {
966169942Sobrien			case 'h':
967169942Sobrien				switch (*ptr++) {
968169942Sobrien				case 'i':
969169942Sobrien				case 'd':
970169942Sobrien				case 'u':
971169942Sobrien				case 'x':
972169942Sobrien				case 'X':
973169942Sobrien					return 0;
974169942Sobrien				default:
975169942Sobrien					return -1;
976169942Sobrien				}
977169942Sobrien			case 'd':
978169942Sobrien				return 0;
979169942Sobrien			default:
980169942Sobrien				return -1;
981169942Sobrien			}
982169942Sobrien
983169942Sobrien		case 'i':
984169942Sobrien		case 'c':
985169942Sobrien		case 'd':
986169942Sobrien		case 'u':
987169942Sobrien		case 'x':
988169942Sobrien		case 'X':
989169942Sobrien			return 0;
990169942Sobrien
991169942Sobrien		default:
992169942Sobrien			return -1;
993169942Sobrien		}
994169942Sobrien
995169942Sobrien	case FILE_FMT_STR:
996169942Sobrien		if (*ptr == '-')
997169942Sobrien			ptr++;
998169942Sobrien		while (isdigit((unsigned char )*ptr))
999169942Sobrien			ptr++;
1000169942Sobrien		if (*ptr == '.') {
1001169942Sobrien			ptr++;
1002169942Sobrien			while (isdigit((unsigned char )*ptr))
1003169942Sobrien				ptr++;
1004169942Sobrien		}
1005169942Sobrien
1006169942Sobrien		switch (*ptr++) {
1007169942Sobrien		case 's':
1008169942Sobrien			return 0;
1009169942Sobrien		default:
1010169942Sobrien			return -1;
1011169942Sobrien		}
1012169942Sobrien
1013169942Sobrien	default:
1014169942Sobrien		/* internal error */
1015169942Sobrien		abort();
1016169942Sobrien	}
1017169942Sobrien	/*NOTREACHED*/
1018169942Sobrien	return -1;
1019169942Sobrien}
1020169942Sobrien
1021133359Sobrien/*
1022133359Sobrien * Check that the optional printf format in description matches
1023133359Sobrien * the type of the magic.
1024133359Sobrien */
1025133359Sobrienprivate int
1026139368Sobriencheck_format(struct magic_set *ms, struct magic *m)
1027133359Sobrien{
1028133359Sobrien	char *ptr;
1029133359Sobrien
1030133359Sobrien	for (ptr = m->desc; *ptr; ptr++)
1031133359Sobrien		if (*ptr == '%')
1032133359Sobrien			break;
1033133359Sobrien	if (*ptr == '\0') {
1034133359Sobrien		/* No format string; ok */
1035133359Sobrien		return 1;
1036133359Sobrien	}
1037169942Sobrien
1038169942Sobrien	assert(file_nformats == file_nnames);
1039169942Sobrien
1040169942Sobrien	if (m->type >= file_nformats) {
1041169942Sobrien		file_error(ms, 0, "Internal error inconsistency between "
1042169942Sobrien		    "m->type and format strings");
1043169942Sobrien		return -1;
1044133359Sobrien	}
1045169942Sobrien	if (file_formats[m->type] == FILE_FMT_NONE) {
1046169942Sobrien		file_error(ms, 0, "No format string for `%s' with description "
1047169942Sobrien		    "`%s'", m->desc, file_names[m->type]);
1048169942Sobrien		return -1;
1049133359Sobrien	}
1050169942Sobrien
1051169942Sobrien	ptr++;
1052169942Sobrien	if (check_format_type(ptr, file_formats[m->type]) == -1) {
1053169942Sobrien		/*
1054169942Sobrien		 * TODO: this error message is unhelpful if the format
1055169942Sobrien		 * string is not one character long
1056169942Sobrien		 */
1057169942Sobrien		file_error(ms, 0, "Printf format `%c' is not valid for type "
1058169942Sobrien		    " `%s' in description `%s'", *ptr,
1059169942Sobrien		    file_names[m->type], m->desc);
1060169942Sobrien		return -1;
1061169942Sobrien	}
1062169942Sobrien
1063133359Sobrien	for (; *ptr; ptr++) {
1064169942Sobrien		if (*ptr == '%') {
1065169942Sobrien			file_error(ms, 0,
1066169942Sobrien			    "Too many format strings (should have at most one) "
1067169942Sobrien			    "for `%s' with description `%s'",
1068169942Sobrien			    file_names[m->type], m->desc);
1069169942Sobrien			return -1;
1070133359Sobrien		}
1071133359Sobrien	}
1072169942Sobrien	return 0;
1073133359Sobrien}
1074133359Sobrien
107568349Sobrien/*
107668349Sobrien * Read a numeric value from a pointer, into the value union of a magic
107768349Sobrien * pointer, according to the magic type.  Update the string pointer to point
107868349Sobrien * just after the number read.  Return 0 for success, non-zero for failure.
107968349Sobrien */
1080133359Sobrienprivate int
1081159764Sobriengetvalue(struct magic_set *ms, struct magic *m, const char **p)
108268349Sobrien{
108368349Sobrien	int slen;
108468349Sobrien
1085133359Sobrien	switch (m->type) {
1086139368Sobrien	case FILE_BESTRING16:
1087139368Sobrien	case FILE_LESTRING16:
1088133359Sobrien	case FILE_STRING:
1089133359Sobrien	case FILE_PSTRING:
1090133359Sobrien	case FILE_REGEX:
1091159764Sobrien	case FILE_SEARCH:
1092133359Sobrien		*p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen);
1093133359Sobrien		if (*p == NULL) {
1094133359Sobrien			if (ms->flags & MAGIC_CHECK)
1095139368Sobrien				file_magwarn(ms, "cannot get string from `%s'",
1096133359Sobrien				    m->value.s);
1097133359Sobrien			return -1;
1098133359Sobrien		}
109968349Sobrien		m->vallen = slen;
1100133359Sobrien		return 0;
1101133359Sobrien	default:
110268349Sobrien		if (m->reln != 'x') {
1103159764Sobrien			char *ep;
1104169942Sobrien			m->value.q = file_signextend(ms, m,
1105169942Sobrien			    (uint64_t)strtoull(*p, &ep, 0));
1106159764Sobrien			*p = ep;
110768349Sobrien			eatsize(p);
110868349Sobrien		}
1109133359Sobrien		return 0;
1110133359Sobrien	}
111168349Sobrien}
111268349Sobrien
111368349Sobrien/*
111468349Sobrien * Convert a string containing C character escapes.  Stop at an unescaped
111568349Sobrien * space or tab.
111668349Sobrien * Copy the converted version to "p", returning its length in *slen.
111768349Sobrien * Return updated scan pointer as function result.
111868349Sobrien */
1119159764Sobrienprivate const char *
1120159764Sobriengetstr(struct magic_set *ms, const char *s, char *p, int plen, int *slen)
112168349Sobrien{
1122159764Sobrien	const char *origs = s;
1123159764Sobrien	char 	*origp = p;
112468349Sobrien	char	*pmax = p + plen - 1;
112568349Sobrien	int	c;
112668349Sobrien	int	val;
112768349Sobrien
112868349Sobrien	while ((c = *s++) != '\0') {
112968349Sobrien		if (isspace((unsigned char) c))
113068349Sobrien			break;
113168349Sobrien		if (p >= pmax) {
1132133359Sobrien			file_error(ms, 0, "string too long: `%s'", origs);
1133133359Sobrien			return NULL;
113468349Sobrien		}
113568349Sobrien		if(c == '\\') {
113668349Sobrien			switch(c = *s++) {
113768349Sobrien
113868349Sobrien			case '\0':
113968349Sobrien				goto out;
114068349Sobrien
114168349Sobrien			default:
114268349Sobrien				*p++ = (char) c;
114368349Sobrien				break;
114468349Sobrien
114568349Sobrien			case 'n':
114668349Sobrien				*p++ = '\n';
114768349Sobrien				break;
114868349Sobrien
114968349Sobrien			case 'r':
115068349Sobrien				*p++ = '\r';
115168349Sobrien				break;
115268349Sobrien
115368349Sobrien			case 'b':
115468349Sobrien				*p++ = '\b';
115568349Sobrien				break;
115668349Sobrien
115768349Sobrien			case 't':
115868349Sobrien				*p++ = '\t';
115968349Sobrien				break;
116068349Sobrien
116168349Sobrien			case 'f':
116268349Sobrien				*p++ = '\f';
116368349Sobrien				break;
116468349Sobrien
116568349Sobrien			case 'v':
116668349Sobrien				*p++ = '\v';
116768349Sobrien				break;
116868349Sobrien
116968349Sobrien			/* \ and up to 3 octal digits */
117068349Sobrien			case '0':
117168349Sobrien			case '1':
117268349Sobrien			case '2':
117368349Sobrien			case '3':
117468349Sobrien			case '4':
117568349Sobrien			case '5':
117668349Sobrien			case '6':
117768349Sobrien			case '7':
117868349Sobrien				val = c - '0';
117968349Sobrien				c = *s++;  /* try for 2 */
118068349Sobrien				if(c >= '0' && c <= '7') {
118168349Sobrien					val = (val<<3) | (c - '0');
118268349Sobrien					c = *s++;  /* try for 3 */
118368349Sobrien					if(c >= '0' && c <= '7')
118468349Sobrien						val = (val<<3) | (c-'0');
118568349Sobrien					else
118668349Sobrien						--s;
118768349Sobrien				}
118868349Sobrien				else
118968349Sobrien					--s;
119068349Sobrien				*p++ = (char)val;
119168349Sobrien				break;
119268349Sobrien
119368349Sobrien			/* \x and up to 2 hex digits */
119468349Sobrien			case 'x':
119568349Sobrien				val = 'x';	/* Default if no digits */
119668349Sobrien				c = hextoint(*s++);	/* Get next char */
119768349Sobrien				if (c >= 0) {
119868349Sobrien					val = c;
119968349Sobrien					c = hextoint(*s++);
120068349Sobrien					if (c >= 0)
120168349Sobrien						val = (val << 4) + c;
120268349Sobrien					else
120368349Sobrien						--s;
120468349Sobrien				} else
120568349Sobrien					--s;
120668349Sobrien				*p++ = (char)val;
120768349Sobrien				break;
120868349Sobrien			}
120968349Sobrien		} else
121068349Sobrien			*p++ = (char)c;
121168349Sobrien	}
121268349Sobrienout:
121368349Sobrien	*p = '\0';
121468349Sobrien	*slen = p - origp;
121568349Sobrien	return s;
121668349Sobrien}
121768349Sobrien
121868349Sobrien
121968349Sobrien/* Single hex char to int; -1 if not a hex char. */
1220133359Sobrienprivate int
1221103373Sobrienhextoint(int c)
122268349Sobrien{
122368349Sobrien	if (!isascii((unsigned char) c))
122468349Sobrien		return -1;
122568349Sobrien	if (isdigit((unsigned char) c))
122668349Sobrien		return c - '0';
122768349Sobrien	if ((c >= 'a')&&(c <= 'f'))
122868349Sobrien		return c + 10 - 'a';
122968349Sobrien	if (( c>= 'A')&&(c <= 'F'))
123068349Sobrien		return c + 10 - 'A';
123168349Sobrien	return -1;
123268349Sobrien}
123368349Sobrien
123468349Sobrien
123568349Sobrien/*
123668349Sobrien * Print a string containing C character escapes.
123768349Sobrien */
1238133359Sobrienprotected void
1239133359Sobrienfile_showstr(FILE *fp, const char *s, size_t len)
124068349Sobrien{
124168349Sobrien	char	c;
124268349Sobrien
124368349Sobrien	for (;;) {
124468349Sobrien		c = *s++;
1245133359Sobrien		if (len == ~0U) {
124668349Sobrien			if (c == '\0')
124768349Sobrien				break;
124868349Sobrien		}
124968349Sobrien		else  {
125068349Sobrien			if (len-- == 0)
125168349Sobrien				break;
125268349Sobrien		}
125368349Sobrien		if(c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
125468349Sobrien			(void) fputc(c, fp);
125568349Sobrien		else {
125668349Sobrien			(void) fputc('\\', fp);
125768349Sobrien			switch (c) {
125868349Sobrien
125968349Sobrien			case '\n':
126068349Sobrien				(void) fputc('n', fp);
126168349Sobrien				break;
126268349Sobrien
126368349Sobrien			case '\r':
126468349Sobrien				(void) fputc('r', fp);
126568349Sobrien				break;
126668349Sobrien
126768349Sobrien			case '\b':
126868349Sobrien				(void) fputc('b', fp);
126968349Sobrien				break;
127068349Sobrien
127168349Sobrien			case '\t':
127268349Sobrien				(void) fputc('t', fp);
127368349Sobrien				break;
127468349Sobrien
127568349Sobrien			case '\f':
127668349Sobrien				(void) fputc('f', fp);
127768349Sobrien				break;
127868349Sobrien
127968349Sobrien			case '\v':
128068349Sobrien				(void) fputc('v', fp);
128168349Sobrien				break;
128268349Sobrien
128368349Sobrien			default:
128468349Sobrien				(void) fprintf(fp, "%.3o", c & 0377);
128568349Sobrien				break;
128668349Sobrien			}
128768349Sobrien		}
128868349Sobrien	}
128968349Sobrien}
129068349Sobrien
129168349Sobrien/*
129268349Sobrien * eatsize(): Eat the size spec from a number [eg. 10UL]
129368349Sobrien */
1294133359Sobrienprivate void
1295159764Sobrieneatsize(const char **p)
129668349Sobrien{
1297159764Sobrien	const char *l = *p;
129868349Sobrien
129968349Sobrien	if (LOWCASE(*l) == 'u')
130068349Sobrien		l++;
130168349Sobrien
130268349Sobrien	switch (LOWCASE(*l)) {
130368349Sobrien	case 'l':    /* long */
130468349Sobrien	case 's':    /* short */
130568349Sobrien	case 'h':    /* short */
130668349Sobrien	case 'b':    /* char/byte */
130768349Sobrien	case 'c':    /* char/byte */
130868349Sobrien		l++;
130968349Sobrien		/*FALLTHROUGH*/
131068349Sobrien	default:
131168349Sobrien		break;
131268349Sobrien	}
131368349Sobrien
131468349Sobrien	*p = l;
131568349Sobrien}
131674784Sobrien
131774784Sobrien/*
1318103373Sobrien * handle a compiled file.
131974784Sobrien */
1320133359Sobrienprivate int
1321133359Sobrienapprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
1322133359Sobrien    const char *fn)
132374784Sobrien{
132474784Sobrien	int fd;
132574784Sobrien	struct stat st;
1326103373Sobrien	uint32_t *ptr;
1327103373Sobrien	uint32_t version;
132874784Sobrien	int needsbyteswap;
1329133359Sobrien	char buf[MAXPATHLEN];
1330139368Sobrien	char *dbname = mkdbname(fn, buf, sizeof(buf), 0);
1331133359Sobrien	void *mm = NULL;
133274784Sobrien
133380588Sobrien	if (dbname == NULL)
133480588Sobrien		return -1;
133580588Sobrien
1336159764Sobrien	if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
133774784Sobrien		return -1;
133874784Sobrien
133974784Sobrien	if (fstat(fd, &st) == -1) {
1340133359Sobrien		file_error(ms, errno, "cannot stat `%s'", dbname);
134174784Sobrien		goto error;
134274784Sobrien	}
1343133359Sobrien	if (st.st_size < 16) {
1344133359Sobrien		file_error(ms, 0, "file `%s' is too small", dbname);
1345133359Sobrien		goto error;
1346133359Sobrien	}
134774784Sobrien
134880588Sobrien#ifdef QUICK
1349103373Sobrien	if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
135074784Sobrien	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
1351133359Sobrien		file_error(ms, errno, "cannot map `%s'", dbname);
135274784Sobrien		goto error;
135374784Sobrien	}
1354133359Sobrien#define RET	2
135580588Sobrien#else
1356103373Sobrien	if ((mm = malloc((size_t)st.st_size)) == NULL) {
1357169942Sobrien		file_oomem(ms, (size_t)st.st_size);
135880588Sobrien		goto error;
135980588Sobrien	}
1360103373Sobrien	if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
1361133359Sobrien		file_badread(ms);
136280588Sobrien		goto error;
136380588Sobrien	}
1364133359Sobrien#define RET	1
136580588Sobrien#endif
1366103373Sobrien	*magicp = mm;
136774784Sobrien	(void)close(fd);
136875937Sobrien	fd = -1;
1369133359Sobrien	ptr = (uint32_t *)(void *)*magicp;
137074784Sobrien	if (*ptr != MAGICNO) {
137174784Sobrien		if (swap4(*ptr) != MAGICNO) {
1372133359Sobrien			file_error(ms, 0, "bad magic in `%s'");
137374784Sobrien			goto error;
137474784Sobrien		}
137574784Sobrien		needsbyteswap = 1;
137674784Sobrien	} else
137774784Sobrien		needsbyteswap = 0;
137874784Sobrien	if (needsbyteswap)
137974784Sobrien		version = swap4(ptr[1]);
138074784Sobrien	else
138174784Sobrien		version = ptr[1];
138274784Sobrien	if (version != VERSIONNO) {
1383133359Sobrien		file_error(ms, 0, "version mismatch (%d != %d) in `%s'",
1384133359Sobrien		    version, VERSIONNO, dbname);
138574784Sobrien		goto error;
138674784Sobrien	}
1387133359Sobrien	*nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1;
138874784Sobrien	(*magicp)++;
138974784Sobrien	if (needsbyteswap)
139074784Sobrien		byteswap(*magicp, *nmagicp);
1391133359Sobrien	return RET;
139274784Sobrien
139374784Sobrienerror:
139474784Sobrien	if (fd != -1)
139574784Sobrien		(void)close(fd);
1396103373Sobrien	if (mm) {
139780588Sobrien#ifdef QUICK
1398133359Sobrien		(void)munmap((void *)mm, (size_t)st.st_size);
139980588Sobrien#else
1400103373Sobrien		free(mm);
140180588Sobrien#endif
140280588Sobrien	} else {
140374784Sobrien		*magicp = NULL;
140474784Sobrien		*nmagicp = 0;
140574784Sobrien	}
140674784Sobrien	return -1;
140774784Sobrien}
140874784Sobrien
1409133359Sobrienprivate const uint32_t ar[] = {
1410133359Sobrien    MAGICNO, VERSIONNO
1411133359Sobrien};
141274784Sobrien/*
141374784Sobrien * handle an mmaped file.
141474784Sobrien */
1415133359Sobrienprivate int
1416133359Sobrienapprentice_compile(struct magic_set *ms, struct magic **magicp,
1417133359Sobrien    uint32_t *nmagicp, const char *fn)
141874784Sobrien{
141974784Sobrien	int fd;
1420133359Sobrien	char buf[MAXPATHLEN];
1421139368Sobrien	char *dbname = mkdbname(fn, buf, sizeof(buf), 1);
142274784Sobrien
142380588Sobrien	if (dbname == NULL)
142480588Sobrien		return -1;
142580588Sobrien
1426159764Sobrien	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) {
1427133359Sobrien		file_error(ms, errno, "cannot open `%s'", dbname);
142874784Sobrien		return -1;
142974784Sobrien	}
143074784Sobrien
1431133359Sobrien	if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
1432133359Sobrien		file_error(ms, errno, "error writing `%s'", dbname);
143374784Sobrien		return -1;
143474784Sobrien	}
143574784Sobrien
1436133359Sobrien	if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
1437133359Sobrien	    != sizeof(struct magic)) {
1438133359Sobrien		file_error(ms, errno, "error seeking `%s'", dbname);
143974784Sobrien		return -1;
144074784Sobrien	}
144174784Sobrien
1442133359Sobrien	if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
1443133359Sobrien	    != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
1444133359Sobrien		file_error(ms, errno, "error writing `%s'", dbname);
144574784Sobrien		return -1;
144674784Sobrien	}
144774784Sobrien
144874784Sobrien	(void)close(fd);
144974784Sobrien	return 0;
145074784Sobrien}
145174784Sobrien
1452133359Sobrienprivate const char ext[] = ".mgc";
145374784Sobrien/*
145474784Sobrien * make a dbname
145574784Sobrien */
1456133359Sobrienprivate char *
1457139368Sobrienmkdbname(const char *fn, char *buf, size_t bufsiz, int strip)
145874784Sobrien{
1459139368Sobrien	if (strip) {
1460139368Sobrien		const char *p;
1461139368Sobrien		if ((p = strrchr(fn, '/')) != NULL)
1462139368Sobrien			fn = ++p;
1463139368Sobrien	}
1464139368Sobrien
1465133359Sobrien	(void)snprintf(buf, bufsiz, "%s%s", fn, ext);
146674784Sobrien	return buf;
146774784Sobrien}
146874784Sobrien
146974784Sobrien/*
147074784Sobrien * Byteswap an mmap'ed file if needed
147174784Sobrien */
1472133359Sobrienprivate void
1473103373Sobrienbyteswap(struct magic *magic, uint32_t nmagic)
147474784Sobrien{
1475103373Sobrien	uint32_t i;
147674784Sobrien	for (i = 0; i < nmagic; i++)
147774784Sobrien		bs1(&magic[i]);
147874784Sobrien}
147974784Sobrien
148074784Sobrien/*
148174784Sobrien * swap a short
148274784Sobrien */
1483133359Sobrienprivate uint16_t
1484103373Sobrienswap2(uint16_t sv)
148574784Sobrien{
1486103373Sobrien	uint16_t rv;
1487133359Sobrien	uint8_t *s = (uint8_t *)(void *)&sv;
1488133359Sobrien	uint8_t *d = (uint8_t *)(void *)&rv;
148974784Sobrien	d[0] = s[1];
149074784Sobrien	d[1] = s[0];
149174784Sobrien	return rv;
149274784Sobrien}
149374784Sobrien
149474784Sobrien/*
149574784Sobrien * swap an int
149674784Sobrien */
1497133359Sobrienprivate uint32_t
1498103373Sobrienswap4(uint32_t sv)
149974784Sobrien{
1500103373Sobrien	uint32_t rv;
1501133359Sobrien	uint8_t *s = (uint8_t *)(void *)&sv;
1502133359Sobrien	uint8_t *d = (uint8_t *)(void *)&rv;
150374784Sobrien	d[0] = s[3];
150474784Sobrien	d[1] = s[2];
150574784Sobrien	d[2] = s[1];
150674784Sobrien	d[3] = s[0];
150774784Sobrien	return rv;
150874784Sobrien}
150974784Sobrien
151074784Sobrien/*
1511169942Sobrien * swap a quad
1512169942Sobrien */
1513169942Sobrienprivate uint64_t
1514169942Sobrienswap8(uint64_t sv)
1515169942Sobrien{
1516169942Sobrien	uint32_t rv;
1517169942Sobrien	uint8_t *s = (uint8_t *)(void *)&sv;
1518169942Sobrien	uint8_t *d = (uint8_t *)(void *)&rv;
1519169942Sobrien	d[0] = s[3];
1520169942Sobrien	d[1] = s[2];
1521169942Sobrien	d[2] = s[1];
1522169942Sobrien	d[3] = s[0];
1523169942Sobrien	d[4] = s[7];
1524169942Sobrien	d[5] = s[6];
1525169942Sobrien	d[6] = s[5];
1526169942Sobrien	d[7] = s[4];
1527169942Sobrien	return rv;
1528169942Sobrien}
1529169942Sobrien
1530169942Sobrien/*
153174784Sobrien * byteswap a single magic entry
153274784Sobrien */
1533133359Sobrienprivate void
1534133359Sobrienbs1(struct magic *m)
153574784Sobrien{
153674784Sobrien	m->cont_level = swap2(m->cont_level);
1537133359Sobrien	m->offset = swap4((uint32_t)m->offset);
1538133359Sobrien	m->in_offset = swap4((uint32_t)m->in_offset);
1539159764Sobrien	if (!IS_STRING(m->type))
1540169942Sobrien		m->value.q = swap8(m->value.q);
1541169942Sobrien	m->mask = swap8(m->mask);
154274784Sobrien}
1543