apprentice.c revision 186691
1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * apprentice - make one pass through /etc/magic, learning its secrets.
30 */
31
32#include "file.h"
33#include "magic.h"
34#include "patchlevel.h"
35#include <stdlib.h>
36#ifdef HAVE_UNISTD_H
37#include <unistd.h>
38#endif
39#include <string.h>
40#include <assert.h>
41#include <ctype.h>
42#include <fcntl.h>
43#include <sys/stat.h>
44#include <sys/param.h>
45#ifdef QUICK
46#include <sys/mman.h>
47#endif
48#include <sys/types.h>
49#include <dirent.h>
50
51#ifndef	lint
52FILE_RCSID("@(#)$File: apprentice.c,v 1.140 2008/07/20 04:02:15 christos Exp $")
53#endif	/* lint */
54
/*
 * EATAB: advance the cursor `l' past a run of ASCII whitespace.
 * The macro refers to a variable named `l' at the point of use, so it can
 * only be used where such a character pointer is in scope.
 */
#define	EATAB {while (isascii((unsigned char) *l) && \
		      isspace((unsigned char) *l))  ++l;}
/*
 * LOWCASE: map an upper-case character to lower case; any other value is
 * passed through unchanged.  The argument is evaluated more than once.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
			tolower((unsigned char) (l)) : (l))
/*
 * Work around a bug in headers on Digital Unix.
 * At least confirmed for: OSF1 V4.0 878
 */
#if defined(__osf__) && defined(__DECC)
#ifdef MAP_FAILED
#undef MAP_FAILED
#endif
#endif

/* Fallback for systems whose headers do not provide MAP_FAILED. */
#ifndef MAP_FAILED
#define MAP_FAILED (void *) -1
#endif

/* Fallback for systems whose headers do not provide MAP_FILE. */
#ifndef MAP_FILE
#define MAP_FILE 0
#endif

/* Fallback path-buffer size when <sys/param.h> does not define it. */
#ifndef MAXPATHLEN
#define MAXPATHLEN	1024
#endif
80
/*
 * One top-level magic entry as built by the parser: a growable array whose
 * first element is the level-0 test and whose following elements are its
 * continuation lines.
 */
struct magic_entry {
	struct magic *mp;	/* heap array of tests for this entry */
	uint32_t cont_count;	/* number of elements of mp in use */
	uint32_t max_count;	/* number of elements of mp allocated */
};
86
/*
 * Per-type lookup tables indexed by FILE_* type code; they are filled in
 * from type_tbl by init_file_tables().
 */
int file_formats[FILE_NAMES_SIZE];		/* FILE_FMT_* class per type */
const size_t file_nformats = FILE_NAMES_SIZE;	/* capacity of file_formats */
const char *file_names[FILE_NAMES_SIZE];	/* type keyword per type */
const size_t file_nnames = FILE_NAMES_SIZE;	/* capacity of file_names */
91
/* Forward declarations for the file-local helpers defined in this file. */
private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
private int hextoint(int);
private const char *getstr(struct magic_set *, const char *, char *, int,
    int *, int);
private int parse(struct magic_set *, struct magic_entry **, uint32_t *,
    const char *, size_t, int);
private void eatsize(const char **);
private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
private size_t apprentice_magic_strength(const struct magic *);
private int apprentice_sort(const void *, const void *);
private int apprentice_load(struct magic_set *, struct magic **, uint32_t *,
    const char *, int);
private void byteswap(struct magic *, uint32_t);
private void bs1(struct magic *);
private uint16_t swap2(uint16_t);
private uint32_t swap4(uint32_t);
private uint64_t swap8(uint64_t);
private void mkdbname(const char *, char **, int);
private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
    const char *);
private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
    const char *);
private int check_format_type(const char *, int);
private int check_format(struct magic_set *, struct magic *);
private int get_op(char);
/* Handlers for the "!:" directives; referenced from the bang[] table. */
private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
private int parse_strength(struct magic_set *, struct magic_entry *,
    const char *);
120
121
/*
 * Current capacity (in entries) of the magic_entry array being built;
 * reset by apprentice_load() and grown by parse().
 */
private size_t maxmagic = 0;
/* Size of struct magic in this build; checked against FILE_MAGICSIZE. */
private size_t magicsize = sizeof(struct magic);

/* Column header printed to stderr when the action is FILE_CHECK. */
private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
126
/*
 * Table of "!:" directives.  Each row holds the directive name, its length
 * (without the terminating NUL) and the parse_<name> handler that is called
 * with the text following the name.  The table ends with an all-NULL row.
 */
private struct {
	const char *name;
	size_t len;
	int (*fun)(struct magic_set *, struct magic_entry *, const char *);
} bang[] = {
	/* DECLARE_FIELD(x) expands to { "x", strlen("x"), parse_x } */
#define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(strength),
#undef	DECLARE_FIELD
	{ NULL, 0, NULL }
};
138
#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Stand-alone magic compiler: compiles the single magic file named on the
 * command line.  The exit status is 0 on success and 1 on any failure.
 */
int
main(int argc, char *argv[])
{
	struct magic_set *ms;
	char *progname;
	char *slash;
	int status = 0;

	/* Diagnostics are reported under the basename of the program. */
	slash = strrchr(argv[0], '/');
	progname = (slash != NULL) ? slash + 1 : argv[0];

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	ms = magic_open(MAGIC_CHECK);
	if (ms == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}

	if (magic_compile(ms, argv[1]) == -1) {
		status = 1;
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	}
	magic_close(ms);
	return status;
}
#endif /* COMPILE_ONLY */
171
/*
 * Table of magic type keywords.  Each row gives the keyword, its length
 * (without the NUL), the FILE_* type code and the FILE_FMT_* class of the
 * type.  The row with an empty name and length 0 terminates the table and
 * carries FILE_INVALID.  get_type() matches keywords by prefix, scanning
 * the rows in the order listed here.
 */
static const struct type_tbl_s {
	const char name[16];
	const size_t len;
	const int type;
	const int format;
} type_tbl[] = {
	/* XX(s) expands to the string and its length; XX_NULL is the sentinel. */
# define XX(s)		s, (sizeof(s) - 1)
# define XX_NULL	"", 0
	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_STR },
	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
# undef XX
# undef XX_NULL
};
222
223private int
224get_type(const char *l, const char **t)
225{
226	const struct type_tbl_s *p;
227
228	for (p = type_tbl; p->len; p++) {
229		if (strncmp(l, p->name, p->len) == 0) {
230			if (t)
231				*t = l + p->len;
232			break;
233		}
234	}
235	return p->type;
236}
237
238private void
239init_file_tables(void)
240{
241	static int done = 0;
242	const struct type_tbl_s *p;
243
244	if (done)
245		return;
246	done++;
247
248	for (p = type_tbl; p->len; p++) {
249		assert(p->type < FILE_NAMES_SIZE);
250		file_names[p->type] = p->name;
251		file_formats[p->type] = p->format;
252	}
253}
254
255/*
256 * Handle one file or directory.
257 */
258private int
259apprentice_1(struct magic_set *ms, const char *fn, int action,
260    struct mlist *mlist)
261{
262	struct magic *magic = NULL;
263	uint32_t nmagic = 0;
264	struct mlist *ml;
265	int rv = -1;
266	int mapped;
267
268	if (magicsize != FILE_MAGICSIZE) {
269		file_error(ms, 0, "magic element size %lu != %lu",
270		    (unsigned long)sizeof(*magic),
271		    (unsigned long)FILE_MAGICSIZE);
272		return -1;
273	}
274
275	if (action == FILE_COMPILE) {
276		rv = apprentice_load(ms, &magic, &nmagic, fn, action);
277		if (rv != 0)
278			return -1;
279		rv = apprentice_compile(ms, &magic, &nmagic, fn);
280		free(magic);
281		return rv;
282	}
283
284#ifndef COMPILE_ONLY
285	if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
286		if (ms->flags & MAGIC_CHECK)
287			file_magwarn(ms, "using regular magic file `%s'", fn);
288		rv = apprentice_load(ms, &magic, &nmagic, fn, action);
289		if (rv != 0)
290			return -1;
291	}
292
293	mapped = rv;
294
295	if (magic == NULL) {
296		file_delmagic(magic, mapped, nmagic);
297		return -1;
298	}
299
300	if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) {
301		file_delmagic(magic, mapped, nmagic);
302		file_oomem(ms, sizeof(*ml));
303		return -1;
304	}
305
306	ml->magic = magic;
307	ml->nmagic = nmagic;
308	ml->mapped = mapped;
309
310	mlist->prev->next = ml;
311	ml->prev = mlist->prev;
312	ml->next = mlist;
313	mlist->prev = ml;
314
315	return 0;
316#endif /* COMPILE_ONLY */
317}
318
319protected void
320file_delmagic(struct magic *p, int type, size_t entries)
321{
322	if (p == NULL)
323		return;
324	switch (type) {
325#ifdef QUICK
326	case 2:
327		p--;
328		(void)munmap((void *)p, sizeof(*p) * (entries + 1));
329		break;
330#endif
331	case 1:
332		p--;
333		/*FALLTHROUGH*/
334	case 0:
335		free(p);
336		break;
337	default:
338		abort();
339	}
340}
341
342/* const char *fn: list of magic files and directories */
343protected struct mlist *
344file_apprentice(struct magic_set *ms, const char *fn, int action)
345{
346	char *p, *mfn;
347	int file_err, errs = -1;
348	struct mlist *mlist;
349
350	init_file_tables();
351
352	if (fn == NULL)
353		fn = getenv("MAGIC");
354	if (fn == NULL)
355		fn = MAGIC;
356
357	if ((mfn = strdup(fn)) == NULL) {
358		file_oomem(ms, strlen(fn));
359		return NULL;
360	}
361	fn = mfn;
362
363	if ((mlist = CAST(struct mlist *, malloc(sizeof(*mlist)))) == NULL) {
364		free(mfn);
365		file_oomem(ms, sizeof(*mlist));
366		return NULL;
367	}
368	mlist->next = mlist->prev = mlist;
369
370	while (fn) {
371		p = strchr(fn, PATHSEP);
372		if (p)
373			*p++ = '\0';
374		if (*fn == '\0')
375			break;
376		file_err = apprentice_1(ms, fn, action, mlist);
377		errs = MAX(errs, file_err);
378		fn = p;
379	}
380	if (errs == -1) {
381		free(mfn);
382		free(mlist);
383		mlist = NULL;
384		file_error(ms, 0, "could not find any magic files!");
385		return NULL;
386	}
387	free(mfn);
388	return mlist;
389}
390
391/*
392 * Get weight of this magic entry, for sorting purposes.
393 */
394private size_t
395apprentice_magic_strength(const struct magic *m)
396{
397#define MULT 10
398	size_t val = 2 * MULT;	/* baseline strength */
399
400	switch (m->type) {
401	case FILE_DEFAULT:	/* make sure this sorts last */
402		if (m->factor_op != FILE_FACTOR_OP_NONE)
403			abort();
404		return 0;
405
406	case FILE_BYTE:
407		val += 1 * MULT;
408		break;
409
410	case FILE_SHORT:
411	case FILE_LESHORT:
412	case FILE_BESHORT:
413		val += 2 * MULT;
414		break;
415
416	case FILE_LONG:
417	case FILE_LELONG:
418	case FILE_BELONG:
419	case FILE_MELONG:
420		val += 4 * MULT;
421		break;
422
423	case FILE_PSTRING:
424	case FILE_STRING:
425		val += m->vallen * MULT;
426		break;
427
428	case FILE_BESTRING16:
429	case FILE_LESTRING16:
430		val += m->vallen * MULT / 2;
431		break;
432
433	case FILE_SEARCH:
434	case FILE_REGEX:
435		val += m->vallen * MAX(MULT / m->vallen, 1);
436		break;
437
438	case FILE_DATE:
439	case FILE_LEDATE:
440	case FILE_BEDATE:
441	case FILE_MEDATE:
442	case FILE_LDATE:
443	case FILE_LELDATE:
444	case FILE_BELDATE:
445	case FILE_MELDATE:
446	case FILE_FLOAT:
447	case FILE_BEFLOAT:
448	case FILE_LEFLOAT:
449		val += 4 * MULT;
450		break;
451
452	case FILE_QUAD:
453	case FILE_BEQUAD:
454	case FILE_LEQUAD:
455	case FILE_QDATE:
456	case FILE_LEQDATE:
457	case FILE_BEQDATE:
458	case FILE_QLDATE:
459	case FILE_LEQLDATE:
460	case FILE_BEQLDATE:
461	case FILE_DOUBLE:
462	case FILE_BEDOUBLE:
463	case FILE_LEDOUBLE:
464		val += 8 * MULT;
465		break;
466
467	default:
468		val = 0;
469		(void)fprintf(stderr, "Bad type %d\n", m->type);
470		abort();
471	}
472
473	switch (m->reln) {
474	case 'x':	/* matches anything penalize */
475	case '!':       /* matches almost anything penalize */
476		val = 0;
477		break;
478
479	case '=':	/* Exact match, prefer */
480		val += MULT;
481		break;
482
483	case '>':
484	case '<':	/* comparison match reduce strength */
485		val -= 2 * MULT;
486		break;
487
488	case '^':
489	case '&':	/* masking bits, we could count them too */
490		val -= MULT;
491		break;
492
493	default:
494		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
495		abort();
496	}
497
498	if (val == 0)	/* ensure we only return 0 for FILE_DEFAULT */
499		val = 1;
500
501	switch (m->factor_op) {
502	case FILE_FACTOR_OP_NONE:
503		break;
504	case FILE_FACTOR_OP_PLUS:
505		val += m->factor;
506		break;
507	case FILE_FACTOR_OP_MINUS:
508		val -= m->factor;
509		break;
510	case FILE_FACTOR_OP_TIMES:
511		val *= m->factor;
512		break;
513	case FILE_FACTOR_OP_DIV:
514		val /= m->factor;
515		break;
516	default:
517		abort();
518	}
519
520	/*
521	 * Magic entries with no description get a bonus because they depend
522	 * on subsequent magic entries to print something.
523	 */
524	if (m->desc[0] == '\0')
525		val++;
526	return val;
527}
528
529/*
530 * Sort callback for sorting entries by "strength" (basically length)
531 */
532private int
533apprentice_sort(const void *a, const void *b)
534{
535	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
536	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
537	size_t sa = apprentice_magic_strength(ma->mp);
538	size_t sb = apprentice_magic_strength(mb->mp);
539	if (sa == sb)
540		return 0;
541	else if (sa > sb)
542		return -1;
543	else
544		return 1;
545}
546
547private void
548set_test_type(struct magic *mstart, struct magic *m)
549{
550	switch (m->type) {
551	case FILE_BYTE:
552	case FILE_SHORT:
553	case FILE_LONG:
554	case FILE_DATE:
555	case FILE_BESHORT:
556	case FILE_BELONG:
557	case FILE_BEDATE:
558	case FILE_LESHORT:
559	case FILE_LELONG:
560	case FILE_LEDATE:
561	case FILE_LDATE:
562	case FILE_BELDATE:
563	case FILE_LELDATE:
564	case FILE_MEDATE:
565	case FILE_MELDATE:
566	case FILE_MELONG:
567	case FILE_QUAD:
568	case FILE_LEQUAD:
569	case FILE_BEQUAD:
570	case FILE_QDATE:
571	case FILE_LEQDATE:
572	case FILE_BEQDATE:
573	case FILE_QLDATE:
574	case FILE_LEQLDATE:
575	case FILE_BEQLDATE:
576	case FILE_FLOAT:
577	case FILE_BEFLOAT:
578	case FILE_LEFLOAT:
579	case FILE_DOUBLE:
580	case FILE_BEDOUBLE:
581	case FILE_LEDOUBLE:
582	case FILE_STRING:
583	case FILE_PSTRING:
584	case FILE_BESTRING16:
585	case FILE_LESTRING16:
586		/* binary test, set flag */
587		mstart->flag |= BINTEST;
588		break;
589	case FILE_REGEX:
590	case FILE_SEARCH:
591		/* binary test if pattern is not text */
592		if (file_looks_utf8(m->value.us, m->vallen, NULL, NULL) <= 0)
593			mstart->flag |= BINTEST;
594		break;
595	case FILE_DEFAULT:
596		/* can't deduce anything; we shouldn't see this at the
597		   top level anyway */
598		break;
599	case FILE_INVALID:
600	default:
601		/* invalid search type, but no need to complain here */
602		break;
603	}
604}
605
606/*
607 * Load and parse one file.
608 */
609private void
610load_1(struct magic_set *ms, int action, const char *fn, int *errs,
611   struct magic_entry **marray, uint32_t *marraycount)
612{
613	char line[BUFSIZ];
614	size_t lineno = 0;
615	FILE *f = fopen(ms->file = fn, "r");
616	if (f == NULL) {
617		if (errno != ENOENT)
618			file_error(ms, errno, "cannot read magic file `%s'",
619				   fn);
620		(*errs)++;
621	} else {
622		/* read and parse this file */
623		for (ms->line = 1; fgets(line, sizeof(line), f) != NULL; ms->line++) {
624			size_t len;
625			len = strlen(line);
626			if (len == 0) /* null line, garbage, etc */
627				continue;
628			if (line[len - 1] == '\n') {
629				lineno++;
630				line[len - 1] = '\0'; /* delete newline */
631			}
632			if (line[0] == '\0')	/* empty, do not parse */
633				continue;
634			if (line[0] == '#')	/* comment, do not parse */
635				continue;
636			if (line[0] == '!' && line[1] == ':') {
637				size_t i;
638
639				for (i = 0; bang[i].name != NULL; i++) {
640					if (len - 2 > bang[i].len &&
641					    memcmp(bang[i].name, line + 2,
642					    bang[i].len) == 0)
643						break;
644				}
645				if (bang[i].name == NULL) {
646					file_error(ms, 0,
647					    "Unknown !: entry `%s'", line);
648					(*errs)++;
649					continue;
650				}
651				if (*marraycount == 0) {
652					file_error(ms, 0,
653					    "No current entry for :!%s type",
654						bang[i].name);
655					(*errs)++;
656					continue;
657				}
658				if ((*bang[i].fun)(ms,
659				    &(*marray)[*marraycount - 1],
660				    line + bang[i].len + 2) != 0) {
661					(*errs)++;
662					continue;
663				}
664				continue;
665			}
666			if (parse(ms, marray, marraycount, line, lineno,
667			    action) != 0)
668				(*errs)++;
669		}
670
671		(void)fclose(f);
672	}
673}
674
/*
 * parse a file or directory of files
 * const char *fn: name of magic file or directory
 *
 * On success returns 0, stores a newly malloc'ed array of struct magic in
 * *magicp and the number of elements in *nmagicp.  On failure returns the
 * (non-zero) number of errors and sets *magicp to NULL and *nmagicp to 0.
 *
 * Side effects: MAGIC_CHECK is set in ms->flags and is not restored, and
 * the file-scope `maxmagic' is reset to MAXMAGIS.
 */
private int
apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
    const char *fn, int action)
{
	int errs = 0;
	struct magic_entry *marray;
	uint32_t marraycount, i, mentrycount = 0, starttest;
	size_t slen;
	char subfn[MAXPATHLEN];
	struct stat st;
	DIR *dir;
	struct dirent *d;

	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */

	/* Start with a zeroed array of maxmagic entries; parse() grows it. */
        maxmagic = MAXMAGIS;
	if ((marray = CAST(struct magic_entry *, calloc(maxmagic,
	    sizeof(*marray)))) == NULL) {
		file_oomem(ms, maxmagic * sizeof(*marray));
		return -1;
	}
	marraycount = 0;

	/* print silly verbose header for USG compat. */
	if (action == FILE_CHECK)
		(void)fprintf(stderr, "%s\n", usg_hdr);

	/* load directory or file */
	if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
		dir = opendir(fn);
		if (dir) {
			/* Only regular files directly inside fn are loaded. */
			while ((d = readdir(dir)) != NULL) {
				/*
				 * NOTE(review): the snprintf() result is not
				 * checked, so an over-long path is silently
				 * truncated before stat() - confirm this is
				 * acceptable.
				 */
				snprintf(subfn, sizeof(subfn), "%s/%s",
				    fn, d->d_name);
				if (stat(subfn, &st) == 0 &&
				    S_ISREG(st.st_mode)) {
					load_1(ms, action, subfn, &errs,
					    &marray, &marraycount);
				}
			}
			closedir(dir);
		} else
			errs++;
	} else
		load_1(ms, action, fn, &errs, &marray, &marraycount);
	if (errs)
		goto out;

	/* Set types of tests */
	for (i = 0; i < marraycount; ) {
		if (marray[i].mp->cont_level != 0) {
			i++;
			continue;
		}

		/*
		 * Walk the run of array elements that starts at this level-0
		 * element; each one is folded into the flags of the element
		 * at starttest.
		 */
		starttest = i;
		do {
			static const char text[] = "text";
			static const char binary[] = "binary";
			/*
			 * NOTE(review): sizeof(text) counts the terminating
			 * NUL, so `len' is 5 and p[len] below looks one
			 * character beyond the end of the matched word -
			 * confirm whether sizeof(text) - 1 was intended.
			 */
			static const size_t len = sizeof(text);
			set_test_type(marray[starttest].mp, marray[i].mp);
			/*
			 * In a do/while, `continue' jumps to the loop
			 * condition, so `i' is still advanced.
			 */
			if ((ms->flags & MAGIC_DEBUG) == 0)
				continue;
			(void)fprintf(stderr, "%s%s%s: %s\n",
			    marray[i].mp->mimetype,
			    marray[i].mp->mimetype[0] == '\0' ? "" : "; ",
			    marray[i].mp->desc[0] ? marray[i].mp->desc :
			    "(no description)",
			    marray[i].mp->flag & BINTEST ? binary : text);
			/*
			 * Debug aid: point out binary tests whose description
			 * contains the word "text".
			 */
			if (marray[i].mp->flag & BINTEST) {
				char *p = strstr(marray[i].mp->desc, text);
				if (p && (p == marray[i].mp->desc ||
				    isspace((unsigned char)p[-1])) &&
				    (p + len - marray[i].mp->desc ==
				    MAXstring || (p[len] == '\0' ||
				    isspace((unsigned char)p[len]))))
					(void)fprintf(stderr, "*** Possible "
					    "binary test for text type\n");
			}
		} while (++i < marraycount && marray[i].mp->cont_level != 0);
	}

	/* Order the entries by descending strength of their first test. */
	qsort(marray, marraycount, sizeof(*marray), apprentice_sort);

	/*
	 * Make sure that any level 0 "default" line is last (if one exists).
	 */
	for (i = 0; i < marraycount; i++) {
		if (marray[i].mp->cont_level == 0 &&
		    marray[i].mp->type == FILE_DEFAULT) {
			while (++i < marraycount)
				if (marray[i].mp->cont_level == 0)
					break;
			if (i != marraycount) {
				ms->line = marray[i].mp->lineno; /* XXX - Ugh! */
				file_magwarn(ms,
				    "level 0 \"default\" did not sort last");
			}
			break;
		}
	}

	/* Count the tests of all entries to size the flat result array. */
	for (i = 0; i < marraycount; i++)
		mentrycount += marray[i].cont_count;

	slen = sizeof(**magicp) * mentrycount;
	if ((*magicp = CAST(struct magic *, malloc(slen))) == NULL) {
		file_oomem(ms, slen);
		errs++;
		goto out;
	}

	/* Copy every entry's tests, in sorted order, into the flat array. */
	mentrycount = 0;
	for (i = 0; i < marraycount; i++) {
		(void)memcpy(*magicp + mentrycount, marray[i].mp,
		    marray[i].cont_count * sizeof(**magicp));
		mentrycount += marray[i].cont_count;
	}
out:
	/* The per-entry arrays are scratch space and are always released. */
	for (i = 0; i < marraycount; i++)
		free(marray[i].mp);
	free(marray);
	if (errs) {
		*magicp = NULL;
		*nmagicp = 0;
		return errs;
	} else {
		*nmagicp = mentrycount;
		return 0;
	}

}
811
812/*
813 * extend the sign bit if the comparison is to be signed
814 */
815protected uint64_t
816file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
817{
818	if (!(m->flag & UNSIGNED)) {
819		switch(m->type) {
820		/*
821		 * Do not remove the casts below.  They are
822		 * vital.  When later compared with the data,
823		 * the sign extension must have happened.
824		 */
825		case FILE_BYTE:
826			v = (char) v;
827			break;
828		case FILE_SHORT:
829		case FILE_BESHORT:
830		case FILE_LESHORT:
831			v = (short) v;
832			break;
833		case FILE_DATE:
834		case FILE_BEDATE:
835		case FILE_LEDATE:
836		case FILE_MEDATE:
837		case FILE_LDATE:
838		case FILE_BELDATE:
839		case FILE_LELDATE:
840		case FILE_MELDATE:
841		case FILE_LONG:
842		case FILE_BELONG:
843		case FILE_LELONG:
844		case FILE_MELONG:
845		case FILE_FLOAT:
846		case FILE_BEFLOAT:
847		case FILE_LEFLOAT:
848			v = (int32_t) v;
849			break;
850		case FILE_QUAD:
851		case FILE_BEQUAD:
852		case FILE_LEQUAD:
853		case FILE_QDATE:
854		case FILE_QLDATE:
855		case FILE_BEQDATE:
856		case FILE_BEQLDATE:
857		case FILE_LEQDATE:
858		case FILE_LEQLDATE:
859		case FILE_DOUBLE:
860		case FILE_BEDOUBLE:
861		case FILE_LEDOUBLE:
862			v = (int64_t) v;
863			break;
864		case FILE_STRING:
865		case FILE_PSTRING:
866		case FILE_BESTRING16:
867		case FILE_LESTRING16:
868		case FILE_REGEX:
869		case FILE_SEARCH:
870		case FILE_DEFAULT:
871			break;
872		default:
873			if (ms->flags & MAGIC_CHECK)
874			    file_magwarn(ms, "cannot happen: m->type=%d\n",
875				    m->type);
876			return ~0U;
877		}
878	}
879	return v;
880}
881
882private int
883string_modifier_check(struct magic_set *ms, struct magic *m)
884{
885	if ((ms->flags & MAGIC_CHECK) == 0)
886		return 0;
887
888	switch (m->type) {
889	case FILE_BESTRING16:
890	case FILE_LESTRING16:
891		if (m->str_flags != 0) {
892			file_magwarn(ms,
893			    "no modifiers allowed for 16-bit strings\n");
894			return -1;
895		}
896		break;
897	case FILE_STRING:
898	case FILE_PSTRING:
899		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
900			file_magwarn(ms,
901			    "'/%c' only allowed on regex and search\n",
902			    CHAR_REGEX_OFFSET_START);
903			return -1;
904		}
905		break;
906	case FILE_SEARCH:
907		if (m->str_range == 0) {
908			file_magwarn(ms,
909			    "missing range; defaulting to %d\n",
910                            STRING_DEFAULT_RANGE);
911			m->str_range = STRING_DEFAULT_RANGE;
912			return -1;
913		}
914		break;
915	case FILE_REGEX:
916		if ((m->str_flags & STRING_COMPACT_BLANK) != 0) {
917			file_magwarn(ms, "'/%c' not allowed on regex\n",
918			    CHAR_COMPACT_BLANK);
919			return -1;
920		}
921		if ((m->str_flags & STRING_COMPACT_OPTIONAL_BLANK) != 0) {
922			file_magwarn(ms, "'/%c' not allowed on regex\n",
923			    CHAR_COMPACT_OPTIONAL_BLANK);
924			return -1;
925		}
926		break;
927	default:
928		file_magwarn(ms, "coding error: m->type=%d\n",
929		    m->type);
930		return -1;
931	}
932	return 0;
933}
934
935private int
936get_op(char c)
937{
938	switch (c) {
939	case '&':
940		return FILE_OPAND;
941	case '|':
942		return FILE_OPOR;
943	case '^':
944		return FILE_OPXOR;
945	case '+':
946		return FILE_OPADD;
947	case '-':
948		return FILE_OPMINUS;
949	case '*':
950		return FILE_OPMULTIPLY;
951	case '/':
952		return FILE_OPDIVIDE;
953	case '%':
954		return FILE_OPMODULO;
955	default:
956		return -1;
957	}
958}
959
960#ifdef ENABLE_CONDITIONALS
961private int
962get_cond(const char *l, const char **t)
963{
964	static const struct cond_tbl_s {
965		char name[8];
966		size_t len;
967		int cond;
968	} cond_tbl[] = {
969		{ "if",		2,	COND_IF },
970		{ "elif",	4,	COND_ELIF },
971		{ "else",	4,	COND_ELSE },
972		{ "",		0,	COND_NONE },
973	};
974	const struct cond_tbl_s *p;
975
976	for (p = cond_tbl; p->len; p++) {
977		if (strncmp(l, p->name, p->len) == 0 &&
978		    isspace((unsigned char)l[p->len])) {
979			if (t)
980				*t = l + p->len;
981			break;
982		}
983	}
984	return p->cond;
985}
986
987private int
988check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
989{
990	int last_cond;
991	last_cond = ms->c.li[cont_level].last_cond;
992
993	switch (cond) {
994	case COND_IF:
995		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
996			if (ms->flags & MAGIC_CHECK)
997				file_magwarn(ms, "syntax error: `if'");
998			return -1;
999		}
1000		last_cond = COND_IF;
1001		break;
1002
1003	case COND_ELIF:
1004		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1005			if (ms->flags & MAGIC_CHECK)
1006				file_magwarn(ms, "syntax error: `elif'");
1007			return -1;
1008		}
1009		last_cond = COND_ELIF;
1010		break;
1011
1012	case COND_ELSE:
1013		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1014			if (ms->flags & MAGIC_CHECK)
1015				file_magwarn(ms, "syntax error: `else'");
1016			return -1;
1017		}
1018		last_cond = COND_NONE;
1019		break;
1020
1021	case COND_NONE:
1022		last_cond = COND_NONE;
1023		break;
1024	}
1025
1026	ms->c.li[cont_level].last_cond = last_cond;
1027	return 0;
1028}
1029#endif /* ENABLE_CONDITIONALS */
1030
1031/*
1032 * parse one line from magic file, put into magic[index++] if valid
1033 */
1034private int
1035parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
1036    const char *line, size_t lineno, int action)
1037{
1038#ifdef ENABLE_CONDITIONALS
1039	static uint32_t last_cont_level = 0;
1040#endif
1041	size_t i;
1042	struct magic_entry *me;
1043	struct magic *m;
1044	const char *l = line;
1045	char *t;
1046	int op;
1047	uint32_t cont_level;
1048
1049	cont_level = 0;
1050
1051	while (*l == '>') {
1052		++l;		/* step over */
1053		cont_level++;
1054	}
1055#ifdef ENABLE_CONDITIONALS
1056	if (cont_level == 0 || cont_level > last_cont_level)
1057		if (file_check_mem(ms, cont_level) == -1)
1058			return -1;
1059	last_cont_level = cont_level;
1060#endif
1061
1062#define ALLOC_CHUNK	(size_t)10
1063#define ALLOC_INCR	(size_t)200
1064
1065	if (cont_level != 0) {
1066		if (*nmentryp == 0) {
1067			file_error(ms, 0, "No current entry for continuation");
1068			return -1;
1069		}
1070		me = &(*mentryp)[*nmentryp - 1];
1071		if (me->cont_count == me->max_count) {
1072			struct magic *nm;
1073			size_t cnt = me->max_count + ALLOC_CHUNK;
1074			if ((nm = CAST(struct magic *, realloc(me->mp,
1075			    sizeof(*nm) * cnt))) == NULL) {
1076				file_oomem(ms, sizeof(*nm) * cnt);
1077				return -1;
1078			}
1079			me->mp = m = nm;
1080			me->max_count = cnt;
1081		}
1082		m = &me->mp[me->cont_count++];
1083		(void)memset(m, 0, sizeof(*m));
1084		m->cont_level = cont_level;
1085	} else {
1086		if (*nmentryp == maxmagic) {
1087			struct magic_entry *mp;
1088
1089			maxmagic += ALLOC_INCR;
1090			if ((mp = CAST(struct magic_entry *,
1091			    realloc(*mentryp, sizeof(*mp) * maxmagic))) ==
1092			    NULL) {
1093				file_oomem(ms, sizeof(*mp) * maxmagic);
1094				return -1;
1095			}
1096			(void)memset(&mp[*nmentryp], 0, sizeof(*mp) *
1097			    ALLOC_INCR);
1098			*mentryp = mp;
1099		}
1100		me = &(*mentryp)[*nmentryp];
1101		if (me->mp == NULL) {
1102			size_t len = sizeof(*m) * ALLOC_CHUNK;
1103			if ((m = CAST(struct magic *, malloc(len))) == NULL) {
1104				file_oomem(ms, len);
1105				return -1;
1106			}
1107			me->mp = m;
1108			me->max_count = ALLOC_CHUNK;
1109		} else
1110			m = me->mp;
1111		(void)memset(m, 0, sizeof(*m));
1112		m->factor_op = FILE_FACTOR_OP_NONE;
1113		m->cont_level = 0;
1114		me->cont_count = 1;
1115	}
1116	m->lineno = lineno;
1117
1118	if (*l == '&') {  /* m->cont_level == 0 checked below. */
1119                ++l;            /* step over */
1120                m->flag |= OFFADD;
1121        }
1122	if (*l == '(') {
1123		++l;		/* step over */
1124		m->flag |= INDIR;
1125		if (m->flag & OFFADD)
1126			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
1127
1128		if (*l == '&') {  /* m->cont_level == 0 checked below */
1129			++l;            /* step over */
1130			m->flag |= OFFADD;
1131		}
1132	}
1133	/* Indirect offsets are not valid at level 0. */
1134	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
1135		if (ms->flags & MAGIC_CHECK)
1136			file_magwarn(ms, "relative offset at level 0");
1137
1138	/* get offset, then skip over it */
1139	m->offset = (uint32_t)strtoul(l, &t, 0);
1140        if (l == t)
1141		if (ms->flags & MAGIC_CHECK)
1142			file_magwarn(ms, "offset `%s' invalid", l);
1143        l = t;
1144
1145	if (m->flag & INDIR) {
1146		m->in_type = FILE_LONG;
1147		m->in_offset = 0;
1148		/*
1149		 * read [.lbs][+-]nnnnn)
1150		 */
1151		if (*l == '.') {
1152			l++;
1153			switch (*l) {
1154			case 'l':
1155				m->in_type = FILE_LELONG;
1156				break;
1157			case 'L':
1158				m->in_type = FILE_BELONG;
1159				break;
1160			case 'm':
1161				m->in_type = FILE_MELONG;
1162				break;
1163			case 'h':
1164			case 's':
1165				m->in_type = FILE_LESHORT;
1166				break;
1167			case 'H':
1168			case 'S':
1169				m->in_type = FILE_BESHORT;
1170				break;
1171			case 'c':
1172			case 'b':
1173			case 'C':
1174			case 'B':
1175				m->in_type = FILE_BYTE;
1176				break;
1177			case 'e':
1178			case 'f':
1179			case 'g':
1180				m->in_type = FILE_LEDOUBLE;
1181				break;
1182			case 'E':
1183			case 'F':
1184			case 'G':
1185				m->in_type = FILE_BEDOUBLE;
1186				break;
1187			default:
1188				if (ms->flags & MAGIC_CHECK)
1189					file_magwarn(ms,
1190					    "indirect offset type `%c' invalid",
1191					    *l);
1192				break;
1193			}
1194			l++;
1195		}
1196
1197		m->in_op = 0;
1198		if (*l == '~') {
1199			m->in_op |= FILE_OPINVERSE;
1200			l++;
1201		}
1202		if ((op = get_op(*l)) != -1) {
1203			m->in_op |= op;
1204			l++;
1205		}
1206		if (*l == '(') {
1207			m->in_op |= FILE_OPINDIRECT;
1208			l++;
1209		}
1210		if (isdigit((unsigned char)*l) || *l == '-') {
1211			m->in_offset = (int32_t)strtol(l, &t, 0);
1212			if (l == t)
1213				if (ms->flags & MAGIC_CHECK)
1214					file_magwarn(ms,
1215					    "in_offset `%s' invalid", l);
1216			l = t;
1217		}
1218		if (*l++ != ')' ||
1219		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
1220			if (ms->flags & MAGIC_CHECK)
1221				file_magwarn(ms,
1222				    "missing ')' in indirect offset");
1223	}
1224	EATAB;
1225
1226#ifdef ENABLE_CONDITIONALS
1227	m->cond = get_cond(l, &l);
1228	if (check_cond(ms, m->cond, cont_level) == -1)
1229		return -1;
1230
1231	EATAB;
1232#endif
1233
1234	if (*l == 'u') {
1235		++l;
1236		m->flag |= UNSIGNED;
1237	}
1238
1239	m->type = get_type(l, &l);
1240	if (m->type == FILE_INVALID) {
1241		if (ms->flags & MAGIC_CHECK)
1242			file_magwarn(ms, "type `%s' invalid", l);
1243		return -1;
1244	}
1245
1246	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1247	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
1248
1249	m->mask_op = 0;
1250	if (*l == '~') {
1251		if (!IS_STRING(m->type))
1252			m->mask_op |= FILE_OPINVERSE;
1253		else if (ms->flags & MAGIC_CHECK)
1254			file_magwarn(ms, "'~' invalid for string types");
1255		++l;
1256	}
1257	m->str_range = 0;
1258	m->str_flags = 0;
1259	m->num_mask = 0;
1260	if ((op = get_op(*l)) != -1) {
1261		if (!IS_STRING(m->type)) {
1262			uint64_t val;
1263			++l;
1264			m->mask_op |= op;
1265			val = (uint64_t)strtoull(l, &t, 0);
1266			l = t;
1267			m->num_mask = file_signextend(ms, m, val);
1268			eatsize(&l);
1269		}
1270		else if (op == FILE_OPDIVIDE) {
1271			int have_range = 0;
1272			while (!isspace((unsigned char)*++l)) {
1273				switch (*l) {
1274				case '0':  case '1':  case '2':
1275				case '3':  case '4':  case '5':
1276				case '6':  case '7':  case '8':
1277				case '9':
1278					if (have_range &&
1279					    (ms->flags & MAGIC_CHECK))
1280						file_magwarn(ms,
1281						    "multiple ranges");
1282					have_range = 1;
1283					m->str_range = strtoul(l, &t, 0);
1284					if (m->str_range == 0)
1285						file_magwarn(ms,
1286						    "zero range");
1287					l = t - 1;
1288					break;
1289				case CHAR_COMPACT_BLANK:
1290					m->str_flags |= STRING_COMPACT_BLANK;
1291					break;
1292				case CHAR_COMPACT_OPTIONAL_BLANK:
1293					m->str_flags |=
1294					    STRING_COMPACT_OPTIONAL_BLANK;
1295					break;
1296				case CHAR_IGNORE_LOWERCASE:
1297					m->str_flags |= STRING_IGNORE_LOWERCASE;
1298					break;
1299				case CHAR_IGNORE_UPPERCASE:
1300					m->str_flags |= STRING_IGNORE_UPPERCASE;
1301					break;
1302				case CHAR_REGEX_OFFSET_START:
1303					m->str_flags |= REGEX_OFFSET_START;
1304					break;
1305				default:
1306					if (ms->flags & MAGIC_CHECK)
1307						file_magwarn(ms,
1308						"string extension `%c' invalid",
1309						*l);
1310					return -1;
1311				}
1312				/* allow multiple '/' for readability */
1313				if (l[1] == '/' &&
1314				    !isspace((unsigned char)l[2]))
1315					l++;
1316			}
1317			if (string_modifier_check(ms, m) == -1)
1318				return -1;
1319		}
1320		else {
1321			if (ms->flags & MAGIC_CHECK)
1322				file_magwarn(ms, "invalid string op: %c", *t);
1323			return -1;
1324		}
1325	}
1326	/*
1327	 * We used to set mask to all 1's here, instead let's just not do
1328	 * anything if mask = 0 (unless you have a better idea)
1329	 */
1330	EATAB;
1331
1332	switch (*l) {
1333	case '>':
1334	case '<':
1335  		m->reln = *l;
1336  		++l;
1337		if (*l == '=') {
1338			if (ms->flags & MAGIC_CHECK) {
1339				file_magwarn(ms, "%c= not supported",
1340				    m->reln);
1341				return -1;
1342			}
1343		   ++l;
1344		}
1345		break;
1346	/* Old-style anding: "0 byte &0x80 dynamically linked" */
1347	case '&':
1348	case '^':
1349	case '=':
1350  		m->reln = *l;
1351  		++l;
1352		if (*l == '=') {
1353		   /* HP compat: ignore &= etc. */
1354		   ++l;
1355		}
1356		break;
1357	case '!':
1358		m->reln = *l;
1359		++l;
1360		break;
1361	default:
1362  		m->reln = '=';	/* the default relation */
1363		if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
1364		    isspace((unsigned char)l[1])) || !l[1])) {
1365			m->reln = *l;
1366			++l;
1367		}
1368		break;
1369	}
1370	/*
1371	 * Grab the value part, except for an 'x' reln.
1372	 */
1373	if (m->reln != 'x' && getvalue(ms, m, &l, action))
1374		return -1;
1375
1376	/*
1377	 * TODO finish this macro and start using it!
1378	 * #define offsetcheck {if (offset > HOWMANY-1)
1379	 *	magwarn("offset too big"); }
1380	 */
1381
1382	/*
1383	 * Now get last part - the description
1384	 */
1385	EATAB;
1386	if (l[0] == '\b') {
1387		++l;
1388		m->flag |= NOSPACE;
1389	} else if ((l[0] == '\\') && (l[1] == 'b')) {
1390		++l;
1391		++l;
1392		m->flag |= NOSPACE;
1393	}
1394	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
1395		continue;
1396	if (i == sizeof(m->desc)) {
1397		m->desc[sizeof(m->desc) - 1] = '\0';
1398		if (ms->flags & MAGIC_CHECK)
1399			file_magwarn(ms, "description `%s' truncated", m->desc);
1400	}
1401
1402        /*
1403	 * We only do this check while compiling, or if any of the magic
1404	 * files were not compiled.
1405         */
1406        if (ms->flags & MAGIC_CHECK) {
1407		if (check_format(ms, m) == -1)
1408			return -1;
1409	}
1410#ifndef COMPILE_ONLY
1411	if (action == FILE_CHECK) {
1412		file_mdump(m);
1413	}
1414#endif
1415	m->mimetype[0] = '\0';		/* initialise MIME type to none */
1416	if (m->cont_level == 0)
1417		++(*nmentryp);		/* make room for next */
1418	return 0;
1419}
1420
1421/*
1422 * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
1423 * if valid
1424 */
1425private int
1426parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
1427{
1428	const char *l = line;
1429	char *el;
1430	unsigned long factor;
1431	struct magic *m = &me->mp[0];
1432
1433	if (m->factor_op != FILE_FACTOR_OP_NONE) {
1434		file_magwarn(ms,
1435		    "Current entry already has a strength type: %c %d",
1436		    m->factor_op, m->factor);
1437		return -1;
1438	}
1439	EATAB;
1440	switch (*l) {
1441	case FILE_FACTOR_OP_NONE:
1442	case FILE_FACTOR_OP_PLUS:
1443	case FILE_FACTOR_OP_MINUS:
1444	case FILE_FACTOR_OP_TIMES:
1445	case FILE_FACTOR_OP_DIV:
1446		m->factor_op = *l++;
1447		break;
1448	default:
1449		file_magwarn(ms, "Unknown factor op `%c'", *l);
1450		return -1;
1451	}
1452	EATAB;
1453	factor = strtoul(l, &el, 0);
1454	if (factor > 255) {
1455		file_magwarn(ms, "Too large factor `%lu'", factor);
1456		goto out;
1457	}
1458	if (*el && !isspace((unsigned char)*el)) {
1459		file_magwarn(ms, "Bad factor `%s'", l);
1460		goto out;
1461	}
1462	m->factor = (uint8_t)factor;
1463	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
1464		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
1465		    m->factor_op, m->factor);
1466		goto out;
1467	}
1468	return 0;
1469out:
1470	m->factor_op = FILE_FACTOR_OP_NONE;
1471	m->factor = 0;
1472	return -1;
1473}
1474
1475/*
1476 * parse a MIME annotation line from magic file, put into magic[index - 1]
1477 * if valid
1478 */
1479private int
1480parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
1481{
1482	size_t i;
1483	const char *l = line;
1484	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
1485
1486	if (m->mimetype[0] != '\0') {
1487		file_magwarn(ms, "Current entry already has a MIME type `%s',"
1488		    " new type `%s'", m->mimetype, l);
1489		return -1;
1490	}
1491
1492	EATAB;
1493	for (i = 0;
1494	     *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
1495	     || strchr("-+/.", *l)) && i < sizeof(m->mimetype);
1496	     m->mimetype[i++] = *l++)
1497		continue;
1498	if (i == sizeof(m->mimetype)) {
1499		m->desc[sizeof(m->mimetype) - 1] = '\0';
1500		if (ms->flags & MAGIC_CHECK)
1501			file_magwarn(ms, "MIME type `%s' truncated %zu",
1502			    m->mimetype, i);
1503	} else
1504		m->mimetype[i] = '\0';
1505
1506	if (i > 0)
1507		return 0;
1508	else
1509		return -1;
1510}
1511
/* Return non-zero if `c' is an integer conversion character we accept. */
private int
fmt_is_int_conv(int c)
{
	switch (c) {
	case 'i':
	case 'd':
	case 'u':
	case 'x':
	case 'X':
		return 1;
	default:
		return 0;
	}
}

/* Skip a run of decimal digits and return the position after it. */
private const char *
fmt_skip_digits(const char *ptr)
{
	while (isdigit((unsigned char)*ptr))
		ptr++;
	return ptr;
}

/*
 * Check that the printf conversion starting at `ptr' (the character after
 * the '%') is acceptable for the format class `type', one of the FILE_FMT_*
 * values.
 *
 * FILE_FMT_NUM/FILE_FMT_QUAD: optional '-', optional '.', digits, optional
 *	'.', digits; FILE_FMT_QUAD then requires "ll".  What follows must be
 *	one of i, c, d, u, x, X, optionally preceded (except for c) by 'l',
 *	'h' or "hh".
 * FILE_FMT_FLOAT/FILE_FMT_DOUBLE: the same prefix, then one of e E f F g G.
 * FILE_FMT_STR: optional '-', digits, optional '.' and digits, then 's'.
 *
 * Returns 0 if the conversion is acceptable and -1 if it is not (including
 * an empty string).  Any other `type' is an internal error and aborts.
 */
private int
check_format_type(const char *ptr, int type)
{
	int quad = 0;
	int c;

	if (*ptr == '\0') {
		/* Missing format string; bad */
		return -1;
	}

	switch (type) {
	case FILE_FMT_QUAD:
		quad = 1;
		/*FALLTHROUGH*/
	case FILE_FMT_NUM:
		if (*ptr == '-')
			ptr++;
		if (*ptr == '.')
			ptr++;
		ptr = fmt_skip_digits(ptr);
		if (*ptr == '.')
			ptr++;
		ptr = fmt_skip_digits(ptr);
		if (quad) {
			if (*ptr++ != 'l')
				return -1;
			if (*ptr++ != 'l')
				return -1;
		}

		c = *ptr++;
		switch (c) {
		case 'l':
			return fmt_is_int_conv(*ptr) ? 0 : -1;

		case 'h':
			/*
			 * "h" and "hh" take the same conversions; "h" used
			 * to be accepted with 'd' only.
			 */
			if (*ptr == 'h')
				ptr++;
			return fmt_is_int_conv(*ptr) ? 0 : -1;

		case 'c':
			return 0;

		default:
			return fmt_is_int_conv(c) ? 0 : -1;
		}

	case FILE_FMT_FLOAT:
	case FILE_FMT_DOUBLE:
		if (*ptr == '-')
			ptr++;
		if (*ptr == '.')
			ptr++;
		ptr = fmt_skip_digits(ptr);
		if (*ptr == '.')
			ptr++;
		ptr = fmt_skip_digits(ptr);

		switch (*ptr++) {
		case 'e':
		case 'E':
		case 'f':
		case 'F':
		case 'g':
		case 'G':
			return 0;

		default:
			return -1;
		}

	case FILE_FMT_STR:
		if (*ptr == '-')
			ptr++;
		ptr = fmt_skip_digits(ptr);
		if (*ptr == '.') {
			ptr++;
			ptr = fmt_skip_digits(ptr);
		}

		return *ptr == 's' ? 0 : -1;

	default:
		/* internal error */
		abort();
	}
	/*NOTREACHED*/
	return -1;
}
1635
1636/*
1637 * Check that the optional printf format in description matches
1638 * the type of the magic.
1639 */
1640private int
1641check_format(struct magic_set *ms, struct magic *m)
1642{
1643	char *ptr;
1644
1645	for (ptr = m->desc; *ptr; ptr++)
1646		if (*ptr == '%')
1647			break;
1648	if (*ptr == '\0') {
1649		/* No format string; ok */
1650		return 1;
1651	}
1652
1653	assert(file_nformats == file_nnames);
1654
1655	if (m->type >= file_nformats) {
1656		file_magwarn(ms, "Internal error inconsistency between "
1657		    "m->type and format strings");
1658		return -1;
1659	}
1660	if (file_formats[m->type] == FILE_FMT_NONE) {
1661		file_magwarn(ms, "No format string for `%s' with description "
1662		    "`%s'", m->desc, file_names[m->type]);
1663		return -1;
1664	}
1665
1666	ptr++;
1667	if (check_format_type(ptr, file_formats[m->type]) == -1) {
1668		/*
1669		 * TODO: this error message is unhelpful if the format
1670		 * string is not one character long
1671		 */
1672		file_magwarn(ms, "Printf format `%c' is not valid for type "
1673		    "`%s' in description `%s'",
1674		    ptr && *ptr ? *ptr : '?',
1675		    file_names[m->type], m->desc);
1676		return -1;
1677	}
1678
1679	for (; *ptr; ptr++) {
1680		if (*ptr == '%') {
1681			file_magwarn(ms,
1682			    "Too many format strings (should have at most one) "
1683			    "for `%s' with description `%s'",
1684			    file_names[m->type], m->desc);
1685			return -1;
1686		}
1687	}
1688	return 0;
1689}
1690
1691/*
1692 * Read a numeric value from a pointer, into the value union of a magic
1693 * pointer, according to the magic type.  Update the string pointer to point
1694 * just after the number read.  Return 0 for success, non-zero for failure.
1695 */
1696private int
1697getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
1698{
1699	int slen;
1700
1701	switch (m->type) {
1702	case FILE_BESTRING16:
1703	case FILE_LESTRING16:
1704	case FILE_STRING:
1705	case FILE_PSTRING:
1706	case FILE_REGEX:
1707	case FILE_SEARCH:
1708		*p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen, action);
1709		if (*p == NULL) {
1710			if (ms->flags & MAGIC_CHECK)
1711				file_magwarn(ms, "cannot get string from `%s'",
1712				    m->value.s);
1713			return -1;
1714		}
1715		m->vallen = slen;
1716		if (m->type == FILE_PSTRING)
1717			m->vallen++;
1718		return 0;
1719	case FILE_FLOAT:
1720	case FILE_BEFLOAT:
1721	case FILE_LEFLOAT:
1722		if (m->reln != 'x') {
1723			char *ep;
1724#ifdef HAVE_STRTOF
1725			m->value.f = strtof(*p, &ep);
1726#else
1727			m->value.f = (float)strtod(*p, &ep);
1728#endif
1729			*p = ep;
1730		}
1731		return 0;
1732	case FILE_DOUBLE:
1733	case FILE_BEDOUBLE:
1734	case FILE_LEDOUBLE:
1735		if (m->reln != 'x') {
1736			char *ep;
1737			m->value.d = strtod(*p, &ep);
1738			*p = ep;
1739		}
1740		return 0;
1741	default:
1742		if (m->reln != 'x') {
1743			char *ep;
1744			m->value.q = file_signextend(ms, m,
1745			    (uint64_t)strtoull(*p, &ep, 0));
1746			*p = ep;
1747			eatsize(p);
1748		}
1749		return 0;
1750	}
1751}
1752
1753/*
1754 * Convert a string containing C character escapes.  Stop at an unescaped
1755 * space or tab.
1756 * Copy the converted version to "p", returning its length in *slen.
1757 * Return updated scan pointer as function result.
1758 */
1759private const char *
1760getstr(struct magic_set *ms, const char *s, char *p, int plen, int *slen, int action)
1761{
1762	const char *origs = s;
1763	char 	*origp = p;
1764	char	*pmax = p + plen - 1;
1765	int	c;
1766	int	val;
1767
1768	while ((c = *s++) != '\0') {
1769		if (isspace((unsigned char) c))
1770			break;
1771		if (p >= pmax) {
1772			file_error(ms, 0, "string too long: `%s'", origs);
1773			return NULL;
1774		}
1775		if (c == '\\') {
1776			switch(c = *s++) {
1777
1778			case '\0':
1779				if (action == FILE_COMPILE)
1780					file_magwarn(ms, "incomplete escape");
1781				goto out;
1782
1783			case '\t':
1784				if (action == FILE_COMPILE) {
1785					file_magwarn(ms,
1786					    "escaped tab found, use \\t instead");
1787					action++;
1788				}
1789				/*FALLTHROUGH*/
1790			default:
1791				if (action == FILE_COMPILE) {
1792					if (isprint((unsigned char)c))
1793					    file_magwarn(ms,
1794						"no need to escape `%c'", c);
1795					else
1796					    file_magwarn(ms,
1797						"unknown escape sequence: \\%03o", c);
1798				}
1799				/*FALLTHROUGH*/
1800			/* space, perhaps force people to use \040? */
1801			case ' ':
1802#if 0
1803			/*
1804			 * Other things people escape, but shouldn't need to,
1805			 * so we disallow them
1806			 */
1807			case '\'':
1808			case '"':
1809			case '?':
1810#endif
1811			/* Relations */
1812			case '>':
1813			case '<':
1814			case '&':
1815			case '^':
1816			case '=':
1817			case '!':
1818			/* and baskslash itself */
1819			case '\\':
1820				*p++ = (char) c;
1821				break;
1822
1823			case 'a':
1824				*p++ = '\a';
1825				break;
1826
1827			case 'b':
1828				*p++ = '\b';
1829				break;
1830
1831			case 'f':
1832				*p++ = '\f';
1833				break;
1834
1835			case 'n':
1836				*p++ = '\n';
1837				break;
1838
1839			case 'r':
1840				*p++ = '\r';
1841				break;
1842
1843			case 't':
1844				*p++ = '\t';
1845				break;
1846
1847			case 'v':
1848				*p++ = '\v';
1849				break;
1850
1851			/* \ and up to 3 octal digits */
1852			case '0':
1853			case '1':
1854			case '2':
1855			case '3':
1856			case '4':
1857			case '5':
1858			case '6':
1859			case '7':
1860				val = c - '0';
1861				c = *s++;  /* try for 2 */
1862				if (c >= '0' && c <= '7') {
1863					val = (val << 3) | (c - '0');
1864					c = *s++;  /* try for 3 */
1865					if (c >= '0' && c <= '7')
1866						val = (val << 3) | (c-'0');
1867					else
1868						--s;
1869				}
1870				else
1871					--s;
1872				*p++ = (char)val;
1873				break;
1874
1875			/* \x and up to 2 hex digits */
1876			case 'x':
1877				val = 'x';	/* Default if no digits */
1878				c = hextoint(*s++);	/* Get next char */
1879				if (c >= 0) {
1880					val = c;
1881					c = hextoint(*s++);
1882					if (c >= 0)
1883						val = (val << 4) + c;
1884					else
1885						--s;
1886				} else
1887					--s;
1888				*p++ = (char)val;
1889				break;
1890			}
1891		} else
1892			*p++ = (char)c;
1893	}
1894out:
1895	*p = '\0';
1896	*slen = p - origp;
1897	return s;
1898}
1899
1900
/*
 * Single hex char to int; -1 if not a hex char.
 *
 * The tests are made on `c' itself, so any int that is not one of the
 * characters 0-9, a-f or A-F (negative values included) yields -1.
 */
private int
hextoint(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}
1915
1916
/*
 * Print a string containing C character escapes.
 *
 * Writes `s' to `fp', printing characters in the range 040..0176 as they
 * are and everything else as a backslash escape (\a \b \f \n \r \t \v, or
 * three octal digits).  If `len' is ~0U the string is taken to be NUL
 * terminated; otherwise exactly `len' bytes are printed and s[len] is
 * never read.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	const int nul_terminated = (len == ~0U);
	char	c;

	for (;;) {
		/* Check the count before touching the next byte. */
		if (!nul_terminated && len-- == 0)
			break;
		c = *s++;
		if (nul_terminated && c == '\0')
			break;

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\a':
			(void) fputc('a', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			(void) fprintf(fp, "%.3o", (unsigned int)(c & 0377));
			break;
		}
	}
}
1975
/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * Advances *p over an optional 'u' followed by at most one of the letters
 * l, s, h, b or c; letter case is ignored (via LOWCASE).
 */
private void
eatsize(const char **p)
{
	const char *l = *p;
	int suffix;

	/* optional unsigned marker */
	if (LOWCASE(*l) == 'u')
		l++;

	/* optional width letter: long, short (s or h), char/byte (b or c) */
	suffix = LOWCASE(*l);
	if (suffix == 'l' || suffix == 's' || suffix == 'h' ||
	    suffix == 'b' || suffix == 'c')
		l++;

	*p = l;
}
2001
2002/*
2003 * handle a compiled file.
2004 */
2005private int
2006apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
2007    const char *fn)
2008{
2009	int fd;
2010	struct stat st;
2011	uint32_t *ptr;
2012	uint32_t version;
2013	int needsbyteswap;
2014	char *dbname = NULL;
2015	void *mm = NULL;
2016
2017	mkdbname(fn, &dbname, 0);
2018	if (dbname == NULL)
2019		goto error2;
2020
2021	if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
2022		goto error2;
2023
2024	if (fstat(fd, &st) == -1) {
2025		file_error(ms, errno, "cannot stat `%s'", dbname);
2026		goto error1;
2027	}
2028	if (st.st_size < 8) {
2029		file_error(ms, 0, "file `%s' is too small", dbname);
2030		goto error1;
2031	}
2032
2033#ifdef QUICK
2034	if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
2035	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
2036		file_error(ms, errno, "cannot map `%s'", dbname);
2037		goto error1;
2038	}
2039#define RET	2
2040#else
2041	if ((mm = CAST(void *, malloc((size_t)st.st_size))) == NULL) {
2042		file_oomem(ms, (size_t)st.st_size);
2043		goto error1;
2044	}
2045	if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
2046		file_badread(ms);
2047		goto error1;
2048	}
2049#define RET	1
2050#endif
2051	*magicp = CAST(struct magic *, mm);
2052	(void)close(fd);
2053	fd = -1;
2054	ptr = (uint32_t *)(void *)*magicp;
2055	if (*ptr != MAGICNO) {
2056		if (swap4(*ptr) != MAGICNO) {
2057			file_error(ms, 0, "bad magic in `%s'", dbname);
2058			goto error1;
2059		}
2060		needsbyteswap = 1;
2061	} else
2062		needsbyteswap = 0;
2063	if (needsbyteswap)
2064		version = swap4(ptr[1]);
2065	else
2066		version = ptr[1];
2067	if (version != VERSIONNO) {
2068		file_error(ms, 0, "File %d.%d supports only %d version magic "
2069		    "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
2070		    VERSIONNO, dbname, version);
2071		goto error1;
2072	}
2073	*nmagicp = (uint32_t)(st.st_size / sizeof(struct magic));
2074	if (*nmagicp > 0)
2075		(*nmagicp)--;
2076	(*magicp)++;
2077	if (needsbyteswap)
2078		byteswap(*magicp, *nmagicp);
2079	free(dbname);
2080	return RET;
2081
2082error1:
2083	if (fd != -1)
2084		(void)close(fd);
2085	if (mm) {
2086#ifdef QUICK
2087		(void)munmap((void *)mm, (size_t)st.st_size);
2088#else
2089		free(mm);
2090#endif
2091	} else {
2092		*magicp = NULL;
2093		*nmagicp = 0;
2094	}
2095error2:
2096	free(dbname);
2097	return -1;
2098}
2099
2100private const uint32_t ar[] = {
2101    MAGICNO, VERSIONNO
2102};
2103/*
2104 * handle an mmaped file.
2105 */
2106private int
2107apprentice_compile(struct magic_set *ms, struct magic **magicp,
2108    uint32_t *nmagicp, const char *fn)
2109{
2110	int fd;
2111	char *dbname;
2112	int rv = -1;
2113
2114	mkdbname(fn, &dbname, 1);
2115
2116	if (dbname == NULL)
2117		goto out;
2118
2119	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) {
2120		file_error(ms, errno, "cannot open `%s'", dbname);
2121		goto out;
2122	}
2123
2124	if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
2125		file_error(ms, errno, "error writing `%s'", dbname);
2126		goto out;
2127	}
2128
2129	if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
2130	    != sizeof(struct magic)) {
2131		file_error(ms, errno, "error seeking `%s'", dbname);
2132		goto out;
2133	}
2134
2135	if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
2136	    != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
2137		file_error(ms, errno, "error writing `%s'", dbname);
2138		goto out;
2139	}
2140
2141	(void)close(fd);
2142	rv = 0;
2143out:
2144	free(dbname);
2145	return rv;
2146}
2147
private const char ext[] = ".mgc";
/*
 * make a dbname
 *
 * Stores in *buf a newly allocated copy of `fn' that ends in ".mgc": the
 * extension is appended unless `fn' already ends with it.  If `strip' is
 * non-zero, everything up to and including the last '/' of `fn' is dropped
 * first.  The caller frees the result.
 *
 * *buf is set to NULL if the name would be longer than MAXPATHLEN or if
 * memory cannot be allocated.
 */
private void
mkdbname(const char *fn, char **buf, int strip)
{
	const size_t extlen = sizeof(ext) - 1;
	const char *slash;
	size_t fnlen, len;
	int has_ext;
	char *name;

	*buf = NULL;

	if (strip && (slash = strrchr(fn, '/')) != NULL)
		fn = slash + 1;

	/*
	 * Look at the tail of the name only: ".mgc" occurring earlier in
	 * the path says nothing about how the name ends.
	 */
	fnlen = strlen(fn);
	has_ext = fnlen >= extlen &&
	    memcmp(fn + fnlen - extlen, ext, extlen) == 0;

	len = has_ext ? fnlen : fnlen + extlen;
	if (len > MAXPATHLEN)
		return;

	if ((name = malloc(len + 1)) == NULL)
		return;
	memcpy(name, fn, fnlen);
	if (!has_ext)
		memcpy(name + fnlen, ext, extlen);
	name[len] = '\0';
	*buf = name;
}
2171
2172/*
2173 * Byteswap an mmap'ed file if needed
2174 */
2175private void
2176byteswap(struct magic *magic, uint32_t nmagic)
2177{
2178	uint32_t i;
2179	for (i = 0; i < nmagic; i++)
2180		bs1(&magic[i]);
2181}
2182
/*
 * swap a short: return `sv' with its two bytes exchanged
 */
private uint16_t
swap2(uint16_t sv)
{
	return (uint16_t)(((sv & 0x00ffU) << 8) | ((sv & 0xff00U) >> 8));
}
2196
/*
 * swap an int: return `sv' with the order of its four bytes reversed
 */
private uint32_t
swap4(uint32_t sv)
{
	uint32_t rv;

	rv  = (sv & 0x000000ffU) << 24;
	rv |= (sv & 0x0000ff00U) << 8;
	rv |= (sv & 0x00ff0000U) >> 8;
	rv |= (sv & 0xff000000U) >> 24;
	return rv;
}
2212
/*
 * swap a quad: return `sv' with the order of its eight bytes reversed
 */
private uint64_t
swap8(uint64_t sv)
{
	uint64_t rv = 0;
	int i;

	/* Move the low byte of sv onto the low end of rv, eight times. */
	for (i = 0; i < 8; i++) {
		rv = (rv << 8) | (sv & 0xff);
		sv >>= 8;
	}
	return rv;
}
2243
2244/*
2245 * byteswap a single magic entry
2246 */
2247private void
2248bs1(struct magic *m)
2249{
2250	m->cont_level = swap2(m->cont_level);
2251	m->offset = swap4((uint32_t)m->offset);
2252	m->in_offset = swap4((uint32_t)m->in_offset);
2253	m->lineno = swap4((uint32_t)m->lineno);
2254	if (IS_STRING(m->type)) {
2255		m->str_range = swap4(m->str_range);
2256		m->str_flags = swap4(m->str_flags);
2257	}
2258	else {
2259		m->value.q = swap8(m->value.q);
2260		m->num_mask = swap8(m->num_mask);
2261	}
2262}
2263