file.h revision 267829
1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * file.h - definitions for file(1) program
30 * @(#)$File: file.h,v 1.135 2011/09/20 15:30:14 christos Exp $
31 */
32
33#ifndef __file_h__
34#define __file_h__
35
36#ifdef HAVE_CONFIG_H
37#include <config.h>
38#endif
39
/*
 * printf-style length modifiers for size_t and 64-bit integer arguments.
 * Each expands to a string literal that is meant to be pasted between the
 * '%' and the conversion character of a format string.
 *
 *   non-WIN32:      size_t -> "z",   int64 -> "ll"
 *   WIN32 + _WIN64: size_t -> "I64", int64 -> "I64"
 *   WIN32 only:     size_t -> "",    int64 -> "I64"
 */
#if !defined(WIN32)
# define SIZE_T_FORMAT "z"
# define INT64_T_FORMAT "ll"
#elif defined(_WIN64)
# define SIZE_T_FORMAT "I64"
# define INT64_T_FORMAT "I64"
#else
# define SIZE_T_FORMAT ""
# define INT64_T_FORMAT "I64"
#endif
51
52#include <stdio.h>	/* Include that here, to make sure __P gets defined */
53#include <errno.h>
54#include <fcntl.h>	/* For open and flags */
55#ifdef HAVE_STDINT_H
56#ifndef __STDC_LIMIT_MACROS
57#define __STDC_LIMIT_MACROS
58#endif
59#include <stdint.h>
60#endif
61#ifdef HAVE_INTTYPES_H
62#include <inttypes.h>
63#endif
64#include <regex.h>
65#include <sys/types.h>
66#include <sys/param.h>
67/* Do this here and now, because struct stat gets re-defined on solaris */
68#include <sys/stat.h>
69#include <stdarg.h>
70
71#define ENABLE_CONDITIONALS
72
/* Fallback value for MAGIC when the build has not already defined it. */
#if !defined(MAGIC)
#define MAGIC "/etc/magic"
#endif

/*
 * PATHSEP is ';' on EMX and WIN32 builds and ':' everywhere else.
 * NOTE(review): presumably the separator between entries of a path list.
 */
#if !defined(__EMX__) && !defined(WIN32)
#define PATHSEP	':'
#else
#define PATHSEP	';'
#endif
82
/*
 * Linkage annotations placed in front of declarations.
 * `public' and `protected' expand to nothing; `private' expands to
 * `static'.  Only `protected' may be pre-defined by the build.
 */
#define public
#if !defined(protected)
#define protected
#endif
#define private static
88
/*
 * __arraycount(a) - number of elements in the array `a'.
 *
 * `a' must be a true array (not a pointer); the result has type size_t.
 * The argument is parenthesized before it is subscripted so that any
 * expression designating an array expands with the intended grouping.
 * Left alone if the platform headers already provide it.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif
92
/*
 * __GNUC_PREREQ__(x, y) is non-zero when the compiler identifies itself
 * as GNU C version x.y or newer, and 0 for compilers that do not define
 * __GNUC__.  An existing definition is kept.
 */
#if !defined(__GNUC_PREREQ__)
# if defined(__GNUC__)
#  define __GNUC_PREREQ__(x, y)						\
	((__GNUC__ > (x)) ||						\
	 (__GNUC__ == (x) && __GNUC_MINOR__ >= (y)))
# else
#  define __GNUC_PREREQ__(x, y)	0
# endif
#endif

/* Make __attribute__((...)) vanish on compilers that are not GNU C. */
#if !defined(__GNUC__) && !defined(__attribute__)
# define __attribute__(a)
#endif
108
/*
 * Smaller / larger of two values.  Each argument may be evaluated twice,
 * so do not pass expressions with side effects.  Definitions already
 * supplied by system headers are kept.
 */
#if !defined(MIN)
#define	MIN(a,b)	((a) < (b) ? (a) : (b))
#endif

#if !defined(MAX)
#define	MAX(a,b)	((a) > (b) ? (a) : (b))
#endif
116
/* How much of the file to look at (bytes); the build may override it. */
#if !defined(HOWMANY)
# define HOWMANY (256 * 1024)
#endif

/* Max entries in any one magic file or directory. */
#define MAXMAGIS 8192
/* Max length of a text description / MIME type. */
#define MAXDESC	64
/* Max length of "string" types. */
#define MAXstring 64

/*
 * NOTE(review): MAGICNO and VERSIONNO look like the signature and format
 * version of the compiled magic data - confirm against the loader.
 * FILE_MAGICSIZE equals the size of struct magic as declared in this
 * header, assuming natural alignment with no extra padding.
 */
#define MAGICNO		0xF11E041C
#define VERSIONNO	8
#define FILE_MAGICSIZE	232

/*
 * Mode selectors.
 * NOTE(review): presumably the int argument of file_apprentice() - confirm.
 */
#define FILE_LOAD	0
#define FILE_CHECK	1
#define FILE_COMPILE	2
#define FILE_LIST	3
133
134union VALUETYPE {
135	uint8_t b;
136	uint16_t h;
137	uint32_t l;
138	uint64_t q;
139	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
140	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
141	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
142	char s[MAXstring];	/* the search string or regex pattern */
143	unsigned char us[MAXstring];
144	float f;
145	double d;
146};
147
148struct magic {
149	/* Word 1 */
150	uint16_t cont_level;	/* level of ">" */
151	uint8_t flag;
152#define INDIR		0x01	/* if '(...)' appears */
153#define OFFADD		0x02	/* if '>&' or '>...(&' appears */
154#define INDIROFFADD	0x04	/* if '>&(' appears */
155#define UNSIGNED	0x08	/* comparison is unsigned */
156#define NOSPACE		0x10	/* suppress space character before output */
157#define BINTEST		0x20	/* test is for a binary type (set only
158				   for top-level tests) */
159#define TEXTTEST	0x40	/* for passing to file_softmagic */
160
161	uint8_t factor;
162
163	/* Word 2 */
164	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
165	uint8_t vallen;		/* length of string value, if any */
166	uint8_t type;		/* comparison type (FILE_*) */
167	uint8_t in_type;	/* type of indirection */
168#define 			FILE_INVALID	0
169#define 			FILE_BYTE	1
170#define				FILE_SHORT	2
171#define				FILE_DEFAULT	3
172#define				FILE_LONG	4
173#define				FILE_STRING	5
174#define				FILE_DATE	6
175#define				FILE_BESHORT	7
176#define				FILE_BELONG	8
177#define				FILE_BEDATE	9
178#define				FILE_LESHORT	10
179#define				FILE_LELONG	11
180#define				FILE_LEDATE	12
181#define				FILE_PSTRING	13
182#define				FILE_LDATE	14
183#define				FILE_BELDATE	15
184#define				FILE_LELDATE	16
185#define				FILE_REGEX	17
186#define				FILE_BESTRING16	18
187#define				FILE_LESTRING16	19
188#define				FILE_SEARCH	20
189#define				FILE_MEDATE	21
190#define				FILE_MELDATE	22
191#define				FILE_MELONG	23
192#define				FILE_QUAD	24
193#define				FILE_LEQUAD	25
194#define				FILE_BEQUAD	26
195#define				FILE_QDATE	27
196#define				FILE_LEQDATE	28
197#define				FILE_BEQDATE	29
198#define				FILE_QLDATE	30
199#define				FILE_LEQLDATE	31
200#define				FILE_BEQLDATE	32
201#define				FILE_FLOAT	33
202#define				FILE_BEFLOAT	34
203#define				FILE_LEFLOAT	35
204#define				FILE_DOUBLE	36
205#define				FILE_BEDOUBLE	37
206#define				FILE_LEDOUBLE	38
207#define				FILE_BEID3	39
208#define				FILE_LEID3	40
209#define				FILE_INDIRECT	41
210#define				FILE_NAMES_SIZE	42/* size of array to contain all names */
211
212#define IS_STRING(t) \
213	((t) == FILE_STRING || \
214	 (t) == FILE_PSTRING || \
215	 (t) == FILE_BESTRING16 || \
216	 (t) == FILE_LESTRING16 || \
217	 (t) == FILE_REGEX || \
218	 (t) == FILE_SEARCH || \
219	 (t) == FILE_DEFAULT)
220
221#define FILE_FMT_NONE 0
222#define FILE_FMT_NUM  1 /* "cduxXi" */
223#define FILE_FMT_STR  2 /* "s" */
224#define FILE_FMT_QUAD 3 /* "ll" */
225#define FILE_FMT_FLOAT 4 /* "eEfFgG" */
226#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
227
228	/* Word 3 */
229	uint8_t in_op;		/* operator for indirection */
230	uint8_t mask_op;	/* operator for mask */
231#ifdef ENABLE_CONDITIONALS
232	uint8_t cond;		/* conditional type */
233#else
234	uint8_t dummy;
235#endif
236	uint8_t factor_op;
237#define		FILE_FACTOR_OP_PLUS	'+'
238#define		FILE_FACTOR_OP_MINUS	'-'
239#define		FILE_FACTOR_OP_TIMES	'*'
240#define		FILE_FACTOR_OP_DIV	'/'
241#define		FILE_FACTOR_OP_NONE	'\0'
242
243#define				FILE_OPS	"&|^+-*/%"
244#define				FILE_OPAND	0
245#define				FILE_OPOR	1
246#define				FILE_OPXOR	2
247#define				FILE_OPADD	3
248#define				FILE_OPMINUS	4
249#define				FILE_OPMULTIPLY	5
250#define				FILE_OPDIVIDE	6
251#define				FILE_OPMODULO	7
252#define				FILE_OPS_MASK	0x07 /* mask for above ops */
253#define				FILE_UNUSED_1	0x08
254#define				FILE_UNUSED_2	0x10
255#define				FILE_UNUSED_3	0x20
256#define				FILE_OPINVERSE	0x40
257#define				FILE_OPINDIRECT	0x80
258
259#ifdef ENABLE_CONDITIONALS
260#define				COND_NONE	0
261#define				COND_IF		1
262#define				COND_ELIF	2
263#define				COND_ELSE	3
264#endif /* ENABLE_CONDITIONALS */
265
266	/* Word 4 */
267	uint32_t offset;	/* offset to magic number */
268	/* Word 5 */
269	int32_t in_offset;	/* offset from indirection */
270	/* Word 6 */
271	uint32_t lineno;	/* line number in magic file */
272	/* Word 7,8 */
273	union {
274		uint64_t _mask;	/* for use with numeric and date types */
275		struct {
276			uint32_t _count;	/* repeat/line count */
277			uint32_t _flags;	/* modifier flags */
278		} _s;		/* for use with string types */
279	} _u;
280#define num_mask _u._mask
281#define str_range _u._s._count
282#define str_flags _u._s._flags
283	/* Words 9-16 */
284	union VALUETYPE value;	/* either number or string */
285	/* Words 17-32 */
286	char desc[MAXDESC];	/* description */
287	/* Words 33-48 */
288	char mimetype[MAXDESC]; /* MIME type */
289	/* Words 49-50 */
290	char apple[8];
291};
292
/*
 * Modifier flag bits, each listed with its single-character spelling.
 * NOTE(review): presumably the bits live in magic.str_flags and the
 * CHAR_* values are what a magic file uses to request them - confirm.
 *
 * The two 1-byte pstring variants share both the bit and the character.
 */
#define BIT(A)   (1 << (A))

#define STRING_COMPACT_WHITESPACE		BIT(0)
#define CHAR_COMPACT_WHITESPACE			'W'

#define STRING_COMPACT_OPTIONAL_WHITESPACE	BIT(1)
#define CHAR_COMPACT_OPTIONAL_WHITESPACE	'w'

#define STRING_IGNORE_LOWERCASE			BIT(2)
#define CHAR_IGNORE_LOWERCASE			'c'

#define STRING_IGNORE_UPPERCASE			BIT(3)
#define CHAR_IGNORE_UPPERCASE			'C'

#define REGEX_OFFSET_START			BIT(4)
#define CHAR_REGEX_OFFSET_START			's'

#define STRING_TEXTTEST				BIT(5)
#define CHAR_TEXTTEST				't'

#define STRING_BINTEST				BIT(6)
#define CHAR_BINTEST				'b'

#define PSTRING_1_BE				BIT(7)
#define CHAR_PSTRING_1_BE			'B'
#define PSTRING_1_LE				BIT(7)
#define CHAR_PSTRING_1_LE			'B'

#define PSTRING_2_BE				BIT(8)
#define CHAR_PSTRING_2_BE			'H'
#define PSTRING_2_LE				BIT(9)
#define CHAR_PSTRING_2_LE			'h'

#define PSTRING_4_BE				BIT(10)
#define CHAR_PSTRING_4_BE			'L'
#define PSTRING_4_LE				BIT(11)
#define CHAR_PSTRING_4_LE			'l'

#define PSTRING_LENGTH_INCLUDES_ITSELF		BIT(12)
#define CHAR_PSTRING_LENGTH_INCLUDES_ITSELF     'J'

/* Every pstring length-width bit (PSTRING_1_LE is the same bit as _1_BE). */
#define PSTRING_LEN	\
    (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)

/* Both case-folding bits together. */
#define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)

#define STRING_DEFAULT_RANGE		100
326
327
/*
 * Node of a doubly linked list of magic entry arrays.
 *
 * `mapped' records how the array was obtained:
 *	0 => apprentice_file
 *	1 => apprentice_map + malloc
 *	2 => apprentice_map + mmap
 */
struct mlist {
	struct magic *magic;	/* array of magic entries */
	uint32_t nmagic;	/* number of entries in array */
	int mapped;		/* allocation type, see above */
	struct mlist *next;
	struct mlist *prev;
};
337
/*
 * CAST(T, b)  - convert `b' to type T.
 * RCAST(T, b) - reinterpreting conversion of `b' to type T.
 *
 * C++ builds use the named cast operators; C builds use a plain cast.
 * The C expansions are wrapped in an outer pair of parentheses so that a
 * postfix operator applied to the result (CAST(T, p)[i], CAST(T, p)->m)
 * acts on the converted value, exactly as it does in the C++ form,
 * rather than binding to `b' before the cast is applied.
 */
#ifdef __cplusplus
#define CAST(T, b)	static_cast<T>(b)
#define RCAST(T, b)	reinterpret_cast<T>(b)
#else
#define CAST(T, b)	((T)(b))
#define RCAST(T, b)	((T)(b))
#endif
345
/*
 * Per-level record; struct magic_set holds an array of these in c.li.
 * The last two members exist only when ENABLE_CONDITIONALS is defined.
 */
struct level_info {
	int32_t off;
	int got_match;
#ifdef ENABLE_CONDITIONALS
	int last_match;
	int last_cond;		/* used for error checking by parse() */
#endif
};
354struct magic_set {
355	struct mlist *mlist;
356	struct cont {
357		size_t len;
358		struct level_info *li;
359	} c;
360	struct out {
361		char *buf;		/* Accumulation buffer */
362		char *pbuf;		/* Printable buffer */
363	} o;
364	uint32_t offset;
365	int error;
366	int flags;			/* Control magic tests. */
367	int event_flags;		/* Note things that happened. */
368#define 		EVENT_HAD_ERR		0x01
369	const char *file;
370	size_t line;			/* current magic line number */
371
372	/* data for searches */
373	struct {
374		const char *s;		/* start of search in original source */
375		size_t s_len;		/* length of search region */
376		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
377		size_t rm_len;		/* match length */
378	} search;
379
380	/* FIXME: Make the string dynamically allocated so that e.g.
381	   strings matched in files can be longer than MAXstring */
382	union VALUETYPE ms_value;	/* either number or string */
383};
384
385/* Type for Unicode characters */
386typedef unsigned long unichar;
387
388struct stat;
389protected const char *file_fmttime(uint32_t, int);
390protected int file_buffer(struct magic_set *, int, const char *, const void *,
391    size_t);
392protected int file_fsmagic(struct magic_set *, const char *, struct stat *);
393protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
394protected int file_vprintf(struct magic_set *, const char *, va_list);
395protected size_t file_printedlen(const struct magic_set *);
396protected int file_replace(struct magic_set *, const char *, const char *);
397protected int file_printf(struct magic_set *, const char *, ...)
398    __attribute__((__format__(__printf__, 2, 3)));
399protected int file_reset(struct magic_set *);
400protected int file_tryelf(struct magic_set *, int, const unsigned char *,
401    size_t);
402protected int file_trycdf(struct magic_set *, int, const unsigned char *,
403    size_t);
404#if HAVE_FORK
405protected int file_zmagic(struct magic_set *, int, const char *,
406    const unsigned char *, size_t);
407#endif
408protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t,
409    int);
410protected int file_ascmagic_with_encoding(struct magic_set *,
411    const unsigned char *, size_t, unichar *, size_t, const char *,
412    const char *, int);
413protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
414    unichar **, size_t *, const char **, const char **, const char **);
415protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
416protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
417    size_t, int, int);
418protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
419protected uint64_t file_signextend(struct magic_set *, struct magic *,
420    uint64_t);
421protected void file_delmagic(struct magic *, int type, size_t entries);
422protected void file_badread(struct magic_set *);
423protected void file_badseek(struct magic_set *);
424protected void file_oomem(struct magic_set *, size_t);
425protected void file_error(struct magic_set *, int, const char *, ...)
426    __attribute__((__format__(__printf__, 3, 4)));
427protected void file_magerror(struct magic_set *, const char *, ...)
428    __attribute__((__format__(__printf__, 2, 3)));
429protected void file_magwarn(struct magic_set *, const char *, ...)
430    __attribute__((__format__(__printf__, 2, 3)));
431protected void file_mdump(struct magic *);
432protected void file_showstr(FILE *, const char *, size_t);
433protected size_t file_mbswidth(const char *);
434protected const char *file_getbuffer(struct magic_set *);
435protected ssize_t sread(int, void *, size_t, int);
436protected int file_check_mem(struct magic_set *, unsigned int);
437protected int file_looks_utf8(const unsigned char *, size_t, unichar *,
438    size_t *);
439protected size_t file_pstring_length_size(const struct magic *);
440protected size_t file_pstring_get_length(const struct magic *, const char *);
441#ifdef __EMX__
442protected int file_os2_apptype(struct magic_set *, const char *, const void *,
443    size_t);
444#endif /* __EMX__ */
445
446
/* Tables defined elsewhere; hidden when COMPILE_ONLY is set. */
#if !defined(COMPILE_ONLY)
extern const char *file_names[];
extern const size_t file_nnames;
#endif

/*
 * Fallbacks for libc routines the configure step reported as missing
 * (each one is guarded by the matching HAVE_* macro).
 */

/* strerror(): look the message up in sys_errlist, range-checked. */
#if !defined(HAVE_STRERROR)
extern int sys_nerr;
extern char *sys_errlist[];
#define strerror(e) \
	(((e) < 0 || (e) >= sys_nerr) ? "Unknown error" : sys_errlist[(e)])
#endif

/* strtoul(): substitute the signed conversion. */
#if !defined(HAVE_STRTOUL)
#define strtoul(a, b, c)	strtol(a, b, c)
#endif

#if !defined(HAVE_VASPRINTF)
int vasprintf(char **, const char *, va_list);
#endif

#if !defined(HAVE_ASPRINTF)
int asprintf(char **ptr, const char *format_string, ...);
#endif

#if !defined(HAVE_STRLCPY)
size_t strlcpy(char *dst, const char *src, size_t siz);
#endif

#if !defined(HAVE_STRLCAT)
size_t strlcat(char *dst, const char *src, size_t siz);
#endif

/* A missing getline() means getdelim() is declared here as well. */
#if !defined(HAVE_GETLINE)
ssize_t getline(char **dst, size_t *len, FILE *fp);
ssize_t getdelim(char **dst, size_t *len, int delimiter, FILE *fp);
#endif
480
/* Define QUICK when mmap() and <sys/mman.h> are both available. */
#if !defined(QUICK) && defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H)
#define QUICK
#endif

/* Where <fcntl.h> has no O_BINARY, make it a flag that changes nothing. */
#if !defined(O_BINARY)
#define O_BINARY	0
#endif
488
/*
 * FILE_RCSID(id) - plant the revision string `id' in a translation unit.
 * Use it without a trailing semicolon.
 *
 *   C++:          expands to nothing.
 *   GNU C >= 3:   a static char array marked __used__, so it is kept
 *                 even though nothing refers to it.
 *   other C:      a static function that refers to itself and to `id'.
 */
#if defined(__cplusplus)
#define FILE_RCSID(id)
#elif defined(__GNUC__) && (__GNUC__ >= 3)
#define FILE_RCSID(id) \
static const char rcsid[] __attribute__((__used__)) = id;
#else
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
	return rcsid(p = id); \
}
#endif
502
503#endif /* __file_h__ */
504