apprentice.c revision 169942
168349Sobrien/* 2133359Sobrien * Copyright (c) Ian F. Darwin 1986-1995. 3133359Sobrien * Software written by Ian F. Darwin and others; 4133359Sobrien * maintained 1995-present by Christos Zoulas and others. 5133359Sobrien * 6133359Sobrien * Redistribution and use in source and binary forms, with or without 7133359Sobrien * modification, are permitted provided that the following conditions 8133359Sobrien * are met: 9133359Sobrien * 1. Redistributions of source code must retain the above copyright 10133359Sobrien * notice immediately at the beginning of the file, without modification, 11133359Sobrien * this list of conditions, and the following disclaimer. 12133359Sobrien * 2. Redistributions in binary form must reproduce the above copyright 13133359Sobrien * notice, this list of conditions and the following disclaimer in the 14133359Sobrien * documentation and/or other materials provided with the distribution. 15133359Sobrien * 16133359Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17133359Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18133359Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19133359Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20133359Sobrien * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21133359Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22133359Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23133359Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24133359Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25133359Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26133359Sobrien * SUCH DAMAGE. 27133359Sobrien */ 28133359Sobrien/* 2968349Sobrien * apprentice - make one pass through /etc/magic, learning its secrets. 3068349Sobrien */ 3168349Sobrien 3280588Sobrien#include "file.h" 33133359Sobrien#include "magic.h" 3468349Sobrien#include <stdlib.h> 3584685Sobrien#ifdef HAVE_UNISTD_H 3684685Sobrien#include <unistd.h> 3784685Sobrien#endif 3868349Sobrien#include <string.h> 39169942Sobrien#include <assert.h> 4068349Sobrien#include <ctype.h> 41103373Sobrien#include <fcntl.h> 42133359Sobrien#include <sys/stat.h> 43133359Sobrien#include <sys/param.h> 4474784Sobrien#ifdef QUICK 4574784Sobrien#include <sys/mman.h> 4674784Sobrien#endif 4768349Sobrien 4868349Sobrien#ifndef lint 49169942SobrienFILE_RCSID("@(#)$Id: apprentice.c,v 1.100 2006/12/11 21:48:49 christos Exp $") 5068349Sobrien#endif /* lint */ 5168349Sobrien 5268349Sobrien#define EATAB {while (isascii((unsigned char) *l) && \ 5368349Sobrien isspace((unsigned char) *l)) ++l;} 5468349Sobrien#define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 5568349Sobrien tolower((unsigned char) (l)) : (l)) 5675937Sobrien/* 5775937Sobrien * Work around a bug in headers on Digital Unix. 5875937Sobrien * At least confirmed for: OSF1 V4.0 878 5975937Sobrien */ 6075937Sobrien#if defined(__osf__) && defined(__DECC) 6175937Sobrien#ifdef MAP_FAILED 6275937Sobrien#undef MAP_FAILED 6375937Sobrien#endif 6475937Sobrien#endif 6568349Sobrien 6675937Sobrien#ifndef MAP_FAILED 6775937Sobrien#define MAP_FAILED (void *) -1 6875937Sobrien#endif 6968349Sobrien 7075937Sobrien#ifndef MAP_FILE 7175937Sobrien#define MAP_FILE 0 7275937Sobrien#endif 7375937Sobrien 74133359Sobrien#ifndef MAXPATHLEN 75133359Sobrien#define MAXPATHLEN 1024 7668349Sobrien#endif 7768349Sobrien 78159764Sobrien#define IS_PLAINSTRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \ 79139368Sobrien (t) == FILE_BESTRING16 || (t) == FILE_LESTRING16) 80159764Sobrien 81159764Sobrien#define IS_STRING(t) (IS_PLAINSTRING(t) || (t) == FILE_REGEX || \ 82159764Sobrien (t) == FILE_SEARCH) 83139368Sobrien 84159764Sobrienstruct magic_entry { 85159764Sobrien struct magic *mp; 86159764Sobrien uint32_t cont_count; 87159764Sobrien uint32_t max_count; 88159764Sobrien}; 89159764Sobrien 90169942Sobrienconst int file_formats[] = { FILE_FORMAT_STRING }; 91169942Sobrienconst size_t file_nformats = sizeof(file_formats) / sizeof(file_formats[0]); 92169942Sobrienconst char *file_names[] = { FILE_FORMAT_NAME }; 93169942Sobrienconst size_t file_nnames = sizeof(file_names) / sizeof(file_names[0]); 94169942Sobrien 95159764Sobrienprivate int getvalue(struct magic_set *ms, struct magic *, const char **); 96133359Sobrienprivate int hextoint(int); 97159764Sobrienprivate const char *getstr(struct magic_set *, const char *, char *, int, 98159764Sobrien int *); 99159764Sobrienprivate int parse(struct magic_set *, struct magic_entry **, uint32_t *, 100169942Sobrien const char *, size_t, int); 101159764Sobrienprivate void eatsize(const char **); 102133359Sobrienprivate int apprentice_1(struct magic_set *, const char *, int, struct mlist *); 103159764Sobrienprivate size_t apprentice_magic_strength(const struct magic *); 104159764Sobrienprivate int apprentice_sort(const void *, const void *); 105133359Sobrienprivate int apprentice_file(struct magic_set *, struct magic **, uint32_t *, 106133359Sobrien const char *, int); 107133359Sobrienprivate void byteswap(struct magic *, uint32_t); 108133359Sobrienprivate void bs1(struct magic *); 109133359Sobrienprivate uint16_t swap2(uint16_t); 110133359Sobrienprivate uint32_t swap4(uint32_t); 111169942Sobrienprivate uint64_t swap8(uint64_t); 112139368Sobrienprivate char *mkdbname(const char *, char *, size_t, int); 113133359Sobrienprivate int apprentice_map(struct magic_set *, struct magic **, uint32_t *, 114133359Sobrien const char *); 115133359Sobrienprivate int apprentice_compile(struct magic_set *, struct magic **, uint32_t *, 116133359Sobrien const char *); 117169942Sobrienprivate int check_format_type(const char *, int); 118139368Sobrienprivate int check_format(struct magic_set *, struct magic *); 11968349Sobrien 120133359Sobrienprivate size_t maxmagic = 0; 121133359Sobrienprivate size_t magicsize = sizeof(struct magic); 12268349Sobrien 123159764Sobrien 12480588Sobrien#ifdef COMPILE_ONLY 12574784Sobrien 126103373Sobrienint main(int, char *[]); 12780588Sobrien 12880588Sobrienint 129103373Sobrienmain(int argc, char *argv[]) 13080588Sobrien{ 13180588Sobrien int ret; 132133359Sobrien struct magic_set *ms; 133133359Sobrien char *progname; 13480588Sobrien 13580588Sobrien if ((progname = strrchr(argv[0], '/')) != NULL) 13680588Sobrien progname++; 13780588Sobrien else 13880588Sobrien progname = argv[0]; 13980588Sobrien 14080588Sobrien if (argc != 2) { 141133359Sobrien (void)fprintf(stderr, "Usage: %s file\n", progname); 142133359Sobrien return 1; 14380588Sobrien } 14480588Sobrien 145133359Sobrien if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 146133359Sobrien (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 147133359Sobrien return 1; 148133359Sobrien } 149133359Sobrien ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 150133359Sobrien if (ret == 1) 151133359Sobrien (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 152133359Sobrien magic_close(ms); 153133359Sobrien return ret; 15480588Sobrien} 15580588Sobrien#endif /* COMPILE_ONLY */ 15680588Sobrien 15780588Sobrien 15874784Sobrien/* 15974784Sobrien * Handle one file. 16074784Sobrien */ 161133359Sobrienprivate int 162133359Sobrienapprentice_1(struct magic_set *ms, const char *fn, int action, 163133359Sobrien struct mlist *mlist) 16474784Sobrien{ 16574784Sobrien struct magic *magic = NULL; 166103373Sobrien uint32_t nmagic = 0; 16774784Sobrien struct mlist *ml; 16874784Sobrien int rv = -1; 169133359Sobrien int mapped; 17074784Sobrien 171133359Sobrien if (magicsize != FILE_MAGICSIZE) { 172133359Sobrien file_error(ms, 0, "magic element size %lu != %lu", 173133359Sobrien (unsigned long)sizeof(*magic), 174133359Sobrien (unsigned long)FILE_MAGICSIZE); 175133359Sobrien return -1; 17674784Sobrien } 177133359Sobrien 178133359Sobrien if (action == FILE_COMPILE) { 179133359Sobrien rv = apprentice_file(ms, &magic, &nmagic, fn, action); 180133359Sobrien if (rv != 0) 181133359Sobrien return -1; 182133359Sobrien rv = apprentice_compile(ms, &magic, &nmagic, fn); 183133359Sobrien free(magic); 184133359Sobrien return rv; 185133359Sobrien } 186159764Sobrien 18780588Sobrien#ifndef COMPILE_ONLY 188133359Sobrien if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) { 189133359Sobrien if (ms->flags & MAGIC_CHECK) 190139368Sobrien file_magwarn(ms, "using regular magic file `%s'", fn); 191133359Sobrien rv = apprentice_file(ms, &magic, &nmagic, fn, action); 192133359Sobrien if (rv != 0) 193133359Sobrien return -1; 194133359Sobrien mapped = 0; 195133359Sobrien } 19674784Sobrien 197133359Sobrien mapped = rv; 19874784Sobrien 199133359Sobrien if (magic == NULL || nmagic == 0) { 200133359Sobrien file_delmagic(magic, mapped, nmagic); 201133359Sobrien return -1; 202133359Sobrien } 203133359Sobrien 20474784Sobrien if ((ml = malloc(sizeof(*ml))) == NULL) { 205133359Sobrien file_delmagic(magic, mapped, nmagic); 206169942Sobrien file_oomem(ms, sizeof(*ml)); 207133359Sobrien return -1; 20874784Sobrien } 20974784Sobrien 21074784Sobrien ml->magic = magic; 21174784Sobrien ml->nmagic = nmagic; 212133359Sobrien ml->mapped = mapped; 21374784Sobrien 214133359Sobrien mlist->prev->next = ml; 215133359Sobrien ml->prev = mlist->prev; 216133359Sobrien ml->next = mlist; 217133359Sobrien mlist->prev = ml; 21874784Sobrien 219133359Sobrien return 0; 22080588Sobrien#endif /* COMPILE_ONLY */ 22174784Sobrien} 22274784Sobrien 223133359Sobrienprotected void 224133359Sobrienfile_delmagic(struct magic *p, int type, size_t entries) 225133359Sobrien{ 226133359Sobrien if (p == NULL) 227133359Sobrien return; 228133359Sobrien switch (type) { 229133359Sobrien case 2: 230133359Sobrien p--; 231133359Sobrien (void)munmap((void *)p, sizeof(*p) * (entries + 1)); 232133359Sobrien break; 233133359Sobrien case 1: 234133359Sobrien p--; 235133359Sobrien /*FALLTHROUGH*/ 236133359Sobrien case 0: 237133359Sobrien free(p); 238133359Sobrien break; 239133359Sobrien default: 240133359Sobrien abort(); 241133359Sobrien } 242133359Sobrien} 24374784Sobrien 244133359Sobrien 245103373Sobrien/* const char *fn: list of magic files */ 246133359Sobrienprotected struct mlist * 247133359Sobrienfile_apprentice(struct magic_set *ms, const char *fn, int action) 24868349Sobrien{ 249133359Sobrien char *p, *mfn, *afn = NULL; 25068349Sobrien int file_err, errs = -1; 251133359Sobrien struct mlist *mlist; 252169942Sobrien static const char mime[] = ".mime"; 25368349Sobrien 254133359Sobrien if (fn == NULL) 255133359Sobrien fn = getenv("MAGIC"); 256133359Sobrien if (fn == NULL) 257133359Sobrien fn = MAGIC; 258133359Sobrien 259133359Sobrien if ((fn = mfn = strdup(fn)) == NULL) { 260169942Sobrien file_oomem(ms, strlen(fn)); 261133359Sobrien return NULL; 26268349Sobrien } 263133359Sobrien 264133359Sobrien if ((mlist = malloc(sizeof(*mlist))) == NULL) { 265133359Sobrien free(mfn); 266169942Sobrien file_oomem(ms, sizeof(*mlist)); 267133359Sobrien return NULL; 268133359Sobrien } 269133359Sobrien mlist->next = mlist->prev = mlist; 270133359Sobrien 27168349Sobrien while (fn) { 27268349Sobrien p = strchr(fn, PATHSEP); 27368349Sobrien if (p) 27468349Sobrien *p++ = '\0'; 275133359Sobrien if (*fn == '\0') 276133359Sobrien break; 277133359Sobrien if (ms->flags & MAGIC_MIME) { 278169942Sobrien size_t len = strlen(fn) + sizeof(mime); 279169942Sobrien if ((afn = malloc(len)) == NULL) { 280133359Sobrien free(mfn); 281133359Sobrien free(mlist); 282169942Sobrien file_oomem(ms, len); 283133359Sobrien return NULL; 284133359Sobrien } 285133359Sobrien (void)strcpy(afn, fn); 286169942Sobrien (void)strcat(afn, mime); 287133359Sobrien fn = afn; 288133359Sobrien } 289133359Sobrien file_err = apprentice_1(ms, fn, action, mlist); 29068349Sobrien if (file_err > errs) 29168349Sobrien errs = file_err; 292133359Sobrien if (afn) { 293133359Sobrien free(afn); 294133359Sobrien afn = NULL; 295133359Sobrien } 29668349Sobrien fn = p; 29768349Sobrien } 298133359Sobrien if (errs == -1) { 299133359Sobrien free(mfn); 300133359Sobrien free(mlist); 301133359Sobrien mlist = NULL; 302133359Sobrien file_error(ms, 0, "could not find any magic files!"); 303133359Sobrien return NULL; 304133359Sobrien } 30568349Sobrien free(mfn); 306133359Sobrien return mlist; 30768349Sobrien} 30868349Sobrien 309169942Sobrien/* 310169942Sobrien * Get weight of this magic entry, for sorting purposes. 311169942Sobrien */ 312159764Sobrienprivate size_t 313159764Sobrienapprentice_magic_strength(const struct magic *m) 314159764Sobrien{ 315169942Sobrien#define MULT 10 316169942Sobrien size_t val = 2 * MULT; /* baseline strength */ 317169942Sobrien 318159764Sobrien switch (m->type) { 319159764Sobrien case FILE_BYTE: 320169942Sobrien val += 1 * MULT; 321169942Sobrien break; 322159764Sobrien 323159764Sobrien case FILE_SHORT: 324159764Sobrien case FILE_LESHORT: 325159764Sobrien case FILE_BESHORT: 326169942Sobrien val += 2 * MULT; 327169942Sobrien break; 328159764Sobrien 329159764Sobrien case FILE_LONG: 330159764Sobrien case FILE_LELONG: 331159764Sobrien case FILE_BELONG: 332159764Sobrien case FILE_MELONG: 333169942Sobrien val += 4 * MULT; 334169942Sobrien break; 335159764Sobrien 336159764Sobrien case FILE_PSTRING: 337159764Sobrien case FILE_STRING: 338169942Sobrien val += m->vallen * MULT; 339169942Sobrien break; 340169942Sobrien 341159764Sobrien case FILE_BESTRING16: 342159764Sobrien case FILE_LESTRING16: 343169942Sobrien val += m->vallen * MULT / 2; 344169942Sobrien break; 345169942Sobrien 346159764Sobrien case FILE_SEARCH: 347169942Sobrien case FILE_REGEX: 348169942Sobrien val += m->vallen; 349169942Sobrien break; 350159764Sobrien 351159764Sobrien case FILE_DATE: 352159764Sobrien case FILE_LEDATE: 353159764Sobrien case FILE_BEDATE: 354159764Sobrien case FILE_MEDATE: 355159764Sobrien case FILE_LDATE: 356159764Sobrien case FILE_LELDATE: 357159764Sobrien case FILE_BELDATE: 358159764Sobrien case FILE_MELDATE: 359169942Sobrien val += 4 * MULT; 360169942Sobrien break; 361159764Sobrien 362169942Sobrien case FILE_QUAD: 363169942Sobrien case FILE_BEQUAD: 364169942Sobrien case FILE_LEQUAD: 365169942Sobrien case FILE_QDATE: 366169942Sobrien case FILE_LEQDATE: 367169942Sobrien case FILE_BEQDATE: 368169942Sobrien case FILE_QLDATE: 369169942Sobrien case FILE_LEQLDATE: 370169942Sobrien case FILE_BEQLDATE: 371169942Sobrien val += 8 * MULT; 372169942Sobrien break; 373169942Sobrien 374159764Sobrien default: 375169942Sobrien val = 0; 376169942Sobrien (void)fprintf(stderr, "Bad type %d\n", m->type); 377169942Sobrien abort(); 378159764Sobrien } 379169942Sobrien 380169942Sobrien switch (m->reln) { 381169942Sobrien case 'x': /* matches anything penalize */ 382169942Sobrien val = 0; 383169942Sobrien break; 384169942Sobrien 385169942Sobrien case '!': 386169942Sobrien case '=': /* Exact match, prefer */ 387169942Sobrien val += MULT; 388169942Sobrien break; 389169942Sobrien 390169942Sobrien case '>': 391169942Sobrien case '<': /* comparison match reduce strength */ 392169942Sobrien val -= 2 * MULT; 393169942Sobrien break; 394169942Sobrien 395169942Sobrien case '^': 396169942Sobrien case '&': /* masking bits, we could count them too */ 397169942Sobrien val -= MULT; 398169942Sobrien break; 399169942Sobrien 400169942Sobrien default: 401169942Sobrien (void)fprintf(stderr, "Bad relation %c\n", m->reln); 402169942Sobrien abort(); 403169942Sobrien } 404169942Sobrien return val; 405159764Sobrien} 406159764Sobrien 407169942Sobrien/* 408169942Sobrien * Sort callback for sorting entries by "strength" (basically length) 409169942Sobrien */ 410159764Sobrienprivate int 411159764Sobrienapprentice_sort(const void *a, const void *b) 412159764Sobrien{ 413159764Sobrien const struct magic_entry *ma = a; 414159764Sobrien const struct magic_entry *mb = b; 415159764Sobrien size_t sa = apprentice_magic_strength(ma->mp); 416159764Sobrien size_t sb = apprentice_magic_strength(mb->mp); 417159764Sobrien if (sa == sb) 418159764Sobrien return 0; 419159764Sobrien else if (sa > sb) 420159764Sobrien return -1; 421159764Sobrien else 422159764Sobrien return 1; 423159764Sobrien} 424159764Sobrien 42574784Sobrien/* 42674784Sobrien * parse from a file 427103373Sobrien * const char *fn: name of magic file 42874784Sobrien */ 429133359Sobrienprivate int 430133359Sobrienapprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 431133359Sobrien const char *fn, int action) 43268349Sobrien{ 433133359Sobrien private const char hdr[] = 43468349Sobrien "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 43568349Sobrien FILE *f; 43668349Sobrien char line[BUFSIZ+1]; 43768349Sobrien int errs = 0; 438159764Sobrien struct magic_entry *marray; 439169942Sobrien uint32_t marraycount, i, mentrycount = 0; 440169942Sobrien size_t lineno = 0; 44168349Sobrien 442169942Sobrien ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 443169942Sobrien 444139368Sobrien f = fopen(ms->file = fn, "r"); 44574784Sobrien if (f == NULL) { 44668349Sobrien if (errno != ENOENT) 447133359Sobrien file_error(ms, errno, "cannot read magic file `%s'", 448133359Sobrien fn); 44968349Sobrien return -1; 45068349Sobrien } 45168349Sobrien 452169942Sobrien maxmagic = MAXMAGIS; 453159825Sobrien if ((marray = calloc(maxmagic, sizeof(*marray))) == NULL) { 454133359Sobrien (void)fclose(f); 455169942Sobrien file_oomem(ms, maxmagic * sizeof(*marray)); 456133359Sobrien return -1; 45774784Sobrien } 458159764Sobrien marraycount = 0; 45974784Sobrien 460133359Sobrien /* print silly verbose header for USG compat. */ 461133359Sobrien if (action == FILE_CHECK) 462133359Sobrien (void)fprintf(stderr, "%s\n", hdr); 463133359Sobrien 464169942Sobrien /* read and parse this file */ 465139368Sobrien for (ms->line = 1; fgets(line, BUFSIZ, f) != NULL; ms->line++) { 466139368Sobrien size_t len; 467139368Sobrien len = strlen(line); 468169942Sobrien if (len == 0) /* null line, garbage, etc */ 46968349Sobrien continue; 470169942Sobrien if (line[len - 1] == '\n') { 471169942Sobrien lineno++; 472159764Sobrien line[len - 1] = '\0'; /* delete newline */ 473169942Sobrien } 474169942Sobrien if (line[0] == '\0') /* empty, do not parse */ 475169942Sobrien continue; 476169942Sobrien if (line[0] == '#') /* comment, do not parse */ 477169942Sobrien continue; 478169942Sobrien if (parse(ms, &marray, &marraycount, line, lineno, action) != 0) 479159764Sobrien errs++; 48068349Sobrien } 48168349Sobrien 482133359Sobrien (void)fclose(f); 483159764Sobrien if (errs) 484159764Sobrien goto out; 485159764Sobrien 486159764Sobrien#ifndef NOORDER 487159764Sobrien qsort(marray, marraycount, sizeof(*marray), apprentice_sort); 488159764Sobrien#endif 489159764Sobrien 490169942Sobrien for (i = 0; i < marraycount; i++) 491159764Sobrien mentrycount += marray[i].cont_count; 492159764Sobrien 493159764Sobrien if ((*magicp = malloc(sizeof(**magicp) * mentrycount)) == NULL) { 494169942Sobrien file_oomem(ms, sizeof(**magicp) * mentrycount); 495159764Sobrien errs++; 496159764Sobrien goto out; 497159764Sobrien } 498159764Sobrien 499159764Sobrien mentrycount = 0; 500159764Sobrien for (i = 0; i < marraycount; i++) { 501159764Sobrien (void)memcpy(*magicp + mentrycount, marray[i].mp, 502159764Sobrien marray[i].cont_count * sizeof(**magicp)); 503159764Sobrien mentrycount += marray[i].cont_count; 504159764Sobrien } 505159764Sobrienout: 506159764Sobrien for (i = 0; i < marraycount; i++) 507159764Sobrien free(marray[i].mp); 508159764Sobrien free(marray); 50974784Sobrien if (errs) { 51074784Sobrien *magicp = NULL; 51174784Sobrien *nmagicp = 0; 512159764Sobrien return errs; 513159764Sobrien } else { 514159764Sobrien *nmagicp = mentrycount; 515159764Sobrien return 0; 51674784Sobrien } 517159764Sobrien 51868349Sobrien} 51968349Sobrien 52068349Sobrien/* 52168349Sobrien * extend the sign bit if the comparison is to be signed 52268349Sobrien */ 523169942Sobrienprotected uint64_t 524169942Sobrienfile_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 52568349Sobrien{ 52668349Sobrien if (!(m->flag & UNSIGNED)) 52768349Sobrien switch(m->type) { 52868349Sobrien /* 52968349Sobrien * Do not remove the casts below. They are 53068349Sobrien * vital. When later compared with the data, 53168349Sobrien * the sign extension must have happened. 53268349Sobrien */ 533133359Sobrien case FILE_BYTE: 53468349Sobrien v = (char) v; 53568349Sobrien break; 536133359Sobrien case FILE_SHORT: 537133359Sobrien case FILE_BESHORT: 538133359Sobrien case FILE_LESHORT: 53968349Sobrien v = (short) v; 54068349Sobrien break; 541133359Sobrien case FILE_DATE: 542133359Sobrien case FILE_BEDATE: 543133359Sobrien case FILE_LEDATE: 544159764Sobrien case FILE_MEDATE: 545133359Sobrien case FILE_LDATE: 546133359Sobrien case FILE_BELDATE: 547133359Sobrien case FILE_LELDATE: 548159764Sobrien case FILE_MELDATE: 549133359Sobrien case FILE_LONG: 550133359Sobrien case FILE_BELONG: 551133359Sobrien case FILE_LELONG: 552159764Sobrien case FILE_MELONG: 553103373Sobrien v = (int32_t) v; 55468349Sobrien break; 555169942Sobrien case FILE_QUAD: 556169942Sobrien case FILE_BEQUAD: 557169942Sobrien case FILE_LEQUAD: 558169942Sobrien case FILE_QDATE: 559169942Sobrien case FILE_QLDATE: 560169942Sobrien case FILE_BEQDATE: 561169942Sobrien case FILE_BEQLDATE: 562169942Sobrien case FILE_LEQDATE: 563169942Sobrien case FILE_LEQLDATE: 564169942Sobrien v = (int64_t) v; 565169942Sobrien break; 566133359Sobrien case FILE_STRING: 567133359Sobrien case FILE_PSTRING: 568139368Sobrien case FILE_BESTRING16: 569139368Sobrien case FILE_LESTRING16: 570133359Sobrien case FILE_REGEX: 571159764Sobrien case FILE_SEARCH: 572103373Sobrien break; 57368349Sobrien default: 574133359Sobrien if (ms->flags & MAGIC_CHECK) 575139368Sobrien file_magwarn(ms, "cannot happen: m->type=%d\n", 576133359Sobrien m->type); 577133359Sobrien return ~0U; 57868349Sobrien } 57968349Sobrien return v; 58068349Sobrien} 58168349Sobrien 58268349Sobrien/* 58368349Sobrien * parse one line from magic file, put into magic[index++] if valid 58468349Sobrien */ 585133359Sobrienprivate int 586159764Sobrienparse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, 587169942Sobrien const char *line, size_t lineno, int action) 58868349Sobrien{ 589169942Sobrien size_t i; 590159764Sobrien struct magic_entry *me; 59168349Sobrien struct magic *m; 592159764Sobrien const char *l = line; 59384685Sobrien char *t; 594133359Sobrien private const char *fops = FILE_OPS; 595169942Sobrien uint64_t val; 596159825Sobrien uint32_t cont_level; 59768349Sobrien 598159764Sobrien cont_level = 0; 59968349Sobrien 60068349Sobrien while (*l == '>') { 60168349Sobrien ++l; /* step over */ 602159764Sobrien cont_level++; 60368349Sobrien } 60468349Sobrien 605159764Sobrien#define ALLOC_CHUNK (size_t)10 606159764Sobrien#define ALLOC_INCR (size_t)200 607159764Sobrien 608159764Sobrien if (cont_level != 0) { 609159764Sobrien if (*nmentryp == 0) { 610159764Sobrien file_error(ms, 0, "No current entry for continuation"); 611159764Sobrien return -1; 612159764Sobrien } 613159764Sobrien me = &(*mentryp)[*nmentryp - 1]; 614159764Sobrien if (me->cont_count == me->max_count) { 615159764Sobrien struct magic *nm; 616159764Sobrien size_t cnt = me->max_count + ALLOC_CHUNK; 617159764Sobrien if ((nm = realloc(me->mp, sizeof(*nm) * cnt)) == NULL) { 618169942Sobrien file_oomem(ms, sizeof(*nm) * cnt); 619159764Sobrien return -1; 620159764Sobrien } 621159764Sobrien me->mp = m = nm; 622159764Sobrien me->max_count = cnt; 623159764Sobrien } 624159764Sobrien m = &me->mp[me->cont_count++]; 625159764Sobrien memset(m, 0, sizeof(*m)); 626159764Sobrien m->cont_level = cont_level; 627159764Sobrien } else { 628159764Sobrien if (*nmentryp == maxmagic) { 629159764Sobrien struct magic_entry *mp; 630159764Sobrien 631159764Sobrien maxmagic += ALLOC_INCR; 632159764Sobrien if ((mp = realloc(*mentryp, sizeof(*mp) * maxmagic)) == 633159764Sobrien NULL) { 634169942Sobrien file_oomem(ms, sizeof(*mp) * maxmagic); 635159764Sobrien return -1; 636159764Sobrien } 637159764Sobrien (void)memset(&mp[*nmentryp], 0, sizeof(*mp) * 638159764Sobrien ALLOC_INCR); 639159764Sobrien *mentryp = mp; 640159764Sobrien } 641159764Sobrien me = &(*mentryp)[*nmentryp]; 642159764Sobrien if (me->mp == NULL) { 643159764Sobrien if ((m = malloc(sizeof(*m) * ALLOC_CHUNK)) == NULL) { 644169942Sobrien file_oomem(ms, sizeof(*m) * ALLOC_CHUNK); 645159764Sobrien return -1; 646159764Sobrien } 647159764Sobrien me->mp = m; 648159764Sobrien me->max_count = ALLOC_CHUNK; 649159764Sobrien } else 650159764Sobrien m = me->mp; 651159764Sobrien memset(m, 0, sizeof(*m)); 652159764Sobrien m->cont_level = 0; 653159764Sobrien me->cont_count = 1; 654159764Sobrien } 655169942Sobrien m->lineno = lineno; 656159764Sobrien 657159764Sobrien if (m->cont_level != 0 && *l == '&') { 658159764Sobrien ++l; /* step over */ 659159764Sobrien m->flag |= OFFADD; 660159764Sobrien } 66168349Sobrien if (m->cont_level != 0 && *l == '(') { 66268349Sobrien ++l; /* step over */ 66368349Sobrien m->flag |= INDIR; 664159764Sobrien if (m->flag & OFFADD) 665159764Sobrien m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 66668349Sobrien } 66768349Sobrien if (m->cont_level != 0 && *l == '&') { 66868349Sobrien ++l; /* step over */ 66980588Sobrien m->flag |= OFFADD; 67068349Sobrien } 67168349Sobrien 67268349Sobrien /* get offset, then skip over it */ 673133359Sobrien m->offset = (uint32_t)strtoul(l, &t, 0); 67468349Sobrien if (l == t) 675133359Sobrien if (ms->flags & MAGIC_CHECK) 676139368Sobrien file_magwarn(ms, "offset `%s' invalid", l); 67768349Sobrien l = t; 67868349Sobrien 67968349Sobrien if (m->flag & INDIR) { 680133359Sobrien m->in_type = FILE_LONG; 68174784Sobrien m->in_offset = 0; 68268349Sobrien /* 68368349Sobrien * read [.lbs][+-]nnnnn) 68468349Sobrien */ 68568349Sobrien if (*l == '.') { 68668349Sobrien l++; 68768349Sobrien switch (*l) { 68868349Sobrien case 'l': 689133359Sobrien m->in_type = FILE_LELONG; 69068349Sobrien break; 69168349Sobrien case 'L': 692133359Sobrien m->in_type = FILE_BELONG; 69368349Sobrien break; 694159764Sobrien case 'm': 695159764Sobrien m->in_type = FILE_MELONG; 696159764Sobrien break; 69768349Sobrien case 'h': 69868349Sobrien case 's': 699133359Sobrien m->in_type = FILE_LESHORT; 70068349Sobrien break; 70168349Sobrien case 'H': 70268349Sobrien case 'S': 703133359Sobrien m->in_type = FILE_BESHORT; 70468349Sobrien break; 70568349Sobrien case 'c': 70668349Sobrien case 'b': 70768349Sobrien case 'C': 70868349Sobrien case 'B': 709133359Sobrien m->in_type = FILE_BYTE; 71068349Sobrien break; 71168349Sobrien default: 712133359Sobrien if (ms->flags & MAGIC_CHECK) 713139368Sobrien file_magwarn(ms, 714139368Sobrien "indirect offset type `%c' invalid", 715133359Sobrien *l); 71668349Sobrien break; 71768349Sobrien } 71868349Sobrien l++; 71968349Sobrien } 72080588Sobrien if (*l == '~') { 721159764Sobrien m->in_op |= FILE_OPINVERSE; 72280588Sobrien l++; 72380588Sobrien } 72480588Sobrien switch (*l) { 72580588Sobrien case '&': 726133359Sobrien m->in_op |= FILE_OPAND; 72780588Sobrien l++; 72880588Sobrien break; 72980588Sobrien case '|': 730133359Sobrien m->in_op |= FILE_OPOR; 73180588Sobrien l++; 73280588Sobrien break; 73380588Sobrien case '^': 734133359Sobrien m->in_op |= FILE_OPXOR; 73580588Sobrien l++; 73680588Sobrien break; 73780588Sobrien case '+': 738133359Sobrien m->in_op |= FILE_OPADD; 73980588Sobrien l++; 74080588Sobrien break; 74180588Sobrien case '-': 742133359Sobrien m->in_op |= FILE_OPMINUS; 74380588Sobrien l++; 74480588Sobrien break; 74580588Sobrien case '*': 746133359Sobrien m->in_op |= FILE_OPMULTIPLY; 74780588Sobrien l++; 74880588Sobrien break; 74980588Sobrien case '/': 750133359Sobrien m->in_op |= FILE_OPDIVIDE; 75180588Sobrien l++; 75280588Sobrien break; 75380588Sobrien case '%': 754133359Sobrien m->in_op |= FILE_OPMODULO; 75580588Sobrien l++; 75680588Sobrien break; 75780588Sobrien } 758159764Sobrien if (*l == '(') { 759159764Sobrien m->in_op |= FILE_OPINDIRECT; 760159764Sobrien l++; 761159764Sobrien } 762159764Sobrien if (isdigit((unsigned char)*l) || *l == '-') { 763159764Sobrien m->in_offset = (int32_t)strtol(l, &t, 0); 764159764Sobrien l = t; 765159764Sobrien } 766159764Sobrien if (*l++ != ')' || 767159764Sobrien ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 768133359Sobrien if (ms->flags & MAGIC_CHECK) 769139368Sobrien file_magwarn(ms, 770139368Sobrien "missing ')' in indirect offset"); 77168349Sobrien } 77268349Sobrien 77368349Sobrien 77468349Sobrien while (isascii((unsigned char)*l) && isdigit((unsigned char)*l)) 77568349Sobrien ++l; 77668349Sobrien EATAB; 77768349Sobrien 77868349Sobrien if (*l == 'u') { 77968349Sobrien ++l; 78068349Sobrien m->flag |= UNSIGNED; 78168349Sobrien } 78268349Sobrien 78368349Sobrien /* get type, skip it */ 784169942Sobrien for (i = 0; i < file_nnames; i++) { 785169942Sobrien size_t len = strlen(file_names[i]); 786169942Sobrien if (strncmp(l, file_names[i], len) == 0) { 787169942Sobrien m->type = i; 788169942Sobrien l+= len; 789169942Sobrien break; 790169942Sobrien } 791169942Sobrien } 792169942Sobrien if (i == file_nnames) { 793133359Sobrien if (ms->flags & MAGIC_CHECK) 794139368Sobrien file_magwarn(ms, "type `%s' invalid", l); 79568349Sobrien return -1; 79668349Sobrien } 79768349Sobrien /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 79880588Sobrien /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 79980588Sobrien if (*l == '~') { 800139368Sobrien if (!IS_STRING(m->type)) 801159764Sobrien m->mask_op |= FILE_OPINVERSE; 80268349Sobrien ++l; 80380588Sobrien } 804133359Sobrien if ((t = strchr(fops, *l)) != NULL) { 805133359Sobrien uint32_t op = (uint32_t)(t - fops); 806159764Sobrien if (op != FILE_OPDIVIDE || !IS_PLAINSTRING(m->type)) { 80780588Sobrien ++l; 808133359Sobrien m->mask_op |= op; 809169942Sobrien val = (uint64_t)strtoull(l, &t, 0); 810159764Sobrien l = t; 811133359Sobrien m->mask = file_signextend(ms, m, val); 81280588Sobrien eatsize(&l); 81380588Sobrien } else { 81480588Sobrien m->mask = 0L; 815133359Sobrien while (!isspace((unsigned char)*++l)) { 81668349Sobrien switch (*l) { 81768349Sobrien case CHAR_IGNORE_LOWERCASE: 81868349Sobrien m->mask |= STRING_IGNORE_LOWERCASE; 81968349Sobrien break; 82068349Sobrien case CHAR_COMPACT_BLANK: 82168349Sobrien m->mask |= STRING_COMPACT_BLANK; 82268349Sobrien break; 82368349Sobrien case CHAR_COMPACT_OPTIONAL_BLANK: 82468349Sobrien m->mask |= 82568349Sobrien STRING_COMPACT_OPTIONAL_BLANK; 82668349Sobrien break; 82768349Sobrien default: 828133359Sobrien if (ms->flags & MAGIC_CHECK) 829139368Sobrien file_magwarn(ms, 830139368Sobrien "string extension `%c' invalid", 831133359Sobrien *l); 83268349Sobrien return -1; 83368349Sobrien } 83468349Sobrien } 835159764Sobrien ++l; 83668349Sobrien } 83780588Sobrien } 838133359Sobrien /* 839133359Sobrien * We used to set mask to all 1's here, instead let's just not do 840133359Sobrien * anything if mask = 0 (unless you have a better idea) 841133359Sobrien */ 84268349Sobrien EATAB; 84368349Sobrien 84468349Sobrien switch (*l) { 84568349Sobrien case '>': 84668349Sobrien case '<': 84768349Sobrien /* Old-style anding: "0 byte &0x80 dynamically linked" */ 84868349Sobrien case '&': 84968349Sobrien case '^': 85068349Sobrien case '=': 85168349Sobrien m->reln = *l; 85268349Sobrien ++l; 85368349Sobrien if (*l == '=') { 85468349Sobrien /* HP compat: ignore &= etc. */ 85568349Sobrien ++l; 85668349Sobrien } 85768349Sobrien break; 85868349Sobrien case '!': 859159764Sobrien m->reln = *l; 860159764Sobrien ++l; 861159764Sobrien break; 86268349Sobrien default: 863159764Sobrien if (*l == 'x' && ((isascii((unsigned char)l[1]) && 864159764Sobrien isspace((unsigned char)l[1])) || !l[1])) { 86568349Sobrien m->reln = *l; 86668349Sobrien ++l; 86768349Sobrien goto GetDesc; /* Bill The Cat */ 86868349Sobrien } 86968349Sobrien m->reln = '='; 87068349Sobrien break; 87168349Sobrien } 87268349Sobrien EATAB; 87368349Sobrien 874133359Sobrien if (getvalue(ms, m, &l)) 87568349Sobrien return -1; 87668349Sobrien /* 87768349Sobrien * TODO finish this macro and start using it! 87868349Sobrien * #define offsetcheck {if (offset > HOWMANY-1) 87968349Sobrien * magwarn("offset too big"); } 88068349Sobrien */ 88168349Sobrien 88268349Sobrien /* 88368349Sobrien * now get last part - the description 88468349Sobrien */ 88568349SobrienGetDesc: 88668349Sobrien EATAB; 88768349Sobrien if (l[0] == '\b') { 88868349Sobrien ++l; 88968349Sobrien m->nospflag = 1; 89068349Sobrien } else if ((l[0] == '\\') && (l[1] == 'b')) { 89168349Sobrien ++l; 89268349Sobrien ++l; 89368349Sobrien m->nospflag = 1; 89468349Sobrien } else 89568349Sobrien m->nospflag = 0; 896169942Sobrien for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 897169942Sobrien continue; 898169942Sobrien if (i == sizeof(m->desc)) { 899169942Sobrien m->desc[sizeof(m->desc) - 1] = '\0'; 900169942Sobrien if (ms->flags & MAGIC_CHECK) 901169942Sobrien file_magwarn(ms, "description `%s' truncated", m->desc); 902169942Sobrien } 90368349Sobrien 904169942Sobrien /* 905169942Sobrien * We only do this check while compiling, or if any of the magic 906169942Sobrien * files were not compiled. 907169942Sobrien */ 908169942Sobrien if (ms->flags & MAGIC_CHECK) { 909169942Sobrien if (check_format(ms, m) == -1) 910133359Sobrien return -1; 911133359Sobrien } 912103373Sobrien#ifndef COMPILE_ONLY 913133359Sobrien if (action == FILE_CHECK) { 914133359Sobrien file_mdump(m); 91568349Sobrien } 916103373Sobrien#endif 917159764Sobrien if (m->cont_level == 0) 918159764Sobrien ++(*nmentryp); /* make room for next */ 91968349Sobrien return 0; 92068349Sobrien} 92168349Sobrien 922169942Sobrienprivate int 923169942Sobriencheck_format_type(const char *ptr, int type) 924169942Sobrien{ 925169942Sobrien int quad = 0; 926169942Sobrien if (*ptr == '\0') { 927169942Sobrien /* Missing format string; bad */ 928169942Sobrien return -1; 929169942Sobrien } 930169942Sobrien 931169942Sobrien switch (type) { 932169942Sobrien case FILE_FMT_QUAD: 933169942Sobrien quad = 1; 934169942Sobrien /*FALLTHROUGH*/ 935169942Sobrien case FILE_FMT_NUM: 936169942Sobrien if (*ptr == '-') 937169942Sobrien ptr++; 938169942Sobrien if (*ptr == '.') 939169942Sobrien ptr++; 940169942Sobrien while (isdigit((unsigned char)*ptr)) ptr++; 941169942Sobrien if (*ptr == '.') 942169942Sobrien ptr++; 943169942Sobrien while (isdigit((unsigned char)*ptr)) ptr++; 944169942Sobrien if (quad) { 945169942Sobrien if (*ptr++ != 'l') 946169942Sobrien return -1; 947169942Sobrien if (*ptr++ != 'l') 948169942Sobrien return -1; 949169942Sobrien } 950169942Sobrien 951169942Sobrien switch (*ptr++) { 952169942Sobrien case 'l': 953169942Sobrien switch (*ptr++) { 954169942Sobrien case 'i': 955169942Sobrien case 'd': 956169942Sobrien case 'u': 957169942Sobrien case 'x': 958169942Sobrien case 'X': 959169942Sobrien return 0; 960169942Sobrien default: 961169942Sobrien return -1; 962169942Sobrien } 963169942Sobrien 964169942Sobrien case 'h': 965169942Sobrien switch (*ptr++) { 966169942Sobrien case 'h': 967169942Sobrien switch (*ptr++) { 968169942Sobrien case 'i': 969169942Sobrien case 'd': 970169942Sobrien case 'u': 971169942Sobrien case 'x': 972169942Sobrien case 'X': 973169942Sobrien return 0; 974169942Sobrien default: 975169942Sobrien return -1; 976169942Sobrien } 977169942Sobrien case 'd': 978169942Sobrien return 0; 979169942Sobrien default: 980169942Sobrien return -1; 981169942Sobrien } 982169942Sobrien 983169942Sobrien case 'i': 984169942Sobrien case 'c': 985169942Sobrien case 'd': 986169942Sobrien case 'u': 987169942Sobrien case 'x': 988169942Sobrien case 'X': 989169942Sobrien return 0; 990169942Sobrien 991169942Sobrien default: 992169942Sobrien return -1; 993169942Sobrien } 994169942Sobrien 995169942Sobrien case FILE_FMT_STR: 996169942Sobrien if (*ptr == '-') 997169942Sobrien ptr++; 998169942Sobrien while (isdigit((unsigned char )*ptr)) 999169942Sobrien ptr++; 1000169942Sobrien if (*ptr == '.') { 1001169942Sobrien ptr++; 1002169942Sobrien while (isdigit((unsigned char )*ptr)) 1003169942Sobrien ptr++; 1004169942Sobrien } 1005169942Sobrien 1006169942Sobrien switch (*ptr++) { 1007169942Sobrien case 's': 1008169942Sobrien return 0; 1009169942Sobrien default: 1010169942Sobrien return -1; 1011169942Sobrien } 1012169942Sobrien 1013169942Sobrien default: 1014169942Sobrien /* internal error */ 1015169942Sobrien abort(); 1016169942Sobrien } 1017169942Sobrien /*NOTREACHED*/ 1018169942Sobrien return -1; 1019169942Sobrien} 1020169942Sobrien 1021133359Sobrien/* 1022133359Sobrien * Check that the optional printf format in description matches 1023133359Sobrien * the type of the magic. 1024133359Sobrien */ 1025133359Sobrienprivate int 1026139368Sobriencheck_format(struct magic_set *ms, struct magic *m) 1027133359Sobrien{ 1028133359Sobrien char *ptr; 1029133359Sobrien 1030133359Sobrien for (ptr = m->desc; *ptr; ptr++) 1031133359Sobrien if (*ptr == '%') 1032133359Sobrien break; 1033133359Sobrien if (*ptr == '\0') { 1034133359Sobrien /* No format string; ok */ 1035133359Sobrien return 1; 1036133359Sobrien } 1037169942Sobrien 1038169942Sobrien assert(file_nformats == file_nnames); 1039169942Sobrien 1040169942Sobrien if (m->type >= file_nformats) { 1041169942Sobrien file_error(ms, 0, "Internal error inconsistency between " 1042169942Sobrien "m->type and format strings"); 1043169942Sobrien return -1; 1044133359Sobrien } 1045169942Sobrien if (file_formats[m->type] == FILE_FMT_NONE) { 1046169942Sobrien file_error(ms, 0, "No format string for `%s' with description " 1047169942Sobrien "`%s'", m->desc, file_names[m->type]); 1048169942Sobrien return -1; 1049133359Sobrien } 1050169942Sobrien 1051169942Sobrien ptr++; 1052169942Sobrien if (check_format_type(ptr, file_formats[m->type]) == -1) { 1053169942Sobrien /* 1054169942Sobrien * TODO: this error message is unhelpful if the format 1055169942Sobrien * string is not one character long 1056169942Sobrien */ 1057169942Sobrien file_error(ms, 0, "Printf format `%c' is not valid for type " 1058169942Sobrien " `%s' in description `%s'", *ptr, 1059169942Sobrien file_names[m->type], m->desc); 1060169942Sobrien return -1; 1061169942Sobrien } 1062169942Sobrien 1063133359Sobrien for (; *ptr; ptr++) { 1064169942Sobrien if (*ptr == '%') { 1065169942Sobrien file_error(ms, 0, 1066169942Sobrien "Too many format strings (should have at most one) " 1067169942Sobrien "for `%s' with description `%s'", 1068169942Sobrien file_names[m->type], m->desc); 1069169942Sobrien return -1; 1070133359Sobrien } 1071133359Sobrien } 1072169942Sobrien return 0; 1073133359Sobrien} 1074133359Sobrien 107568349Sobrien/* 107668349Sobrien * Read a numeric value from a pointer, into the value union of a magic 107768349Sobrien * pointer, according to the magic type. Update the string pointer to point 107868349Sobrien * just after the number read. Return 0 for success, non-zero for failure. 107968349Sobrien */ 1080133359Sobrienprivate int 1081159764Sobriengetvalue(struct magic_set *ms, struct magic *m, const char **p) 108268349Sobrien{ 108368349Sobrien int slen; 108468349Sobrien 1085133359Sobrien switch (m->type) { 1086139368Sobrien case FILE_BESTRING16: 1087139368Sobrien case FILE_LESTRING16: 1088133359Sobrien case FILE_STRING: 1089133359Sobrien case FILE_PSTRING: 1090133359Sobrien case FILE_REGEX: 1091159764Sobrien case FILE_SEARCH: 1092133359Sobrien *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen); 1093133359Sobrien if (*p == NULL) { 1094133359Sobrien if (ms->flags & MAGIC_CHECK) 1095139368Sobrien file_magwarn(ms, "cannot get string from `%s'", 1096133359Sobrien m->value.s); 1097133359Sobrien return -1; 1098133359Sobrien } 109968349Sobrien m->vallen = slen; 1100133359Sobrien return 0; 1101133359Sobrien default: 110268349Sobrien if (m->reln != 'x') { 1103159764Sobrien char *ep; 1104169942Sobrien m->value.q = file_signextend(ms, m, 1105169942Sobrien (uint64_t)strtoull(*p, &ep, 0)); 1106159764Sobrien *p = ep; 110768349Sobrien eatsize(p); 110868349Sobrien } 1109133359Sobrien return 0; 1110133359Sobrien } 111168349Sobrien} 111268349Sobrien 111368349Sobrien/* 111468349Sobrien * Convert a string containing C character escapes. Stop at an unescaped 111568349Sobrien * space or tab. 111668349Sobrien * Copy the converted version to "p", returning its length in *slen. 111768349Sobrien * Return updated scan pointer as function result. 111868349Sobrien */ 1119159764Sobrienprivate const char * 1120159764Sobriengetstr(struct magic_set *ms, const char *s, char *p, int plen, int *slen) 112168349Sobrien{ 1122159764Sobrien const char *origs = s; 1123159764Sobrien char *origp = p; 112468349Sobrien char *pmax = p + plen - 1; 112568349Sobrien int c; 112668349Sobrien int val; 112768349Sobrien 112868349Sobrien while ((c = *s++) != '\0') { 112968349Sobrien if (isspace((unsigned char) c)) 113068349Sobrien break; 113168349Sobrien if (p >= pmax) { 1132133359Sobrien file_error(ms, 0, "string too long: `%s'", origs); 1133133359Sobrien return NULL; 113468349Sobrien } 113568349Sobrien if(c == '\\') { 113668349Sobrien switch(c = *s++) { 113768349Sobrien 113868349Sobrien case '\0': 113968349Sobrien goto out; 114068349Sobrien 114168349Sobrien default: 114268349Sobrien *p++ = (char) c; 114368349Sobrien break; 114468349Sobrien 114568349Sobrien case 'n': 114668349Sobrien *p++ = '\n'; 114768349Sobrien break; 114868349Sobrien 114968349Sobrien case 'r': 115068349Sobrien *p++ = '\r'; 115168349Sobrien break; 115268349Sobrien 115368349Sobrien case 'b': 115468349Sobrien *p++ = '\b'; 115568349Sobrien break; 115668349Sobrien 115768349Sobrien case 't': 115868349Sobrien *p++ = '\t'; 115968349Sobrien break; 116068349Sobrien 116168349Sobrien case 'f': 116268349Sobrien *p++ = '\f'; 116368349Sobrien break; 116468349Sobrien 116568349Sobrien case 'v': 116668349Sobrien *p++ = '\v'; 116768349Sobrien break; 116868349Sobrien 116968349Sobrien /* \ and up to 3 octal digits */ 117068349Sobrien case '0': 117168349Sobrien case '1': 117268349Sobrien case '2': 117368349Sobrien case '3': 117468349Sobrien case '4': 117568349Sobrien case '5': 117668349Sobrien case '6': 117768349Sobrien case '7': 117868349Sobrien val = c - '0'; 117968349Sobrien c = *s++; /* try for 2 */ 118068349Sobrien if(c >= '0' && c <= '7') { 118168349Sobrien val = (val<<3) | (c - '0'); 118268349Sobrien c = *s++; /* try for 3 */ 118368349Sobrien if(c >= '0' && c <= '7') 118468349Sobrien val = (val<<3) | (c-'0'); 118568349Sobrien else 118668349Sobrien --s; 118768349Sobrien } 118868349Sobrien else 118968349Sobrien --s; 119068349Sobrien *p++ = (char)val; 119168349Sobrien break; 119268349Sobrien 119368349Sobrien /* \x and up to 2 hex digits */ 119468349Sobrien case 'x': 119568349Sobrien val = 'x'; /* Default if no digits */ 119668349Sobrien c = hextoint(*s++); /* Get next char */ 119768349Sobrien if (c >= 0) { 119868349Sobrien val = c; 119968349Sobrien c = hextoint(*s++); 120068349Sobrien if (c >= 0) 120168349Sobrien val = (val << 4) + c; 120268349Sobrien else 120368349Sobrien --s; 120468349Sobrien } else 120568349Sobrien --s; 120668349Sobrien *p++ = (char)val; 120768349Sobrien break; 120868349Sobrien } 120968349Sobrien } else 121068349Sobrien *p++ = (char)c; 121168349Sobrien } 121268349Sobrienout: 121368349Sobrien *p = '\0'; 121468349Sobrien *slen = p - origp; 121568349Sobrien return s; 121668349Sobrien} 121768349Sobrien 121868349Sobrien 121968349Sobrien/* Single hex char to int; -1 if not a hex char. */ 1220133359Sobrienprivate int 1221103373Sobrienhextoint(int c) 122268349Sobrien{ 122368349Sobrien if (!isascii((unsigned char) c)) 122468349Sobrien return -1; 122568349Sobrien if (isdigit((unsigned char) c)) 122668349Sobrien return c - '0'; 122768349Sobrien if ((c >= 'a')&&(c <= 'f')) 122868349Sobrien return c + 10 - 'a'; 122968349Sobrien if (( c>= 'A')&&(c <= 'F')) 123068349Sobrien return c + 10 - 'A'; 123168349Sobrien return -1; 123268349Sobrien} 123368349Sobrien 123468349Sobrien 123568349Sobrien/* 123668349Sobrien * Print a string containing C character escapes. 123768349Sobrien */ 1238133359Sobrienprotected void 1239133359Sobrienfile_showstr(FILE *fp, const char *s, size_t len) 124068349Sobrien{ 124168349Sobrien char c; 124268349Sobrien 124368349Sobrien for (;;) { 124468349Sobrien c = *s++; 1245133359Sobrien if (len == ~0U) { 124668349Sobrien if (c == '\0') 124768349Sobrien break; 124868349Sobrien } 124968349Sobrien else { 125068349Sobrien if (len-- == 0) 125168349Sobrien break; 125268349Sobrien } 125368349Sobrien if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 125468349Sobrien (void) fputc(c, fp); 125568349Sobrien else { 125668349Sobrien (void) fputc('\\', fp); 125768349Sobrien switch (c) { 125868349Sobrien 125968349Sobrien case '\n': 126068349Sobrien (void) fputc('n', fp); 126168349Sobrien break; 126268349Sobrien 126368349Sobrien case '\r': 126468349Sobrien (void) fputc('r', fp); 126568349Sobrien break; 126668349Sobrien 126768349Sobrien case '\b': 126868349Sobrien (void) fputc('b', fp); 126968349Sobrien break; 127068349Sobrien 127168349Sobrien case '\t': 127268349Sobrien (void) fputc('t', fp); 127368349Sobrien break; 127468349Sobrien 127568349Sobrien case '\f': 127668349Sobrien (void) fputc('f', fp); 127768349Sobrien break; 127868349Sobrien 127968349Sobrien case '\v': 128068349Sobrien (void) fputc('v', fp); 128168349Sobrien break; 128268349Sobrien 128368349Sobrien default: 128468349Sobrien (void) fprintf(fp, "%.3o", c & 0377); 128568349Sobrien break; 128668349Sobrien } 128768349Sobrien } 128868349Sobrien } 128968349Sobrien} 129068349Sobrien 129168349Sobrien/* 129268349Sobrien * eatsize(): Eat the size spec from a number [eg. 10UL] 129368349Sobrien */ 1294133359Sobrienprivate void 1295159764Sobrieneatsize(const char **p) 129668349Sobrien{ 1297159764Sobrien const char *l = *p; 129868349Sobrien 129968349Sobrien if (LOWCASE(*l) == 'u') 130068349Sobrien l++; 130168349Sobrien 130268349Sobrien switch (LOWCASE(*l)) { 130368349Sobrien case 'l': /* long */ 130468349Sobrien case 's': /* short */ 130568349Sobrien case 'h': /* short */ 130668349Sobrien case 'b': /* char/byte */ 130768349Sobrien case 'c': /* char/byte */ 130868349Sobrien l++; 130968349Sobrien /*FALLTHROUGH*/ 131068349Sobrien default: 131168349Sobrien break; 131268349Sobrien } 131368349Sobrien 131468349Sobrien *p = l; 131568349Sobrien} 131674784Sobrien 131774784Sobrien/* 1318103373Sobrien * handle a compiled file. 131974784Sobrien */ 1320133359Sobrienprivate int 1321133359Sobrienapprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 1322133359Sobrien const char *fn) 132374784Sobrien{ 132474784Sobrien int fd; 132574784Sobrien struct stat st; 1326103373Sobrien uint32_t *ptr; 1327103373Sobrien uint32_t version; 132874784Sobrien int needsbyteswap; 1329133359Sobrien char buf[MAXPATHLEN]; 1330139368Sobrien char *dbname = mkdbname(fn, buf, sizeof(buf), 0); 1331133359Sobrien void *mm = NULL; 133274784Sobrien 133380588Sobrien if (dbname == NULL) 133480588Sobrien return -1; 133580588Sobrien 1336159764Sobrien if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 133774784Sobrien return -1; 133874784Sobrien 133974784Sobrien if (fstat(fd, &st) == -1) { 1340133359Sobrien file_error(ms, errno, "cannot stat `%s'", dbname); 134174784Sobrien goto error; 134274784Sobrien } 1343133359Sobrien if (st.st_size < 16) { 1344133359Sobrien file_error(ms, 0, "file `%s' is too small", dbname); 1345133359Sobrien goto error; 1346133359Sobrien } 134774784Sobrien 134880588Sobrien#ifdef QUICK 1349103373Sobrien if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, 135074784Sobrien MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { 1351133359Sobrien file_error(ms, errno, "cannot map `%s'", dbname); 135274784Sobrien goto error; 135374784Sobrien } 1354133359Sobrien#define RET 2 135580588Sobrien#else 1356103373Sobrien if ((mm = malloc((size_t)st.st_size)) == NULL) { 1357169942Sobrien file_oomem(ms, (size_t)st.st_size); 135880588Sobrien goto error; 135980588Sobrien } 1360103373Sobrien if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) { 1361133359Sobrien file_badread(ms); 136280588Sobrien goto error; 136380588Sobrien } 1364133359Sobrien#define RET 1 136580588Sobrien#endif 1366103373Sobrien *magicp = mm; 136774784Sobrien (void)close(fd); 136875937Sobrien fd = -1; 1369133359Sobrien ptr = (uint32_t *)(void *)*magicp; 137074784Sobrien if (*ptr != MAGICNO) { 137174784Sobrien if (swap4(*ptr) != MAGICNO) { 1372133359Sobrien file_error(ms, 0, "bad magic in `%s'"); 137374784Sobrien goto error; 137474784Sobrien } 137574784Sobrien needsbyteswap = 1; 137674784Sobrien } else 137774784Sobrien needsbyteswap = 0; 137874784Sobrien if (needsbyteswap) 137974784Sobrien version = swap4(ptr[1]); 138074784Sobrien else 138174784Sobrien version = ptr[1]; 138274784Sobrien if (version != VERSIONNO) { 1383133359Sobrien file_error(ms, 0, "version mismatch (%d != %d) in `%s'", 1384133359Sobrien version, VERSIONNO, dbname); 138574784Sobrien goto error; 138674784Sobrien } 1387133359Sobrien *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)) - 1; 138874784Sobrien (*magicp)++; 138974784Sobrien if (needsbyteswap) 139074784Sobrien byteswap(*magicp, *nmagicp); 1391133359Sobrien return RET; 139274784Sobrien 139374784Sobrienerror: 139474784Sobrien if (fd != -1) 139574784Sobrien (void)close(fd); 1396103373Sobrien if (mm) { 139780588Sobrien#ifdef QUICK 1398133359Sobrien (void)munmap((void *)mm, (size_t)st.st_size); 139980588Sobrien#else 1400103373Sobrien free(mm); 140180588Sobrien#endif 140280588Sobrien } else { 140374784Sobrien *magicp = NULL; 140474784Sobrien *nmagicp = 0; 140574784Sobrien } 140674784Sobrien return -1; 140774784Sobrien} 140874784Sobrien 1409133359Sobrienprivate const uint32_t ar[] = { 1410133359Sobrien MAGICNO, VERSIONNO 1411133359Sobrien}; 141274784Sobrien/* 141374784Sobrien * handle an mmaped file. 141474784Sobrien */ 1415133359Sobrienprivate int 1416133359Sobrienapprentice_compile(struct magic_set *ms, struct magic **magicp, 1417133359Sobrien uint32_t *nmagicp, const char *fn) 141874784Sobrien{ 141974784Sobrien int fd; 1420133359Sobrien char buf[MAXPATHLEN]; 1421139368Sobrien char *dbname = mkdbname(fn, buf, sizeof(buf), 1); 142274784Sobrien 142380588Sobrien if (dbname == NULL) 142480588Sobrien return -1; 142580588Sobrien 1426159764Sobrien if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) { 1427133359Sobrien file_error(ms, errno, "cannot open `%s'", dbname); 142874784Sobrien return -1; 142974784Sobrien } 143074784Sobrien 1431133359Sobrien if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { 1432133359Sobrien file_error(ms, errno, "error writing `%s'", dbname); 143374784Sobrien return -1; 143474784Sobrien } 143574784Sobrien 1436133359Sobrien if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET) 1437133359Sobrien != sizeof(struct magic)) { 1438133359Sobrien file_error(ms, errno, "error seeking `%s'", dbname); 143974784Sobrien return -1; 144074784Sobrien } 144174784Sobrien 1442133359Sobrien if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp)) 1443133359Sobrien != (ssize_t)(sizeof(struct magic) * *nmagicp)) { 1444133359Sobrien file_error(ms, errno, "error writing `%s'", dbname); 144574784Sobrien return -1; 144674784Sobrien } 144774784Sobrien 144874784Sobrien (void)close(fd); 144974784Sobrien return 0; 145074784Sobrien} 145174784Sobrien 1452133359Sobrienprivate const char ext[] = ".mgc"; 145374784Sobrien/* 145474784Sobrien * make a dbname 145574784Sobrien */ 1456133359Sobrienprivate char * 1457139368Sobrienmkdbname(const char *fn, char *buf, size_t bufsiz, int strip) 145874784Sobrien{ 1459139368Sobrien if (strip) { 1460139368Sobrien const char *p; 1461139368Sobrien if ((p = strrchr(fn, '/')) != NULL) 1462139368Sobrien fn = ++p; 1463139368Sobrien } 1464139368Sobrien 1465133359Sobrien (void)snprintf(buf, bufsiz, "%s%s", fn, ext); 146674784Sobrien return buf; 146774784Sobrien} 146874784Sobrien 146974784Sobrien/* 147074784Sobrien * Byteswap an mmap'ed file if needed 147174784Sobrien */ 1472133359Sobrienprivate void 1473103373Sobrienbyteswap(struct magic *magic, uint32_t nmagic) 147474784Sobrien{ 1475103373Sobrien uint32_t i; 147674784Sobrien for (i = 0; i < nmagic; i++) 147774784Sobrien bs1(&magic[i]); 147874784Sobrien} 147974784Sobrien 148074784Sobrien/* 148174784Sobrien * swap a short 148274784Sobrien */ 1483133359Sobrienprivate uint16_t 1484103373Sobrienswap2(uint16_t sv) 148574784Sobrien{ 1486103373Sobrien uint16_t rv; 1487133359Sobrien uint8_t *s = (uint8_t *)(void *)&sv; 1488133359Sobrien uint8_t *d = (uint8_t *)(void *)&rv; 148974784Sobrien d[0] = s[1]; 149074784Sobrien d[1] = s[0]; 149174784Sobrien return rv; 149274784Sobrien} 149374784Sobrien 149474784Sobrien/* 149574784Sobrien * swap an int 149674784Sobrien */ 1497133359Sobrienprivate uint32_t 1498103373Sobrienswap4(uint32_t sv) 149974784Sobrien{ 1500103373Sobrien uint32_t rv; 1501133359Sobrien uint8_t *s = (uint8_t *)(void *)&sv; 1502133359Sobrien uint8_t *d = (uint8_t *)(void *)&rv; 150374784Sobrien d[0] = s[3]; 150474784Sobrien d[1] = s[2]; 150574784Sobrien d[2] = s[1]; 150674784Sobrien d[3] = s[0]; 150774784Sobrien return rv; 150874784Sobrien} 150974784Sobrien 151074784Sobrien/* 1511169942Sobrien * swap a quad 1512169942Sobrien */ 1513169942Sobrienprivate uint64_t 1514169942Sobrienswap8(uint64_t sv) 1515169942Sobrien{ 1516169942Sobrien uint32_t rv; 1517169942Sobrien uint8_t *s = (uint8_t *)(void *)&sv; 1518169942Sobrien uint8_t *d = (uint8_t *)(void *)&rv; 1519169942Sobrien d[0] = s[3]; 1520169942Sobrien d[1] = s[2]; 1521169942Sobrien d[2] = s[1]; 1522169942Sobrien d[3] = s[0]; 1523169942Sobrien d[4] = s[7]; 1524169942Sobrien d[5] = s[6]; 1525169942Sobrien d[6] = s[5]; 1526169942Sobrien d[7] = s[4]; 1527169942Sobrien return rv; 1528169942Sobrien} 1529169942Sobrien 1530169942Sobrien/* 153174784Sobrien * byteswap a single magic entry 153274784Sobrien */ 1533133359Sobrienprivate void 1534133359Sobrienbs1(struct magic *m) 153574784Sobrien{ 153674784Sobrien m->cont_level = swap2(m->cont_level); 1537133359Sobrien m->offset = swap4((uint32_t)m->offset); 1538133359Sobrien m->in_offset = swap4((uint32_t)m->in_offset); 1539159764Sobrien if (!IS_STRING(m->type)) 1540169942Sobrien m->value.q = swap8(m->value.q); 1541169942Sobrien m->mask = swap8(m->mask); 154274784Sobrien} 1543