1/* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28/* 29 * apprentice - make one pass through /etc/magic, learning its secrets. 30 */ 31 32#include "file.h" 33 34#ifndef lint 35FILE_RCSID("@(#)$File: apprentice.c,v 1.173 2011/12/08 12:38:24 rrt Exp $") 36#endif /* lint */ 37 38#include "magic.h" 39#include <stdlib.h> 40#ifdef HAVE_UNISTD_H 41#include <unistd.h> 42#endif 43#include <string.h> 44#include <assert.h> 45#include <ctype.h> 46#include <fcntl.h> 47#ifdef QUICK 48#include <sys/mman.h> 49#endif 50#include <dirent.h> 51 52#define EATAB {while (isascii((unsigned char) *l) && \ 53 isspace((unsigned char) *l)) ++l;} 54#define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 55 tolower((unsigned char) (l)) : (l)) 56/* 57 * Work around a bug in headers on Digital Unix. 58 * At least confirmed for: OSF1 V4.0 878 59 */ 60#if defined(__osf__) && defined(__DECC) 61#ifdef MAP_FAILED 62#undef MAP_FAILED 63#endif 64#endif 65 66#ifndef MAP_FAILED 67#define MAP_FAILED (void *) -1 68#endif 69 70#ifndef MAP_FILE 71#define MAP_FILE 0 72#endif 73 74struct magic_entry { 75 struct magic *mp; 76 uint32_t cont_count; 77 uint32_t max_count; 78}; 79 80int file_formats[FILE_NAMES_SIZE]; 81const size_t file_nformats = FILE_NAMES_SIZE; 82const char *file_names[FILE_NAMES_SIZE]; 83const size_t file_nnames = FILE_NAMES_SIZE; 84 85private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 86private int hextoint(int); 87private const char *getstr(struct magic_set *, struct magic *, const char *, 88 int); 89private int parse(struct magic_set *, struct magic_entry **, uint32_t *, 90 const char *, size_t, int); 91private void eatsize(const char **); 92private int apprentice_1(struct magic_set *, const char *, int, struct mlist *); 93private size_t apprentice_magic_strength(const struct magic *); 94private int apprentice_sort(const void *, const void *); 95#ifndef COMPILE_ONLY 96private void apprentice_list(struct mlist *, int ); 97#endif 98private int apprentice_load(struct magic_set *, struct magic **, uint32_t *, 99 const char *, int); 100#ifndef COMPILE_ONLY 101private void byteswap(struct magic *, uint32_t); 102private void bs1(struct magic *); 103private uint16_t swap2(uint16_t); 104private uint32_t swap4(uint32_t); 105private uint64_t swap8(uint64_t); 106#endif 107private char *mkdbname(struct magic_set *, const char *, int); 108#ifndef COMPILE_ONLY 109private int apprentice_map(struct magic_set *, struct magic **, uint32_t *, 110 const char *); 111#endif 112private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *, 113 const char *); 114private int check_format_type(const char *, int); 115private int check_format(struct magic_set *, struct magic *); 116private int get_op(char); 117private int parse_mime(struct magic_set *, struct magic_entry *, const char *); 118private int parse_strength(struct magic_set *, struct magic_entry *, const char *); 119private int parse_apple(struct magic_set *, struct magic_entry *, const char *); 120 121 122private size_t maxmagic = 0; 123private size_t magicsize = sizeof(struct magic); 124 125private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 126 127private struct { 128 const char *name; 129 size_t len; 130 int (*fun)(struct magic_set *, struct magic_entry *, const char *); 131} bang[] = { 132#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 133 DECLARE_FIELD(mime), 134 DECLARE_FIELD(apple), 135 DECLARE_FIELD(strength), 136#undef DECLARE_FIELD 137 { NULL, 0, NULL } 138}; 139 140#ifdef COMPILE_ONLY 141 142int main(int, char *[]); 143 144int 145main(int argc, char *argv[]) 146{ 147 int ret; 148 struct magic_set *ms; 149 char *progname; 150 151 if ((progname = strrchr(argv[0], '/')) != NULL) 152 progname++; 153 else 154 progname = argv[0]; 155 156 if (argc != 2) { 157 (void)fprintf(stderr, "Usage: %s file\n", progname); 158 return 1; 159 } 160 161 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 162 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 163 return 1; 164 } 165 ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; 166 if (ret == 1) 167 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 168 magic_close(ms); 169 return ret; 170} 171#endif /* COMPILE_ONLY */ 172 173static const struct type_tbl_s { 174 const char name[16]; 175 const size_t len; 176 const int type; 177 const int format; 178} type_tbl[] = { 179# define XX(s) s, (sizeof(s) - 1) 180# define XX_NULL "", 0 181 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 182 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 183 { XX("default"), FILE_DEFAULT, FILE_FMT_STR }, 184 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 185 { XX("string"), FILE_STRING, FILE_FMT_STR }, 186 { XX("date"), FILE_DATE, FILE_FMT_STR }, 187 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 188 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 189 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 190 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 191 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 192 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 193 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 194 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 195 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 196 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 197 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 198 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 199 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 200 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 201 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 202 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 203 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 204 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 205 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 206 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 207 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 208 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 209 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 210 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 211 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 212 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 213 { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, 214 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 215 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 216 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 217 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 218 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 219 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 220 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 221 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NONE }, 222 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 223# undef XX 224# undef XX_NULL 225}; 226 227private int 228get_type(const char *l, const char **t) 229{ 230 const struct type_tbl_s *p; 231 232 for (p = type_tbl; p->len; p++) { 233 if (strncmp(l, p->name, p->len) == 0) { 234 if (t) 235 *t = l + p->len; 236 break; 237 } 238 } 239 return p->type; 240} 241 242private void 243init_file_tables(void) 244{ 245 static int done = 0; 246 const struct type_tbl_s *p; 247 248 if (done) 249 return; 250 done++; 251 252 for (p = type_tbl; p->len; p++) { 253 assert(p->type < FILE_NAMES_SIZE); 254 file_names[p->type] = p->name; 255 file_formats[p->type] = p->format; 256 } 257} 258 259/* 260 * Handle one file or directory. 261 */ 262private int 263apprentice_1(struct magic_set *ms, const char *fn, int action, 264 struct mlist *mlist) 265{ 266 struct magic *magic = NULL; 267 uint32_t nmagic = 0; 268#ifndef COMPILE_ONLY 269 struct mlist *ml; 270#endif 271 int rv = -1; 272#ifndef COMPILE_ONLY 273 int mapped; 274#endif 275 276 if (magicsize != FILE_MAGICSIZE) { 277 file_error(ms, 0, "magic element size %lu != %lu", 278 (unsigned long)sizeof(*magic), 279 (unsigned long)FILE_MAGICSIZE); 280 return -1; 281 } 282 283 if (action == FILE_COMPILE) { 284 rv = apprentice_load(ms, &magic, &nmagic, fn, action); 285 if (rv != 0) 286 return -1; 287 rv = apprentice_compile(ms, &magic, &nmagic, fn); 288 free(magic); 289 return rv; 290 } 291 292#ifndef COMPILE_ONLY 293 if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) { 294 if (ms->flags & MAGIC_CHECK) 295 file_magwarn(ms, "using regular magic file `%s'", fn); 296 rv = apprentice_load(ms, &magic, &nmagic, fn, action); 297 if (rv != 0) 298 return -1; 299 } 300 301 mapped = rv; 302 303 if (magic == NULL) { 304 file_delmagic(magic, mapped, nmagic); 305 return -1; 306 } 307 308 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) { 309 file_delmagic(magic, mapped, nmagic); 310 file_oomem(ms, sizeof(*ml)); 311 return -1; 312 } 313 314 ml->magic = magic; 315 ml->nmagic = nmagic; 316 ml->mapped = mapped; 317 318 mlist->prev->next = ml; 319 ml->prev = mlist->prev; 320 ml->next = mlist; 321 mlist->prev = ml; 322 323 if (action == FILE_LIST) { 324 printf("Binary patterns:\n"); 325 apprentice_list(mlist, BINTEST); 326 printf("Text patterns:\n"); 327 apprentice_list(mlist, TEXTTEST); 328 } 329#endif /* COMPILE_ONLY */ 330 return 0; 331} 332 333protected void 334file_delmagic(struct magic *p, int type, size_t entries) 335{ 336 if (p == NULL) 337 return; 338 switch (type) { 339 case 2: 340#ifdef QUICK 341 p--; 342 (void)munmap((void *)p, sizeof(*p) * (entries + 1)); 343 break; 344#else 345 (void)&entries; 346 abort(); 347 /*NOTREACHED*/ 348#endif 349 case 1: 350 p--; 351 /*FALLTHROUGH*/ 352 case 0: 353 free(p); 354 break; 355 default: 356 abort(); 357 } 358} 359 360/* const char *fn: list of magic files and directories */ 361protected struct mlist * 362file_apprentice(struct magic_set *ms, const char *fn, int action) 363{ 364 char *p, *mfn; 365 int file_err, errs = -1; 366 struct mlist *mlist; 367 368 if ((fn = magic_getpath(fn, action)) == NULL) 369 return NULL; 370 371 init_file_tables(); 372 373 if ((mfn = strdup(fn)) == NULL) { 374 file_oomem(ms, strlen(fn)); 375 return NULL; 376 } 377 fn = mfn; 378 379 if ((mlist = CAST(struct mlist *, malloc(sizeof(*mlist)))) == NULL) { 380 free(mfn); 381 file_oomem(ms, sizeof(*mlist)); 382 return NULL; 383 } 384 mlist->next = mlist->prev = mlist; 385 386 while (fn) { 387 p = strchr(fn, PATHSEP); 388 if (p) 389 *p++ = '\0'; 390 if (*fn == '\0') 391 break; 392 file_err = apprentice_1(ms, fn, action, mlist); 393 errs = MAX(errs, file_err); 394 fn = p; 395 } 396 if (errs == -1) { 397 free(mfn); 398 free(mlist); 399 mlist = NULL; 400 file_error(ms, 0, "could not find any magic files!"); 401 return NULL; 402 } 403 free(mfn); 404 return mlist; 405} 406 407/* 408 * Get weight of this magic entry, for sorting purposes. 409 */ 410private size_t 411apprentice_magic_strength(const struct magic *m) 412{ 413#define MULT 10 414 size_t val = 2 * MULT; /* baseline strength */ 415 416 switch (m->type) { 417 case FILE_DEFAULT: /* make sure this sorts last */ 418 if (m->factor_op != FILE_FACTOR_OP_NONE) 419 abort(); 420 return 0; 421 422 case FILE_BYTE: 423 val += 1 * MULT; 424 break; 425 426 case FILE_SHORT: 427 case FILE_LESHORT: 428 case FILE_BESHORT: 429 val += 2 * MULT; 430 break; 431 432 case FILE_LONG: 433 case FILE_LELONG: 434 case FILE_BELONG: 435 case FILE_MELONG: 436 val += 4 * MULT; 437 break; 438 439 case FILE_PSTRING: 440 case FILE_STRING: 441 val += m->vallen * MULT; 442 break; 443 444 case FILE_BESTRING16: 445 case FILE_LESTRING16: 446 val += m->vallen * MULT / 2; 447 break; 448 449 case FILE_SEARCH: 450 case FILE_REGEX: 451 val += m->vallen * MAX(MULT / m->vallen, 1); 452 break; 453 454 case FILE_DATE: 455 case FILE_LEDATE: 456 case FILE_BEDATE: 457 case FILE_MEDATE: 458 case FILE_LDATE: 459 case FILE_LELDATE: 460 case FILE_BELDATE: 461 case FILE_MELDATE: 462 case FILE_FLOAT: 463 case FILE_BEFLOAT: 464 case FILE_LEFLOAT: 465 val += 4 * MULT; 466 break; 467 468 case FILE_QUAD: 469 case FILE_BEQUAD: 470 case FILE_LEQUAD: 471 case FILE_QDATE: 472 case FILE_LEQDATE: 473 case FILE_BEQDATE: 474 case FILE_QLDATE: 475 case FILE_LEQLDATE: 476 case FILE_BEQLDATE: 477 case FILE_DOUBLE: 478 case FILE_BEDOUBLE: 479 case FILE_LEDOUBLE: 480 val += 8 * MULT; 481 break; 482 483 default: 484 val = 0; 485 (void)fprintf(stderr, "Bad type %d\n", m->type); 486 abort(); 487 } 488 489 switch (m->reln) { 490 case 'x': /* matches anything penalize */ 491 case '!': /* matches almost anything penalize */ 492 val = 0; 493 break; 494 495 case '=': /* Exact match, prefer */ 496 val += MULT; 497 break; 498 499 case '>': 500 case '<': /* comparison match reduce strength */ 501 val -= 2 * MULT; 502 break; 503 504 case '^': 505 case '&': /* masking bits, we could count them too */ 506 val -= MULT; 507 break; 508 509 default: 510 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 511 abort(); 512 } 513 514 if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ 515 val = 1; 516 517 switch (m->factor_op) { 518 case FILE_FACTOR_OP_NONE: 519 break; 520 case FILE_FACTOR_OP_PLUS: 521 val += m->factor; 522 break; 523 case FILE_FACTOR_OP_MINUS: 524 val -= m->factor; 525 break; 526 case FILE_FACTOR_OP_TIMES: 527 val *= m->factor; 528 break; 529 case FILE_FACTOR_OP_DIV: 530 val /= m->factor; 531 break; 532 default: 533 abort(); 534 } 535 536 /* 537 * Magic entries with no description get a bonus because they depend 538 * on subsequent magic entries to print something. 539 */ 540 if (m->desc[0] == '\0') 541 val++; 542 return val; 543} 544 545/* 546 * Sort callback for sorting entries by "strength" (basically length) 547 */ 548private int 549apprentice_sort(const void *a, const void *b) 550{ 551 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 552 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 553 size_t sa = apprentice_magic_strength(ma->mp); 554 size_t sb = apprentice_magic_strength(mb->mp); 555 if (sa == sb) 556 return 0; 557 else if (sa > sb) 558 return -1; 559 else 560 return 1; 561} 562 563/* 564 * Shows sorted patterns list in the order which is used for the matching 565 */ 566#ifndef COMPILE_ONLY 567private void 568apprentice_list(struct mlist *mlist, int mode) 569{ 570 uint32_t magindex = 0; 571 struct mlist *ml; 572 for (ml = mlist->next; ml != mlist; ml = ml->next) { 573 for (magindex = 0; magindex < ml->nmagic; magindex++) { 574 struct magic *m = &ml->magic[magindex]; 575 if ((m->flag & mode) != mode) { 576 /* Skip sub-tests */ 577 while (magindex + 1 < ml->nmagic && 578 ml->magic[magindex + 1].cont_level != 0) 579 ++magindex; 580 continue; /* Skip to next top-level test*/ 581 } 582 583 /* 584 * Try to iterate over the tree until we find item with 585 * description/mimetype. 586 */ 587 while (magindex + 1 < ml->nmagic && 588 ml->magic[magindex + 1].cont_level != 0 && 589 *ml->magic[magindex].desc == '\0' && 590 *ml->magic[magindex].mimetype == '\0') 591 magindex++; 592 593 printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", 594 apprentice_magic_strength(m), 595 ml->magic[magindex].desc, 596 ml->magic[magindex].mimetype); 597 } 598 } 599} 600#endif /* COMPILE_ONLY */ 601 602private void 603set_test_type(struct magic *mstart, struct magic *m) 604{ 605 switch (m->type) { 606 case FILE_BYTE: 607 case FILE_SHORT: 608 case FILE_LONG: 609 case FILE_DATE: 610 case FILE_BESHORT: 611 case FILE_BELONG: 612 case FILE_BEDATE: 613 case FILE_LESHORT: 614 case FILE_LELONG: 615 case FILE_LEDATE: 616 case FILE_LDATE: 617 case FILE_BELDATE: 618 case FILE_LELDATE: 619 case FILE_MEDATE: 620 case FILE_MELDATE: 621 case FILE_MELONG: 622 case FILE_QUAD: 623 case FILE_LEQUAD: 624 case FILE_BEQUAD: 625 case FILE_QDATE: 626 case FILE_LEQDATE: 627 case FILE_BEQDATE: 628 case FILE_QLDATE: 629 case FILE_LEQLDATE: 630 case FILE_BEQLDATE: 631 case FILE_FLOAT: 632 case FILE_BEFLOAT: 633 case FILE_LEFLOAT: 634 case FILE_DOUBLE: 635 case FILE_BEDOUBLE: 636 case FILE_LEDOUBLE: 637 mstart->flag |= BINTEST; 638 break; 639 case FILE_STRING: 640 case FILE_PSTRING: 641 case FILE_BESTRING16: 642 case FILE_LESTRING16: 643 /* Allow text overrides */ 644 if (mstart->str_flags & STRING_TEXTTEST) 645 mstart->flag |= TEXTTEST; 646 else 647 mstart->flag |= BINTEST; 648 break; 649 case FILE_REGEX: 650 case FILE_SEARCH: 651 /* Check for override */ 652 if (mstart->str_flags & STRING_BINTEST) 653 mstart->flag |= BINTEST; 654 if (mstart->str_flags & STRING_TEXTTEST) 655 mstart->flag |= TEXTTEST; 656 657 if (mstart->flag & (TEXTTEST|BINTEST)) 658 break; 659 660 /* binary test if pattern is not text */ 661 if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, 662 NULL) <= 0) 663 mstart->flag |= BINTEST; 664 else 665 mstart->flag |= TEXTTEST; 666 break; 667 case FILE_DEFAULT: 668 /* can't deduce anything; we shouldn't see this at the 669 top level anyway */ 670 break; 671 case FILE_INVALID: 672 default: 673 /* invalid search type, but no need to complain here */ 674 break; 675 } 676} 677 678/* 679 * Load and parse one file. 680 */ 681private void 682load_1(struct magic_set *ms, int action, const char *fn, int *errs, 683 struct magic_entry **marray, uint32_t *marraycount) 684{ 685 size_t lineno = 0, llen = 0; 686 char *line = NULL; 687 ssize_t len; 688 689 FILE *f = fopen(ms->file = fn, "r"); 690 if (f == NULL) { 691 if (errno != ENOENT) 692 file_error(ms, errno, "cannot read magic file `%s'", 693 fn); 694 (*errs)++; 695 return; 696 } 697 698 /* read and parse this file */ 699 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 700 ms->line++) { 701 if (len == 0) /* null line, garbage, etc */ 702 continue; 703 if (line[len - 1] == '\n') { 704 lineno++; 705 line[len - 1] = '\0'; /* delete newline */ 706 } 707 switch (line[0]) { 708 case '\0': /* empty, do not parse */ 709 case '#': /* comment, do not parse */ 710 continue; 711 case '!': 712 if (line[1] == ':') { 713 size_t i; 714 715 for (i = 0; bang[i].name != NULL; i++) { 716 if ((size_t)(len - 2) > bang[i].len && 717 memcmp(bang[i].name, line + 2, 718 bang[i].len) == 0) 719 break; 720 } 721 if (bang[i].name == NULL) { 722 file_error(ms, 0, 723 "Unknown !: entry `%s'", line); 724 (*errs)++; 725 continue; 726 } 727 if (*marraycount == 0) { 728 file_error(ms, 0, 729 "No current entry for :!%s type", 730 bang[i].name); 731 (*errs)++; 732 continue; 733 } 734 if ((*bang[i].fun)(ms, 735 &(*marray)[*marraycount - 1], 736 line + bang[i].len + 2) != 0) { 737 (*errs)++; 738 continue; 739 } 740 continue; 741 } 742 /*FALLTHROUGH*/ 743 default: 744 if (parse(ms, marray, marraycount, line, lineno, 745 action) != 0) 746 (*errs)++; 747 break; 748 } 749 } 750 free(line); 751 (void)fclose(f); 752} 753 754/* 755 * parse a file or directory of files 756 * const char *fn: name of magic file or directory 757 */ 758private int 759cmpstrp(const void *p1, const void *p2) 760{ 761 return strcmp(*(char *const *)p1, *(char *const *)p2); 762} 763 764private int 765apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 766 const char *fn, int action) 767{ 768 int errs = 0; 769 struct magic_entry *marray; 770 uint32_t marraycount, i, mentrycount = 0, starttest; 771 size_t slen, files = 0, maxfiles = 0; 772 char **filearr = NULL, *mfn; 773 struct stat st; 774 DIR *dir; 775 struct dirent *d; 776 777 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 778 779 maxmagic = MAXMAGIS; 780 if ((marray = CAST(struct magic_entry *, calloc(maxmagic, 781 sizeof(*marray)))) == NULL) { 782 file_oomem(ms, maxmagic * sizeof(*marray)); 783 return -1; 784 } 785 marraycount = 0; 786 787 /* print silly verbose header for USG compat. */ 788 if (action == FILE_CHECK) 789 (void)fprintf(stderr, "%s\n", usg_hdr); 790 791 /* load directory or file */ 792 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 793 dir = opendir(fn); 794 if (!dir) { 795 errs++; 796 goto out; 797 } 798 while ((d = readdir(dir)) != NULL) { 799 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 800 file_oomem(ms, 801 strlen(fn) + strlen(d->d_name) + 2); 802 errs++; 803 closedir(dir); 804 goto out; 805 } 806 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 807 free(mfn); 808 continue; 809 } 810 if (files >= maxfiles) { 811 size_t mlen; 812 maxfiles = (maxfiles + 1) * 2; 813 mlen = maxfiles * sizeof(*filearr); 814 if ((filearr = CAST(char **, 815 realloc(filearr, mlen))) == NULL) { 816 file_oomem(ms, mlen); 817 free(mfn); 818 closedir(dir); 819 errs++; 820 goto out; 821 } 822 } 823 filearr[files++] = mfn; 824 } 825 closedir(dir); 826 qsort(filearr, files, sizeof(*filearr), cmpstrp); 827 for (i = 0; i < files; i++) { 828 load_1(ms, action, filearr[i], &errs, &marray, 829 &marraycount); 830 free(filearr[i]); 831 } 832 free(filearr); 833 } else 834 load_1(ms, action, fn, &errs, &marray, &marraycount); 835 if (errs) 836 goto out; 837 838 /* Set types of tests */ 839 for (i = 0; i < marraycount; ) { 840 if (marray[i].mp->cont_level != 0) { 841 i++; 842 continue; 843 } 844 845 starttest = i; 846 do { 847 static const char text[] = "text"; 848 static const char binary[] = "binary"; 849 static const size_t len = sizeof(text); 850 set_test_type(marray[starttest].mp, marray[i].mp); 851 if ((ms->flags & MAGIC_DEBUG) == 0) 852 continue; 853 (void)fprintf(stderr, "%s%s%s: %s\n", 854 marray[i].mp->mimetype, 855 marray[i].mp->mimetype[0] == '\0' ? "" : "; ", 856 marray[i].mp->desc[0] ? marray[i].mp->desc : 857 "(no description)", 858 marray[i].mp->flag & BINTEST ? binary : text); 859 if (marray[i].mp->flag & BINTEST) { 860 char *p = strstr(marray[i].mp->desc, text); 861 if (p && (p == marray[i].mp->desc || 862 isspace((unsigned char)p[-1])) && 863 (p + len - marray[i].mp->desc == 864 MAXstring || (p[len] == '\0' || 865 isspace((unsigned char)p[len])))) 866 (void)fprintf(stderr, "*** Possible " 867 "binary test for text type\n"); 868 } 869 } while (++i < marraycount && marray[i].mp->cont_level != 0); 870 } 871 872 qsort(marray, marraycount, sizeof(*marray), apprentice_sort); 873 874 /* 875 * Make sure that any level 0 "default" line is last (if one exists). 876 */ 877 for (i = 0; i < marraycount; i++) { 878 if (marray[i].mp->cont_level == 0 && 879 marray[i].mp->type == FILE_DEFAULT) { 880 while (++i < marraycount) 881 if (marray[i].mp->cont_level == 0) 882 break; 883 if (i != marraycount) { 884 /* XXX - Ugh! */ 885 ms->line = marray[i].mp->lineno; 886 file_magwarn(ms, 887 "level 0 \"default\" did not sort last"); 888 } 889 break; 890 } 891 } 892 893 for (i = 0; i < marraycount; i++) 894 mentrycount += marray[i].cont_count; 895 896 slen = sizeof(**magicp) * mentrycount; 897 if ((*magicp = CAST(struct magic *, malloc(slen))) == NULL) { 898 file_oomem(ms, slen); 899 errs++; 900 goto out; 901 } 902 903 mentrycount = 0; 904 for (i = 0; i < marraycount; i++) { 905 (void)memcpy(*magicp + mentrycount, marray[i].mp, 906 marray[i].cont_count * sizeof(**magicp)); 907 mentrycount += marray[i].cont_count; 908 } 909out: 910 for (i = 0; i < marraycount; i++) 911 free(marray[i].mp); 912 free(marray); 913 if (errs) { 914 *magicp = NULL; 915 *nmagicp = 0; 916 return errs; 917 } else { 918 *nmagicp = mentrycount; 919 return 0; 920 } 921 922} 923 924/* 925 * extend the sign bit if the comparison is to be signed 926 */ 927protected uint64_t 928file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 929{ 930 if (!(m->flag & UNSIGNED)) { 931 switch(m->type) { 932 /* 933 * Do not remove the casts below. They are 934 * vital. When later compared with the data, 935 * the sign extension must have happened. 936 */ 937 case FILE_BYTE: 938 v = (char) v; 939 break; 940 case FILE_SHORT: 941 case FILE_BESHORT: 942 case FILE_LESHORT: 943 v = (short) v; 944 break; 945 case FILE_DATE: 946 case FILE_BEDATE: 947 case FILE_LEDATE: 948 case FILE_MEDATE: 949 case FILE_LDATE: 950 case FILE_BELDATE: 951 case FILE_LELDATE: 952 case FILE_MELDATE: 953 case FILE_LONG: 954 case FILE_BELONG: 955 case FILE_LELONG: 956 case FILE_MELONG: 957 case FILE_FLOAT: 958 case FILE_BEFLOAT: 959 case FILE_LEFLOAT: 960 v = (int32_t) v; 961 break; 962 case FILE_QUAD: 963 case FILE_BEQUAD: 964 case FILE_LEQUAD: 965 case FILE_QDATE: 966 case FILE_QLDATE: 967 case FILE_BEQDATE: 968 case FILE_BEQLDATE: 969 case FILE_LEQDATE: 970 case FILE_LEQLDATE: 971 case FILE_DOUBLE: 972 case FILE_BEDOUBLE: 973 case FILE_LEDOUBLE: 974 v = (int64_t) v; 975 break; 976 case FILE_STRING: 977 case FILE_PSTRING: 978 case FILE_BESTRING16: 979 case FILE_LESTRING16: 980 case FILE_REGEX: 981 case FILE_SEARCH: 982 case FILE_DEFAULT: 983 case FILE_INDIRECT: 984 break; 985 default: 986 if (ms->flags & MAGIC_CHECK) 987 file_magwarn(ms, "cannot happen: m->type=%d\n", 988 m->type); 989 return ~0U; 990 } 991 } 992 return v; 993} 994 995private int 996string_modifier_check(struct magic_set *ms, struct magic *m) 997{ 998 if ((ms->flags & MAGIC_CHECK) == 0) 999 return 0; 1000 1001 if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { 1002 file_magwarn(ms, 1003 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 1004 return -1; 1005 } 1006 switch (m->type) { 1007 case FILE_BESTRING16: 1008 case FILE_LESTRING16: 1009 if (m->str_flags != 0) { 1010 file_magwarn(ms, 1011 "no modifiers allowed for 16-bit strings\n"); 1012 return -1; 1013 } 1014 break; 1015 case FILE_STRING: 1016 case FILE_PSTRING: 1017 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1018 file_magwarn(ms, 1019 "'/%c' only allowed on regex and search\n", 1020 CHAR_REGEX_OFFSET_START); 1021 return -1; 1022 } 1023 break; 1024 case FILE_SEARCH: 1025 if (m->str_range == 0) { 1026 file_magwarn(ms, 1027 "missing range; defaulting to %d\n", 1028 STRING_DEFAULT_RANGE); 1029 m->str_range = STRING_DEFAULT_RANGE; 1030 return -1; 1031 } 1032 break; 1033 case FILE_REGEX: 1034 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1035 file_magwarn(ms, "'/%c' not allowed on regex\n", 1036 CHAR_COMPACT_WHITESPACE); 1037 return -1; 1038 } 1039 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1040 file_magwarn(ms, "'/%c' not allowed on regex\n", 1041 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1042 return -1; 1043 } 1044 break; 1045 default: 1046 file_magwarn(ms, "coding error: m->type=%d\n", 1047 m->type); 1048 return -1; 1049 } 1050 return 0; 1051} 1052 1053private int 1054get_op(char c) 1055{ 1056 switch (c) { 1057 case '&': 1058 return FILE_OPAND; 1059 case '|': 1060 return FILE_OPOR; 1061 case '^': 1062 return FILE_OPXOR; 1063 case '+': 1064 return FILE_OPADD; 1065 case '-': 1066 return FILE_OPMINUS; 1067 case '*': 1068 return FILE_OPMULTIPLY; 1069 case '/': 1070 return FILE_OPDIVIDE; 1071 case '%': 1072 return FILE_OPMODULO; 1073 default: 1074 return -1; 1075 } 1076} 1077 1078#ifdef ENABLE_CONDITIONALS 1079private int 1080get_cond(const char *l, const char **t) 1081{ 1082 static const struct cond_tbl_s { 1083 char name[8]; 1084 size_t len; 1085 int cond; 1086 } cond_tbl[] = { 1087 { "if", 2, COND_IF }, 1088 { "elif", 4, COND_ELIF }, 1089 { "else", 4, COND_ELSE }, 1090 { "", 0, COND_NONE }, 1091 }; 1092 const struct cond_tbl_s *p; 1093 1094 for (p = cond_tbl; p->len; p++) { 1095 if (strncmp(l, p->name, p->len) == 0 && 1096 isspace((unsigned char)l[p->len])) { 1097 if (t) 1098 *t = l + p->len; 1099 break; 1100 } 1101 } 1102 return p->cond; 1103} 1104 1105private int 1106check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1107{ 1108 int last_cond; 1109 last_cond = ms->c.li[cont_level].last_cond; 1110 1111 switch (cond) { 1112 case COND_IF: 1113 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1114 if (ms->flags & MAGIC_CHECK) 1115 file_magwarn(ms, "syntax error: `if'"); 1116 return -1; 1117 } 1118 last_cond = COND_IF; 1119 break; 1120 1121 case COND_ELIF: 1122 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1123 if (ms->flags & MAGIC_CHECK) 1124 file_magwarn(ms, "syntax error: `elif'"); 1125 return -1; 1126 } 1127 last_cond = COND_ELIF; 1128 break; 1129 1130 case COND_ELSE: 1131 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1132 if (ms->flags & MAGIC_CHECK) 1133 file_magwarn(ms, "syntax error: `else'"); 1134 return -1; 1135 } 1136 last_cond = COND_NONE; 1137 break; 1138 1139 case COND_NONE: 1140 last_cond = COND_NONE; 1141 break; 1142 } 1143 1144 ms->c.li[cont_level].last_cond = last_cond; 1145 return 0; 1146} 1147#endif /* ENABLE_CONDITIONALS */ 1148 1149/* 1150 * parse one line from magic file, put into magic[index++] if valid 1151 */ 1152private int 1153parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, 1154 const char *line, size_t lineno, int action) 1155{ 1156#ifdef ENABLE_CONDITIONALS 1157 static uint32_t last_cont_level = 0; 1158#endif 1159 size_t i; 1160 struct magic_entry *me; 1161 struct magic *m; 1162 const char *l = line; 1163 char *t; 1164 int op; 1165 uint32_t cont_level; 1166 1167 cont_level = 0; 1168 1169 while (*l == '>') { 1170 ++l; /* step over */ 1171 cont_level++; 1172 } 1173#ifdef ENABLE_CONDITIONALS 1174 if (cont_level == 0 || cont_level > last_cont_level) 1175 if (file_check_mem(ms, cont_level) == -1) 1176 return -1; 1177 last_cont_level = cont_level; 1178#endif 1179 1180#define ALLOC_CHUNK (size_t)10 1181#define ALLOC_INCR (size_t)200 1182 1183 if (cont_level != 0) { 1184 if (*nmentryp == 0) { 1185 file_error(ms, 0, "No current entry for continuation"); 1186 return -1; 1187 } 1188 me = &(*mentryp)[*nmentryp - 1]; 1189 if (me->cont_count == me->max_count) { 1190 struct magic *nm; 1191 size_t cnt = me->max_count + ALLOC_CHUNK; 1192 if ((nm = CAST(struct magic *, realloc(me->mp, 1193 sizeof(*nm) * cnt))) == NULL) { 1194 file_oomem(ms, sizeof(*nm) * cnt); 1195 return -1; 1196 } 1197 me->mp = m = nm; 1198 me->max_count = CAST(uint32_t, cnt); 1199 } 1200 m = &me->mp[me->cont_count++]; 1201 (void)memset(m, 0, sizeof(*m)); 1202 m->cont_level = cont_level; 1203 } else { 1204 if (*nmentryp == maxmagic) { 1205 struct magic_entry *mp; 1206 1207 maxmagic += ALLOC_INCR; 1208 if ((mp = CAST(struct magic_entry *, 1209 realloc(*mentryp, sizeof(*mp) * maxmagic))) == 1210 NULL) { 1211 file_oomem(ms, sizeof(*mp) * maxmagic); 1212 return -1; 1213 } 1214 (void)memset(&mp[*nmentryp], 0, sizeof(*mp) * 1215 ALLOC_INCR); 1216 *mentryp = mp; 1217 } 1218 me = &(*mentryp)[*nmentryp]; 1219 if (me->mp == NULL) { 1220 size_t len = sizeof(*m) * ALLOC_CHUNK; 1221 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1222 file_oomem(ms, len); 1223 return -1; 1224 } 1225 me->mp = m; 1226 me->max_count = ALLOC_CHUNK; 1227 } else 1228 m = me->mp; 1229 (void)memset(m, 0, sizeof(*m)); 1230 m->factor_op = FILE_FACTOR_OP_NONE; 1231 m->cont_level = 0; 1232 me->cont_count = 1; 1233 } 1234 m->lineno = CAST(uint32_t, lineno); 1235 1236 if (*l == '&') { /* m->cont_level == 0 checked below. */ 1237 ++l; /* step over */ 1238 m->flag |= OFFADD; 1239 } 1240 if (*l == '(') { 1241 ++l; /* step over */ 1242 m->flag |= INDIR; 1243 if (m->flag & OFFADD) 1244 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1245 1246 if (*l == '&') { /* m->cont_level == 0 checked below */ 1247 ++l; /* step over */ 1248 m->flag |= OFFADD; 1249 } 1250 } 1251 /* Indirect offsets are not valid at level 0. */ 1252 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) 1253 if (ms->flags & MAGIC_CHECK) 1254 file_magwarn(ms, "relative offset at level 0"); 1255 1256 /* get offset, then skip over it */ 1257 m->offset = (uint32_t)strtoul(l, &t, 0); 1258 if (l == t) 1259 if (ms->flags & MAGIC_CHECK) 1260 file_magwarn(ms, "offset `%s' invalid", l); 1261 l = t; 1262 1263 if (m->flag & INDIR) { 1264 m->in_type = FILE_LONG; 1265 m->in_offset = 0; 1266 /* 1267 * read [.lbs][+-]nnnnn) 1268 */ 1269 if (*l == '.') { 1270 l++; 1271 switch (*l) { 1272 case 'l': 1273 m->in_type = FILE_LELONG; 1274 break; 1275 case 'L': 1276 m->in_type = FILE_BELONG; 1277 break; 1278 case 'm': 1279 m->in_type = FILE_MELONG; 1280 break; 1281 case 'h': 1282 case 's': 1283 m->in_type = FILE_LESHORT; 1284 break; 1285 case 'H': 1286 case 'S': 1287 m->in_type = FILE_BESHORT; 1288 break; 1289 case 'c': 1290 case 'b': 1291 case 'C': 1292 case 'B': 1293 m->in_type = FILE_BYTE; 1294 break; 1295 case 'e': 1296 case 'f': 1297 case 'g': 1298 m->in_type = FILE_LEDOUBLE; 1299 break; 1300 case 'E': 1301 case 'F': 1302 case 'G': 1303 m->in_type = FILE_BEDOUBLE; 1304 break; 1305 case 'i': 1306 m->in_type = FILE_LEID3; 1307 break; 1308 case 'I': 1309 m->in_type = FILE_BEID3; 1310 break; 1311 default: 1312 if (ms->flags & MAGIC_CHECK) 1313 file_magwarn(ms, 1314 "indirect offset type `%c' invalid", 1315 *l); 1316 break; 1317 } 1318 l++; 1319 } 1320 1321 m->in_op = 0; 1322 if (*l == '~') { 1323 m->in_op |= FILE_OPINVERSE; 1324 l++; 1325 } 1326 if ((op = get_op(*l)) != -1) { 1327 m->in_op |= op; 1328 l++; 1329 } 1330 if (*l == '(') { 1331 m->in_op |= FILE_OPINDIRECT; 1332 l++; 1333 } 1334 if (isdigit((unsigned char)*l) || *l == '-') { 1335 m->in_offset = (int32_t)strtol(l, &t, 0); 1336 if (l == t) 1337 if (ms->flags & MAGIC_CHECK) 1338 file_magwarn(ms, 1339 "in_offset `%s' invalid", l); 1340 l = t; 1341 } 1342 if (*l++ != ')' || 1343 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 1344 if (ms->flags & MAGIC_CHECK) 1345 file_magwarn(ms, 1346 "missing ')' in indirect offset"); 1347 } 1348 EATAB; 1349 1350#ifdef ENABLE_CONDITIONALS 1351 m->cond = get_cond(l, &l); 1352 if (check_cond(ms, m->cond, cont_level) == -1) 1353 return -1; 1354 1355 EATAB; 1356#endif 1357 1358 if (*l == 'u') { 1359 ++l; 1360 m->flag |= UNSIGNED; 1361 } 1362 1363 m->type = get_type(l, &l); 1364 if (m->type == FILE_INVALID) { 1365 if (ms->flags & MAGIC_CHECK) 1366 file_magwarn(ms, "type `%s' invalid", l); 1367 return -1; 1368 } 1369 1370 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1371 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 1372 1373 m->mask_op = 0; 1374 if (*l == '~') { 1375 if (!IS_STRING(m->type)) 1376 m->mask_op |= FILE_OPINVERSE; 1377 else if (ms->flags & MAGIC_CHECK) 1378 file_magwarn(ms, "'~' invalid for string types"); 1379 ++l; 1380 } 1381 m->str_range = 0; 1382 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 1383 if ((op = get_op(*l)) != -1) { 1384 if (!IS_STRING(m->type)) { 1385 uint64_t val; 1386 ++l; 1387 m->mask_op |= op; 1388 val = (uint64_t)strtoull(l, &t, 0); 1389 l = t; 1390 m->num_mask = file_signextend(ms, m, val); 1391 eatsize(&l); 1392 } 1393 else if (op == FILE_OPDIVIDE) { 1394 int have_range = 0; 1395 while (!isspace((unsigned char)*++l)) { 1396 switch (*l) { 1397 case '0': case '1': case '2': 1398 case '3': case '4': case '5': 1399 case '6': case '7': case '8': 1400 case '9': 1401 if (have_range && 1402 (ms->flags & MAGIC_CHECK)) 1403 file_magwarn(ms, 1404 "multiple ranges"); 1405 have_range = 1; 1406 m->str_range = CAST(uint32_t, 1407 strtoul(l, &t, 0)); 1408 if (m->str_range == 0) 1409 file_magwarn(ms, 1410 "zero range"); 1411 l = t - 1; 1412 break; 1413 case CHAR_COMPACT_WHITESPACE: 1414 m->str_flags |= 1415 STRING_COMPACT_WHITESPACE; 1416 break; 1417 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1418 m->str_flags |= 1419 STRING_COMPACT_OPTIONAL_WHITESPACE; 1420 break; 1421 case CHAR_IGNORE_LOWERCASE: 1422 m->str_flags |= STRING_IGNORE_LOWERCASE; 1423 break; 1424 case CHAR_IGNORE_UPPERCASE: 1425 m->str_flags |= STRING_IGNORE_UPPERCASE; 1426 break; 1427 case CHAR_REGEX_OFFSET_START: 1428 m->str_flags |= REGEX_OFFSET_START; 1429 break; 1430 case CHAR_BINTEST: 1431 m->str_flags |= STRING_BINTEST; 1432 break; 1433 case CHAR_TEXTTEST: 1434 m->str_flags |= STRING_TEXTTEST; 1435 break; 1436 case CHAR_PSTRING_1_LE: 1437 if (m->type != FILE_PSTRING) 1438 goto bad; 1439 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; 1440 break; 1441 case CHAR_PSTRING_2_BE: 1442 if (m->type != FILE_PSTRING) 1443 goto bad; 1444 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; 1445 break; 1446 case CHAR_PSTRING_2_LE: 1447 if (m->type != FILE_PSTRING) 1448 goto bad; 1449 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; 1450 break; 1451 case CHAR_PSTRING_4_BE: 1452 if (m->type != FILE_PSTRING) 1453 goto bad; 1454 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; 1455 break; 1456 case CHAR_PSTRING_4_LE: 1457 if (m->type != FILE_PSTRING) 1458 goto bad; 1459 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; 1460 break; 1461 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1462 if (m->type != FILE_PSTRING) 1463 goto bad; 1464 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1465 break; 1466 default: 1467 bad: 1468 if (ms->flags & MAGIC_CHECK) 1469 file_magwarn(ms, 1470 "string extension `%c' " 1471 "invalid", *l); 1472 return -1; 1473 } 1474 /* allow multiple '/' for readability */ 1475 if (l[1] == '/' && 1476 !isspace((unsigned char)l[2])) 1477 l++; 1478 } 1479 if (string_modifier_check(ms, m) == -1) 1480 return -1; 1481 } 1482 else { 1483 if (ms->flags & MAGIC_CHECK) 1484 file_magwarn(ms, "invalid string op: %c", *t); 1485 return -1; 1486 } 1487 } 1488 /* 1489 * We used to set mask to all 1's here, instead let's just not do 1490 * anything if mask = 0 (unless you have a better idea) 1491 */ 1492 EATAB; 1493 1494 switch (*l) { 1495 case '>': 1496 case '<': 1497 m->reln = *l; 1498 ++l; 1499 if (*l == '=') { 1500 if (ms->flags & MAGIC_CHECK) { 1501 file_magwarn(ms, "%c= not supported", 1502 m->reln); 1503 return -1; 1504 } 1505 ++l; 1506 } 1507 break; 1508 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 1509 case '&': 1510 case '^': 1511 case '=': 1512 m->reln = *l; 1513 ++l; 1514 if (*l == '=') { 1515 /* HP compat: ignore &= etc. */ 1516 ++l; 1517 } 1518 break; 1519 case '!': 1520 m->reln = *l; 1521 ++l; 1522 break; 1523 default: 1524 m->reln = '='; /* the default relation */ 1525 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 1526 isspace((unsigned char)l[1])) || !l[1])) { 1527 m->reln = *l; 1528 ++l; 1529 } 1530 break; 1531 } 1532 /* 1533 * Grab the value part, except for an 'x' reln. 1534 */ 1535 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 1536 return -1; 1537 1538 /* 1539 * TODO finish this macro and start using it! 1540 * #define offsetcheck {if (offset > HOWMANY-1) 1541 * magwarn("offset too big"); } 1542 */ 1543 1544 /* 1545 * Now get last part - the description 1546 */ 1547 EATAB; 1548 if (l[0] == '\b') { 1549 ++l; 1550 m->flag |= NOSPACE; 1551 } else if ((l[0] == '\\') && (l[1] == 'b')) { 1552 ++l; 1553 ++l; 1554 m->flag |= NOSPACE; 1555 } 1556 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 1557 continue; 1558 if (i == sizeof(m->desc)) { 1559 m->desc[sizeof(m->desc) - 1] = '\0'; 1560 if (ms->flags & MAGIC_CHECK) 1561 file_magwarn(ms, "description `%s' truncated", m->desc); 1562 } 1563 1564 /* 1565 * We only do this check while compiling, or if any of the magic 1566 * files were not compiled. 1567 */ 1568 if (ms->flags & MAGIC_CHECK) { 1569 if (check_format(ms, m) == -1) 1570 return -1; 1571 } 1572#ifndef COMPILE_ONLY 1573 if (action == FILE_CHECK) { 1574 file_mdump(m); 1575 } 1576#endif 1577 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 1578 if (m->cont_level == 0) 1579 ++(*nmentryp); /* make room for next */ 1580 return 0; 1581} 1582 1583/* 1584 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 1585 * if valid 1586 */ 1587private int 1588parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) 1589{ 1590 const char *l = line; 1591 char *el; 1592 unsigned long factor; 1593 struct magic *m = &me->mp[0]; 1594 1595 if (m->factor_op != FILE_FACTOR_OP_NONE) { 1596 file_magwarn(ms, 1597 "Current entry already has a strength type: %c %d", 1598 m->factor_op, m->factor); 1599 return -1; 1600 } 1601 EATAB; 1602 switch (*l) { 1603 case FILE_FACTOR_OP_NONE: 1604 case FILE_FACTOR_OP_PLUS: 1605 case FILE_FACTOR_OP_MINUS: 1606 case FILE_FACTOR_OP_TIMES: 1607 case FILE_FACTOR_OP_DIV: 1608 m->factor_op = *l++; 1609 break; 1610 default: 1611 file_magwarn(ms, "Unknown factor op `%c'", *l); 1612 return -1; 1613 } 1614 EATAB; 1615 factor = strtoul(l, &el, 0); 1616 if (factor > 255) { 1617 file_magwarn(ms, "Too large factor `%lu'", factor); 1618 goto out; 1619 } 1620 if (*el && !isspace((unsigned char)*el)) { 1621 file_magwarn(ms, "Bad factor `%s'", l); 1622 goto out; 1623 } 1624 m->factor = (uint8_t)factor; 1625 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 1626 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 1627 m->factor_op, m->factor); 1628 goto out; 1629 } 1630 return 0; 1631out: 1632 m->factor_op = FILE_FACTOR_OP_NONE; 1633 m->factor = 0; 1634 return -1; 1635} 1636 1637/* 1638 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 1639 * magic[index - 1] 1640 */ 1641private int 1642parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) 1643{ 1644 size_t i; 1645 const char *l = line; 1646 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 1647 1648 if (m->apple[0] != '\0') { 1649 file_magwarn(ms, "Current entry already has a APPLE type " 1650 "`%.8s', new type `%s'", m->mimetype, l); 1651 return -1; 1652 } 1653 1654 EATAB; 1655 for (i = 0; *l && ((isascii((unsigned char)*l) && 1656 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 1657 i < sizeof(m->apple); m->apple[i++] = *l++) 1658 continue; 1659 if (i == sizeof(m->apple) && *l) { 1660 /* We don't need to NUL terminate here, printing handles it */ 1661 if (ms->flags & MAGIC_CHECK) 1662 file_magwarn(ms, "APPLE type `%s' truncated %" 1663 SIZE_T_FORMAT "u", line, i); 1664 } 1665 1666 if (i > 0) 1667 return 0; 1668 else 1669 return -1; 1670} 1671 1672/* 1673 * parse a MIME annotation line from magic file, put into magic[index - 1] 1674 * if valid 1675 */ 1676private int 1677parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) 1678{ 1679 size_t i; 1680 const char *l = line; 1681 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 1682 1683 if (m->mimetype[0] != '\0') { 1684 file_magwarn(ms, "Current entry already has a MIME type `%s'," 1685 " new type `%s'", m->mimetype, l); 1686 return -1; 1687 } 1688 1689 EATAB; 1690 for (i = 0; *l && ((isascii((unsigned char)*l) && 1691 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 1692 i < sizeof(m->mimetype); m->mimetype[i++] = *l++) 1693 continue; 1694 if (i == sizeof(m->mimetype)) { 1695 m->mimetype[sizeof(m->mimetype) - 1] = '\0'; 1696 if (ms->flags & MAGIC_CHECK) 1697 file_magwarn(ms, "MIME type `%s' truncated %" 1698 SIZE_T_FORMAT "u", m->mimetype, i); 1699 } else 1700 m->mimetype[i] = '\0'; 1701 1702 if (i > 0) 1703 return 0; 1704 else 1705 return -1; 1706} 1707 1708private int 1709check_format_type(const char *ptr, int type) 1710{ 1711 int quad = 0; 1712 if (*ptr == '\0') { 1713 /* Missing format string; bad */ 1714 return -1; 1715 } 1716 1717 switch (type) { 1718 case FILE_FMT_QUAD: 1719 quad = 1; 1720 /*FALLTHROUGH*/ 1721 case FILE_FMT_NUM: 1722 if (*ptr == '-') 1723 ptr++; 1724 if (*ptr == '.') 1725 ptr++; 1726 while (isdigit((unsigned char)*ptr)) ptr++; 1727 if (*ptr == '.') 1728 ptr++; 1729 while (isdigit((unsigned char)*ptr)) ptr++; 1730 if (quad) { 1731 if (*ptr++ != 'l') 1732 return -1; 1733 if (*ptr++ != 'l') 1734 return -1; 1735 } 1736 1737 switch (*ptr++) { 1738 case 'l': 1739 switch (*ptr++) { 1740 case 'i': 1741 case 'd': 1742 case 'u': 1743 case 'x': 1744 case 'X': 1745 return 0; 1746 default: 1747 return -1; 1748 } 1749 1750 case 'h': 1751 switch (*ptr++) { 1752 case 'h': 1753 switch (*ptr++) { 1754 case 'i': 1755 case 'd': 1756 case 'u': 1757 case 'x': 1758 case 'X': 1759 return 0; 1760 default: 1761 return -1; 1762 } 1763 case 'd': 1764 return 0; 1765 default: 1766 return -1; 1767 } 1768 1769 case 'i': 1770 case 'c': 1771 case 'd': 1772 case 'u': 1773 case 'x': 1774 case 'X': 1775 return 0; 1776 1777 default: 1778 return -1; 1779 } 1780 1781 case FILE_FMT_FLOAT: 1782 case FILE_FMT_DOUBLE: 1783 if (*ptr == '-') 1784 ptr++; 1785 if (*ptr == '.') 1786 ptr++; 1787 while (isdigit((unsigned char)*ptr)) ptr++; 1788 if (*ptr == '.') 1789 ptr++; 1790 while (isdigit((unsigned char)*ptr)) ptr++; 1791 1792 switch (*ptr++) { 1793 case 'e': 1794 case 'E': 1795 case 'f': 1796 case 'F': 1797 case 'g': 1798 case 'G': 1799 return 0; 1800 1801 default: 1802 return -1; 1803 } 1804 1805 1806 case FILE_FMT_STR: 1807 if (*ptr == '-') 1808 ptr++; 1809 while (isdigit((unsigned char )*ptr)) 1810 ptr++; 1811 if (*ptr == '.') { 1812 ptr++; 1813 while (isdigit((unsigned char )*ptr)) 1814 ptr++; 1815 } 1816 1817 switch (*ptr++) { 1818 case 's': 1819 return 0; 1820 default: 1821 return -1; 1822 } 1823 1824 default: 1825 /* internal error */ 1826 abort(); 1827 } 1828 /*NOTREACHED*/ 1829 return -1; 1830} 1831 1832/* 1833 * Check that the optional printf format in description matches 1834 * the type of the magic. 1835 */ 1836private int 1837check_format(struct magic_set *ms, struct magic *m) 1838{ 1839 char *ptr; 1840 1841 for (ptr = m->desc; *ptr; ptr++) 1842 if (*ptr == '%') 1843 break; 1844 if (*ptr == '\0') { 1845 /* No format string; ok */ 1846 return 1; 1847 } 1848 1849 assert(file_nformats == file_nnames); 1850 1851 if (m->type >= file_nformats) { 1852 file_magwarn(ms, "Internal error inconsistency between " 1853 "m->type and format strings"); 1854 return -1; 1855 } 1856 if (file_formats[m->type] == FILE_FMT_NONE) { 1857 file_magwarn(ms, "No format string for `%s' with description " 1858 "`%s'", m->desc, file_names[m->type]); 1859 return -1; 1860 } 1861 1862 ptr++; 1863 if (check_format_type(ptr, file_formats[m->type]) == -1) { 1864 /* 1865 * TODO: this error message is unhelpful if the format 1866 * string is not one character long 1867 */ 1868 file_magwarn(ms, "Printf format `%c' is not valid for type " 1869 "`%s' in description `%s'", *ptr ? *ptr : '?', 1870 file_names[m->type], m->desc); 1871 return -1; 1872 } 1873 1874 for (; *ptr; ptr++) { 1875 if (*ptr == '%') { 1876 file_magwarn(ms, 1877 "Too many format strings (should have at most one) " 1878 "for `%s' with description `%s'", 1879 file_names[m->type], m->desc); 1880 return -1; 1881 } 1882 } 1883 return 0; 1884} 1885 1886/* 1887 * Read a numeric value from a pointer, into the value union of a magic 1888 * pointer, according to the magic type. Update the string pointer to point 1889 * just after the number read. Return 0 for success, non-zero for failure. 1890 */ 1891private int 1892getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 1893{ 1894 switch (m->type) { 1895 case FILE_BESTRING16: 1896 case FILE_LESTRING16: 1897 case FILE_STRING: 1898 case FILE_PSTRING: 1899 case FILE_REGEX: 1900 case FILE_SEARCH: 1901 *p = getstr(ms, m, *p, action == FILE_COMPILE); 1902 if (*p == NULL) { 1903 if (ms->flags & MAGIC_CHECK) 1904 file_magwarn(ms, "cannot get string from `%s'", 1905 m->value.s); 1906 return -1; 1907 } 1908 return 0; 1909 case FILE_FLOAT: 1910 case FILE_BEFLOAT: 1911 case FILE_LEFLOAT: 1912 if (m->reln != 'x') { 1913 char *ep; 1914#ifdef HAVE_STRTOF 1915 m->value.f = strtof(*p, &ep); 1916#else 1917 m->value.f = (float)strtod(*p, &ep); 1918#endif 1919 *p = ep; 1920 } 1921 return 0; 1922 case FILE_DOUBLE: 1923 case FILE_BEDOUBLE: 1924 case FILE_LEDOUBLE: 1925 if (m->reln != 'x') { 1926 char *ep; 1927 m->value.d = strtod(*p, &ep); 1928 *p = ep; 1929 } 1930 return 0; 1931 default: 1932 if (m->reln != 'x') { 1933 char *ep; 1934 m->value.q = file_signextend(ms, m, 1935 (uint64_t)strtoull(*p, &ep, 0)); 1936 *p = ep; 1937 eatsize(p); 1938 } 1939 return 0; 1940 } 1941} 1942 1943/* 1944 * Convert a string containing C character escapes. Stop at an unescaped 1945 * space or tab. 1946 * Copy the converted version to "m->value.s", and the length in m->vallen. 1947 * Return updated scan pointer as function result. Warn if set. 1948 */ 1949private const char * 1950getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 1951{ 1952 const char *origs = s; 1953 char *p = m->value.s; 1954 size_t plen = sizeof(m->value.s); 1955 char *origp = p; 1956 char *pmax = p + plen - 1; 1957 int c; 1958 int val; 1959 1960 while ((c = *s++) != '\0') { 1961 if (isspace((unsigned char) c)) 1962 break; 1963 if (p >= pmax) { 1964 file_error(ms, 0, "string too long: `%s'", origs); 1965 return NULL; 1966 } 1967 if (c == '\\') { 1968 switch(c = *s++) { 1969 1970 case '\0': 1971 if (warn) 1972 file_magwarn(ms, "incomplete escape"); 1973 goto out; 1974 1975 case '\t': 1976 if (warn) { 1977 file_magwarn(ms, 1978 "escaped tab found, use \\t instead"); 1979 warn = 0; /* already did */ 1980 } 1981 /*FALLTHROUGH*/ 1982 default: 1983 if (warn) { 1984 if (isprint((unsigned char)c)) { 1985 /* Allow escaping of 1986 * ``relations'' */ 1987 if (strchr("<>&^=!", c) == NULL 1988 && (m->type != FILE_REGEX || 1989 strchr("[]().*?^$|{}", c) 1990 == NULL)) { 1991 file_magwarn(ms, "no " 1992 "need to escape " 1993 "`%c'", c); 1994 } 1995 } else { 1996 file_magwarn(ms, 1997 "unknown escape sequence: " 1998 "\\%03o", c); 1999 } 2000 } 2001 /*FALLTHROUGH*/ 2002 /* space, perhaps force people to use \040? */ 2003 case ' ': 2004#if 0 2005 /* 2006 * Other things people escape, but shouldn't need to, 2007 * so we disallow them 2008 */ 2009 case '\'': 2010 case '"': 2011 case '?': 2012#endif 2013 /* Relations */ 2014 case '>': 2015 case '<': 2016 case '&': 2017 case '^': 2018 case '=': 2019 case '!': 2020 /* and baskslash itself */ 2021 case '\\': 2022 *p++ = (char) c; 2023 break; 2024 2025 case 'a': 2026 *p++ = '\a'; 2027 break; 2028 2029 case 'b': 2030 *p++ = '\b'; 2031 break; 2032 2033 case 'f': 2034 *p++ = '\f'; 2035 break; 2036 2037 case 'n': 2038 *p++ = '\n'; 2039 break; 2040 2041 case 'r': 2042 *p++ = '\r'; 2043 break; 2044 2045 case 't': 2046 *p++ = '\t'; 2047 break; 2048 2049 case 'v': 2050 *p++ = '\v'; 2051 break; 2052 2053 /* \ and up to 3 octal digits */ 2054 case '0': 2055 case '1': 2056 case '2': 2057 case '3': 2058 case '4': 2059 case '5': 2060 case '6': 2061 case '7': 2062 val = c - '0'; 2063 c = *s++; /* try for 2 */ 2064 if (c >= '0' && c <= '7') { 2065 val = (val << 3) | (c - '0'); 2066 c = *s++; /* try for 3 */ 2067 if (c >= '0' && c <= '7') 2068 val = (val << 3) | (c-'0'); 2069 else 2070 --s; 2071 } 2072 else 2073 --s; 2074 *p++ = (char)val; 2075 break; 2076 2077 /* \x and up to 2 hex digits */ 2078 case 'x': 2079 val = 'x'; /* Default if no digits */ 2080 c = hextoint(*s++); /* Get next char */ 2081 if (c >= 0) { 2082 val = c; 2083 c = hextoint(*s++); 2084 if (c >= 0) 2085 val = (val << 4) + c; 2086 else 2087 --s; 2088 } else 2089 --s; 2090 *p++ = (char)val; 2091 break; 2092 } 2093 } else 2094 *p++ = (char)c; 2095 } 2096out: 2097 *p = '\0'; 2098 m->vallen = CAST(unsigned char, (p - origp)); 2099 if (m->type == FILE_PSTRING) 2100 m->vallen += (unsigned char)file_pstring_length_size(m); 2101 return s; 2102} 2103 2104 2105/* Single hex char to int; -1 if not a hex char. */ 2106private int 2107hextoint(int c) 2108{ 2109 if (!isascii((unsigned char) c)) 2110 return -1; 2111 if (isdigit((unsigned char) c)) 2112 return c - '0'; 2113 if ((c >= 'a') && (c <= 'f')) 2114 return c + 10 - 'a'; 2115 if (( c>= 'A') && (c <= 'F')) 2116 return c + 10 - 'A'; 2117 return -1; 2118} 2119 2120 2121/* 2122 * Print a string containing C character escapes. 2123 */ 2124protected void 2125file_showstr(FILE *fp, const char *s, size_t len) 2126{ 2127 char c; 2128 2129 for (;;) { 2130 if (len == ~0U) { 2131 c = *s++; 2132 if (c == '\0') 2133 break; 2134 } 2135 else { 2136 if (len-- == 0) 2137 break; 2138 c = *s++; 2139 } 2140 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 2141 (void) fputc(c, fp); 2142 else { 2143 (void) fputc('\\', fp); 2144 switch (c) { 2145 case '\a': 2146 (void) fputc('a', fp); 2147 break; 2148 2149 case '\b': 2150 (void) fputc('b', fp); 2151 break; 2152 2153 case '\f': 2154 (void) fputc('f', fp); 2155 break; 2156 2157 case '\n': 2158 (void) fputc('n', fp); 2159 break; 2160 2161 case '\r': 2162 (void) fputc('r', fp); 2163 break; 2164 2165 case '\t': 2166 (void) fputc('t', fp); 2167 break; 2168 2169 case '\v': 2170 (void) fputc('v', fp); 2171 break; 2172 2173 default: 2174 (void) fprintf(fp, "%.3o", c & 0377); 2175 break; 2176 } 2177 } 2178 } 2179} 2180 2181/* 2182 * eatsize(): Eat the size spec from a number [eg. 10UL] 2183 */ 2184private void 2185eatsize(const char **p) 2186{ 2187 const char *l = *p; 2188 2189 if (LOWCASE(*l) == 'u') 2190 l++; 2191 2192 switch (LOWCASE(*l)) { 2193 case 'l': /* long */ 2194 case 's': /* short */ 2195 case 'h': /* short */ 2196 case 'b': /* char/byte */ 2197 case 'c': /* char/byte */ 2198 l++; 2199 /*FALLTHROUGH*/ 2200 default: 2201 break; 2202 } 2203 2204 *p = l; 2205} 2206 2207#ifndef COMPILE_ONLY 2208/* 2209 * handle a compiled file. 2210 */ 2211private int 2212apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 2213 const char *fn) 2214{ 2215 int fd; 2216 struct stat st; 2217 uint32_t *ptr; 2218 uint32_t version; 2219 int needsbyteswap; 2220 char *dbname = NULL; 2221 void *mm = NULL; 2222 2223 dbname = mkdbname(ms, fn, 0); 2224 if (dbname == NULL) 2225 goto error2; 2226 2227 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 2228 goto error2; 2229 2230 if (fstat(fd, &st) == -1) { 2231 file_error(ms, errno, "cannot stat `%s'", dbname); 2232 goto error1; 2233 } 2234 if (st.st_size < 8) { 2235 file_error(ms, 0, "file `%s' is too small", dbname); 2236 goto error1; 2237 } 2238 2239#ifdef QUICK 2240 if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, 2241 MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { 2242 file_error(ms, errno, "cannot map `%s'", dbname); 2243 goto error1; 2244 } 2245#define RET 2 2246#else 2247 if ((mm = CAST(void *, malloc((size_t)st.st_size))) == NULL) { 2248 file_oomem(ms, (size_t)st.st_size); 2249 goto error1; 2250 } 2251 if (read(fd, mm, (size_t)st.st_size) != (ssize_t)st.st_size) { 2252 file_badread(ms); 2253 goto error1; 2254 } 2255#define RET 1 2256#endif 2257 *magicp = CAST(struct magic *, mm); 2258 (void)close(fd); 2259 fd = -1; 2260 ptr = (uint32_t *)(void *)*magicp; 2261 if (*ptr != MAGICNO) { 2262 if (swap4(*ptr) != MAGICNO) { 2263 file_error(ms, 0, "bad magic in `%s'", dbname); 2264 goto error1; 2265 } 2266 needsbyteswap = 1; 2267 } else 2268 needsbyteswap = 0; 2269 if (needsbyteswap) 2270 version = swap4(ptr[1]); 2271 else 2272 version = ptr[1]; 2273 if (version != VERSIONNO) { 2274 file_error(ms, 0, "File %s supports only version %d magic " 2275 "files. `%s' is version %d", VERSION, 2276 VERSIONNO, dbname, version); 2277 goto error1; 2278 } 2279 *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)); 2280 if (*nmagicp > 0) 2281 (*nmagicp)--; 2282 (*magicp)++; 2283 if (needsbyteswap) 2284 byteswap(*magicp, *nmagicp); 2285 free(dbname); 2286 return RET; 2287 2288error1: 2289 if (fd != -1) 2290 (void)close(fd); 2291 if (mm) { 2292#ifdef QUICK 2293 (void)munmap((void *)mm, (size_t)st.st_size); 2294#else 2295 free(mm); 2296#endif 2297 } else { 2298 *magicp = NULL; 2299 *nmagicp = 0; 2300 } 2301error2: 2302 free(dbname); 2303 return -1; 2304} 2305#endif /* COMPILE_ONLY */ 2306 2307private const uint32_t ar[] = { 2308 MAGICNO, VERSIONNO 2309}; 2310/* 2311 * handle an mmaped file. 2312 */ 2313private int 2314apprentice_compile(struct magic_set *ms, struct magic **magicp, 2315 uint32_t *nmagicp, const char *fn) 2316{ 2317 int fd = -1; 2318 char *dbname; 2319 int rv = -1; 2320 2321 dbname = mkdbname(ms, fn, 1); 2322 2323 if (dbname == NULL) 2324 goto out; 2325 2326 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) { 2327 file_error(ms, errno, "cannot open `%s'", dbname); 2328 goto out; 2329 } 2330 2331 if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { 2332 file_error(ms, errno, "error writing `%s'", dbname); 2333 goto out; 2334 } 2335 2336 if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET) 2337 != sizeof(struct magic)) { 2338 file_error(ms, errno, "error seeking `%s'", dbname); 2339 goto out; 2340 } 2341 2342 if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp)) 2343 != (ssize_t)(sizeof(struct magic) * *nmagicp)) { 2344 file_error(ms, errno, "error writing `%s'", dbname); 2345 goto out; 2346 } 2347 2348 if (fd != -1) 2349 (void)close(fd); 2350 rv = 0; 2351out: 2352 free(dbname); 2353 return rv; 2354} 2355 2356private const char ext[] = ".mgc"; 2357/* 2358 * make a dbname 2359 */ 2360private char * 2361mkdbname(struct magic_set *ms, const char *fn, int strip) 2362{ 2363 const char *p, *q; 2364 char *buf; 2365 2366 if (strip) { 2367 if ((p = strrchr(fn, '/')) != NULL) 2368 fn = ++p; 2369 } 2370 2371 for (q = fn; *q; q++) 2372 continue; 2373 /* Look for .mgc */ 2374 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 2375 if (*p != *q) 2376 break; 2377 2378 /* Did not find .mgc, restore q */ 2379 if (p >= ext) 2380 while (*q) 2381 q++; 2382 2383 q++; 2384 /* Compatibility with old code that looked in .mime */ 2385 if (ms->flags & MAGIC_MIME) { 2386 asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext); 2387 if (access(buf, R_OK) != -1) { 2388 ms->flags &= MAGIC_MIME_TYPE; 2389 return buf; 2390 } 2391 free(buf); 2392 } 2393 asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext); 2394 2395 /* Compatibility with old code that looked in .mime */ 2396 if (strstr(p, ".mime") != NULL) 2397 ms->flags &= MAGIC_MIME_TYPE; 2398 return buf; 2399} 2400 2401#ifndef COMPILE_ONLY 2402/* 2403 * Byteswap an mmap'ed file if needed 2404 */ 2405private void 2406byteswap(struct magic *magic, uint32_t nmagic) 2407{ 2408 uint32_t i; 2409 for (i = 0; i < nmagic; i++) 2410 bs1(&magic[i]); 2411} 2412 2413/* 2414 * swap a short 2415 */ 2416private uint16_t 2417swap2(uint16_t sv) 2418{ 2419 uint16_t rv; 2420 uint8_t *s = (uint8_t *)(void *)&sv; 2421 uint8_t *d = (uint8_t *)(void *)&rv; 2422 d[0] = s[1]; 2423 d[1] = s[0]; 2424 return rv; 2425} 2426 2427/* 2428 * swap an int 2429 */ 2430private uint32_t 2431swap4(uint32_t sv) 2432{ 2433 uint32_t rv; 2434 uint8_t *s = (uint8_t *)(void *)&sv; 2435 uint8_t *d = (uint8_t *)(void *)&rv; 2436 d[0] = s[3]; 2437 d[1] = s[2]; 2438 d[2] = s[1]; 2439 d[3] = s[0]; 2440 return rv; 2441} 2442 2443/* 2444 * swap a quad 2445 */ 2446private uint64_t 2447swap8(uint64_t sv) 2448{ 2449 uint64_t rv; 2450 uint8_t *s = (uint8_t *)(void *)&sv; 2451 uint8_t *d = (uint8_t *)(void *)&rv; 2452#if 0 2453 d[0] = s[3]; 2454 d[1] = s[2]; 2455 d[2] = s[1]; 2456 d[3] = s[0]; 2457 d[4] = s[7]; 2458 d[5] = s[6]; 2459 d[6] = s[5]; 2460 d[7] = s[4]; 2461#else 2462 d[0] = s[7]; 2463 d[1] = s[6]; 2464 d[2] = s[5]; 2465 d[3] = s[4]; 2466 d[4] = s[3]; 2467 d[5] = s[2]; 2468 d[6] = s[1]; 2469 d[7] = s[0]; 2470#endif 2471 return rv; 2472} 2473 2474/* 2475 * byteswap a single magic entry 2476 */ 2477private void 2478bs1(struct magic *m) 2479{ 2480 m->cont_level = swap2(m->cont_level); 2481 m->offset = swap4((uint32_t)m->offset); 2482 m->in_offset = swap4((uint32_t)m->in_offset); 2483 m->lineno = swap4((uint32_t)m->lineno); 2484 if (IS_STRING(m->type)) { 2485 m->str_range = swap4(m->str_range); 2486 m->str_flags = swap4(m->str_flags); 2487 } 2488 else { 2489 m->value.q = swap8(m->value.q); 2490 m->num_mask = swap8(m->num_mask); 2491 } 2492} 2493#endif /* COMPILE_ONLY */ 2494 2495protected size_t 2496file_pstring_length_size(const struct magic *m) 2497{ 2498 switch (m->str_flags & PSTRING_LEN) { 2499 case PSTRING_1_LE: 2500 return 1; 2501 case PSTRING_2_LE: 2502 case PSTRING_2_BE: 2503 return 2; 2504 case PSTRING_4_LE: 2505 case PSTRING_4_BE: 2506 return 4; 2507 default: 2508 abort(); /* Impossible */ 2509 return 1; 2510 } 2511} 2512protected size_t 2513file_pstring_get_length(const struct magic *m, const char *s) 2514{ 2515 size_t len = 0; 2516 2517 switch (m->str_flags & PSTRING_LEN) { 2518 case PSTRING_1_LE: 2519 len = *s; 2520 break; 2521 case PSTRING_2_LE: 2522 len = (s[1] << 8) | s[0]; 2523 break; 2524 case PSTRING_2_BE: 2525 len = (s[0] << 8) | s[1]; 2526 break; 2527 case PSTRING_4_LE: 2528 len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; 2529 break; 2530 case PSTRING_4_BE: 2531 len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; 2532 break; 2533 default: 2534 abort(); /* Impossible */ 2535 } 2536 2537 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) 2538 len -= file_pstring_length_size(m); 2539 2540 return len; 2541} 2542