apprentice.c revision 186691
/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * apprentice - make one pass through /etc/magic, learning its secrets.
30 */ 31 32#include "file.h" 33#include "magic.h" 34#include "patchlevel.h" 35#include <stdlib.h> 36#ifdef HAVE_UNISTD_H 37#include <unistd.h> 38#endif 39#include <string.h> 40#include <assert.h> 41#include <ctype.h> 42#include <fcntl.h> 43#include <sys/stat.h> 44#include <sys/param.h> 45#ifdef QUICK 46#include <sys/mman.h> 47#endif 48#include <sys/types.h> 49#include <dirent.h> 50 51#ifndef lint 52FILE_RCSID("@(#)$File: apprentice.c,v 1.140 2008/07/20 04:02:15 christos Exp $") 53#endif /* lint */ 54 55#define EATAB {while (isascii((unsigned char) *l) && \ 56 isspace((unsigned char) *l)) ++l;} 57#define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 58 tolower((unsigned char) (l)) : (l)) 59/* 60 * Work around a bug in headers on Digital Unix. 61 * At least confirmed for: OSF1 V4.0 878 62 */ 63#if defined(__osf__) && defined(__DECC) 64#ifdef MAP_FAILED 65#undef MAP_FAILED 66#endif 67#endif 68 69#ifndef MAP_FAILED 70#define MAP_FAILED (void *) -1 71#endif 72 73#ifndef MAP_FILE 74#define MAP_FILE 0 75#endif 76 77#ifndef MAXPATHLEN 78#define MAXPATHLEN 1024 79#endif 80 81struct magic_entry { 82 struct magic *mp; 83 uint32_t cont_count; 84 uint32_t max_count; 85}; 86 87int file_formats[FILE_NAMES_SIZE]; 88const size_t file_nformats = FILE_NAMES_SIZE; 89const char *file_names[FILE_NAMES_SIZE]; 90const size_t file_nnames = FILE_NAMES_SIZE; 91 92private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 93private int hextoint(int); 94private const char *getstr(struct magic_set *, const char *, char *, int, 95 int *, int); 96private int parse(struct magic_set *, struct magic_entry **, uint32_t *, 97 const char *, size_t, int); 98private void eatsize(const char **); 99private int apprentice_1(struct magic_set *, const char *, int, struct mlist *); 100private size_t apprentice_magic_strength(const struct magic *); 101private int apprentice_sort(const void *, const void *); 102private int apprentice_load(struct magic_set *, struct magic **, uint32_t *, 103 
const char *, int); 104private void byteswap(struct magic *, uint32_t); 105private void bs1(struct magic *); 106private uint16_t swap2(uint16_t); 107private uint32_t swap4(uint32_t); 108private uint64_t swap8(uint64_t); 109private void mkdbname(const char *, char **, int); 110private int apprentice_map(struct magic_set *, struct magic **, uint32_t *, 111 const char *); 112private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *, 113 const char *); 114private int check_format_type(const char *, int); 115private int check_format(struct magic_set *, struct magic *); 116private int get_op(char); 117private int parse_mime(struct magic_set *, struct magic_entry *, const char *); 118private int parse_strength(struct magic_set *, struct magic_entry *, 119 const char *); 120 121 122private size_t maxmagic = 0; 123private size_t magicsize = sizeof(struct magic); 124 125private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 126 127private struct { 128 const char *name; 129 size_t len; 130 int (*fun)(struct magic_set *, struct magic_entry *, const char *); 131} bang[] = { 132#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 133 DECLARE_FIELD(mime), 134 DECLARE_FIELD(strength), 135#undef DECLARE_FIELD 136 { NULL, 0, NULL } 137}; 138 139#ifdef COMPILE_ONLY 140 141int main(int, char *[]); 142 143int 144main(int argc, char *argv[]) 145{ 146 int ret; 147 struct magic_set *ms; 148 char *progname; 149 150 if ((progname = strrchr(argv[0], '/')) != NULL) 151 progname++; 152 else 153 progname = argv[0]; 154 155 if (argc != 2) { 156 (void)fprintf(stderr, "Usage: %s file\n", progname); 157 return 1; 158 } 159 160 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 161 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 162 return 1; 163 } 164 ret = magic_compile(ms, argv[1]) == -1 ? 
1 : 0; 165 if (ret == 1) 166 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 167 magic_close(ms); 168 return ret; 169} 170#endif /* COMPILE_ONLY */ 171 172static const struct type_tbl_s { 173 const char name[16]; 174 const size_t len; 175 const int type; 176 const int format; 177} type_tbl[] = { 178# define XX(s) s, (sizeof(s) - 1) 179# define XX_NULL "", 0 180 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 181 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 182 { XX("default"), FILE_DEFAULT, FILE_FMT_STR }, 183 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 184 { XX("string"), FILE_STRING, FILE_FMT_STR }, 185 { XX("date"), FILE_DATE, FILE_FMT_STR }, 186 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 187 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 188 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 189 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 190 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 191 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 192 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 193 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 194 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 195 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 196 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 197 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 198 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 199 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 200 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 201 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 202 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 203 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 204 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 205 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 206 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 207 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 208 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 209 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 210 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 211 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 212 { XX("float"), FILE_FLOAT, 
FILE_FMT_FLOAT }, 213 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 214 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 215 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 216 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 217 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 218 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 219# undef XX 220# undef XX_NULL 221}; 222 223private int 224get_type(const char *l, const char **t) 225{ 226 const struct type_tbl_s *p; 227 228 for (p = type_tbl; p->len; p++) { 229 if (strncmp(l, p->name, p->len) == 0) { 230 if (t) 231 *t = l + p->len; 232 break; 233 } 234 } 235 return p->type; 236} 237 238private void 239init_file_tables(void) 240{ 241 static int done = 0; 242 const struct type_tbl_s *p; 243 244 if (done) 245 return; 246 done++; 247 248 for (p = type_tbl; p->len; p++) { 249 assert(p->type < FILE_NAMES_SIZE); 250 file_names[p->type] = p->name; 251 file_formats[p->type] = p->format; 252 } 253} 254 255/* 256 * Handle one file or directory. 257 */ 258private int 259apprentice_1(struct magic_set *ms, const char *fn, int action, 260 struct mlist *mlist) 261{ 262 struct magic *magic = NULL; 263 uint32_t nmagic = 0; 264 struct mlist *ml; 265 int rv = -1; 266 int mapped; 267 268 if (magicsize != FILE_MAGICSIZE) { 269 file_error(ms, 0, "magic element size %lu != %lu", 270 (unsigned long)sizeof(*magic), 271 (unsigned long)FILE_MAGICSIZE); 272 return -1; 273 } 274 275 if (action == FILE_COMPILE) { 276 rv = apprentice_load(ms, &magic, &nmagic, fn, action); 277 if (rv != 0) 278 return -1; 279 rv = apprentice_compile(ms, &magic, &nmagic, fn); 280 free(magic); 281 return rv; 282 } 283 284#ifndef COMPILE_ONLY 285 if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) { 286 if (ms->flags & MAGIC_CHECK) 287 file_magwarn(ms, "using regular magic file `%s'", fn); 288 rv = apprentice_load(ms, &magic, &nmagic, fn, action); 289 if (rv != 0) 290 return -1; 291 } 292 293 mapped = rv; 294 295 if (magic == NULL) { 296 file_delmagic(magic, 
mapped, nmagic); 297 return -1; 298 } 299 300 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) { 301 file_delmagic(magic, mapped, nmagic); 302 file_oomem(ms, sizeof(*ml)); 303 return -1; 304 } 305 306 ml->magic = magic; 307 ml->nmagic = nmagic; 308 ml->mapped = mapped; 309 310 mlist->prev->next = ml; 311 ml->prev = mlist->prev; 312 ml->next = mlist; 313 mlist->prev = ml; 314 315 return 0; 316#endif /* COMPILE_ONLY */ 317} 318 319protected void 320file_delmagic(struct magic *p, int type, size_t entries) 321{ 322 if (p == NULL) 323 return; 324 switch (type) { 325#ifdef QUICK 326 case 2: 327 p--; 328 (void)munmap((void *)p, sizeof(*p) * (entries + 1)); 329 break; 330#endif 331 case 1: 332 p--; 333 /*FALLTHROUGH*/ 334 case 0: 335 free(p); 336 break; 337 default: 338 abort(); 339 } 340} 341 342/* const char *fn: list of magic files and directories */ 343protected struct mlist * 344file_apprentice(struct magic_set *ms, const char *fn, int action) 345{ 346 char *p, *mfn; 347 int file_err, errs = -1; 348 struct mlist *mlist; 349 350 init_file_tables(); 351 352 if (fn == NULL) 353 fn = getenv("MAGIC"); 354 if (fn == NULL) 355 fn = MAGIC; 356 357 if ((mfn = strdup(fn)) == NULL) { 358 file_oomem(ms, strlen(fn)); 359 return NULL; 360 } 361 fn = mfn; 362 363 if ((mlist = CAST(struct mlist *, malloc(sizeof(*mlist)))) == NULL) { 364 free(mfn); 365 file_oomem(ms, sizeof(*mlist)); 366 return NULL; 367 } 368 mlist->next = mlist->prev = mlist; 369 370 while (fn) { 371 p = strchr(fn, PATHSEP); 372 if (p) 373 *p++ = '\0'; 374 if (*fn == '\0') 375 break; 376 file_err = apprentice_1(ms, fn, action, mlist); 377 errs = MAX(errs, file_err); 378 fn = p; 379 } 380 if (errs == -1) { 381 free(mfn); 382 free(mlist); 383 mlist = NULL; 384 file_error(ms, 0, "could not find any magic files!"); 385 return NULL; 386 } 387 free(mfn); 388 return mlist; 389} 390 391/* 392 * Get weight of this magic entry, for sorting purposes. 
393 */ 394private size_t 395apprentice_magic_strength(const struct magic *m) 396{ 397#define MULT 10 398 size_t val = 2 * MULT; /* baseline strength */ 399 400 switch (m->type) { 401 case FILE_DEFAULT: /* make sure this sorts last */ 402 if (m->factor_op != FILE_FACTOR_OP_NONE) 403 abort(); 404 return 0; 405 406 case FILE_BYTE: 407 val += 1 * MULT; 408 break; 409 410 case FILE_SHORT: 411 case FILE_LESHORT: 412 case FILE_BESHORT: 413 val += 2 * MULT; 414 break; 415 416 case FILE_LONG: 417 case FILE_LELONG: 418 case FILE_BELONG: 419 case FILE_MELONG: 420 val += 4 * MULT; 421 break; 422 423 case FILE_PSTRING: 424 case FILE_STRING: 425 val += m->vallen * MULT; 426 break; 427 428 case FILE_BESTRING16: 429 case FILE_LESTRING16: 430 val += m->vallen * MULT / 2; 431 break; 432 433 case FILE_SEARCH: 434 case FILE_REGEX: 435 val += m->vallen * MAX(MULT / m->vallen, 1); 436 break; 437 438 case FILE_DATE: 439 case FILE_LEDATE: 440 case FILE_BEDATE: 441 case FILE_MEDATE: 442 case FILE_LDATE: 443 case FILE_LELDATE: 444 case FILE_BELDATE: 445 case FILE_MELDATE: 446 case FILE_FLOAT: 447 case FILE_BEFLOAT: 448 case FILE_LEFLOAT: 449 val += 4 * MULT; 450 break; 451 452 case FILE_QUAD: 453 case FILE_BEQUAD: 454 case FILE_LEQUAD: 455 case FILE_QDATE: 456 case FILE_LEQDATE: 457 case FILE_BEQDATE: 458 case FILE_QLDATE: 459 case FILE_LEQLDATE: 460 case FILE_BEQLDATE: 461 case FILE_DOUBLE: 462 case FILE_BEDOUBLE: 463 case FILE_LEDOUBLE: 464 val += 8 * MULT; 465 break; 466 467 default: 468 val = 0; 469 (void)fprintf(stderr, "Bad type %d\n", m->type); 470 abort(); 471 } 472 473 switch (m->reln) { 474 case 'x': /* matches anything penalize */ 475 case '!': /* matches almost anything penalize */ 476 val = 0; 477 break; 478 479 case '=': /* Exact match, prefer */ 480 val += MULT; 481 break; 482 483 case '>': 484 case '<': /* comparison match reduce strength */ 485 val -= 2 * MULT; 486 break; 487 488 case '^': 489 case '&': /* masking bits, we could count them too */ 490 val -= MULT; 491 break; 
492 493 default: 494 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 495 abort(); 496 } 497 498 if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ 499 val = 1; 500 501 switch (m->factor_op) { 502 case FILE_FACTOR_OP_NONE: 503 break; 504 case FILE_FACTOR_OP_PLUS: 505 val += m->factor; 506 break; 507 case FILE_FACTOR_OP_MINUS: 508 val -= m->factor; 509 break; 510 case FILE_FACTOR_OP_TIMES: 511 val *= m->factor; 512 break; 513 case FILE_FACTOR_OP_DIV: 514 val /= m->factor; 515 break; 516 default: 517 abort(); 518 } 519 520 /* 521 * Magic entries with no description get a bonus because they depend 522 * on subsequent magic entries to print something. 523 */ 524 if (m->desc[0] == '\0') 525 val++; 526 return val; 527} 528 529/* 530 * Sort callback for sorting entries by "strength" (basically length) 531 */ 532private int 533apprentice_sort(const void *a, const void *b) 534{ 535 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 536 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 537 size_t sa = apprentice_magic_strength(ma->mp); 538 size_t sb = apprentice_magic_strength(mb->mp); 539 if (sa == sb) 540 return 0; 541 else if (sa > sb) 542 return -1; 543 else 544 return 1; 545} 546 547private void 548set_test_type(struct magic *mstart, struct magic *m) 549{ 550 switch (m->type) { 551 case FILE_BYTE: 552 case FILE_SHORT: 553 case FILE_LONG: 554 case FILE_DATE: 555 case FILE_BESHORT: 556 case FILE_BELONG: 557 case FILE_BEDATE: 558 case FILE_LESHORT: 559 case FILE_LELONG: 560 case FILE_LEDATE: 561 case FILE_LDATE: 562 case FILE_BELDATE: 563 case FILE_LELDATE: 564 case FILE_MEDATE: 565 case FILE_MELDATE: 566 case FILE_MELONG: 567 case FILE_QUAD: 568 case FILE_LEQUAD: 569 case FILE_BEQUAD: 570 case FILE_QDATE: 571 case FILE_LEQDATE: 572 case FILE_BEQDATE: 573 case FILE_QLDATE: 574 case FILE_LEQLDATE: 575 case FILE_BEQLDATE: 576 case FILE_FLOAT: 577 case FILE_BEFLOAT: 578 case FILE_LEFLOAT: 579 case FILE_DOUBLE: 580 case 
FILE_BEDOUBLE: 581 case FILE_LEDOUBLE: 582 case FILE_STRING: 583 case FILE_PSTRING: 584 case FILE_BESTRING16: 585 case FILE_LESTRING16: 586 /* binary test, set flag */ 587 mstart->flag |= BINTEST; 588 break; 589 case FILE_REGEX: 590 case FILE_SEARCH: 591 /* binary test if pattern is not text */ 592 if (file_looks_utf8(m->value.us, m->vallen, NULL, NULL) <= 0) 593 mstart->flag |= BINTEST; 594 break; 595 case FILE_DEFAULT: 596 /* can't deduce anything; we shouldn't see this at the 597 top level anyway */ 598 break; 599 case FILE_INVALID: 600 default: 601 /* invalid search type, but no need to complain here */ 602 break; 603 } 604} 605 606/* 607 * Load and parse one file. 608 */ 609private void 610load_1(struct magic_set *ms, int action, const char *fn, int *errs, 611 struct magic_entry **marray, uint32_t *marraycount) 612{ 613 char line[BUFSIZ]; 614 size_t lineno = 0; 615 FILE *f = fopen(ms->file = fn, "r"); 616 if (f == NULL) { 617 if (errno != ENOENT) 618 file_error(ms, errno, "cannot read magic file `%s'", 619 fn); 620 (*errs)++; 621 } else { 622 /* read and parse this file */ 623 for (ms->line = 1; fgets(line, sizeof(line), f) != NULL; ms->line++) { 624 size_t len; 625 len = strlen(line); 626 if (len == 0) /* null line, garbage, etc */ 627 continue; 628 if (line[len - 1] == '\n') { 629 lineno++; 630 line[len - 1] = '\0'; /* delete newline */ 631 } 632 if (line[0] == '\0') /* empty, do not parse */ 633 continue; 634 if (line[0] == '#') /* comment, do not parse */ 635 continue; 636 if (line[0] == '!' 
&& line[1] == ':') { 637 size_t i; 638 639 for (i = 0; bang[i].name != NULL; i++) { 640 if (len - 2 > bang[i].len && 641 memcmp(bang[i].name, line + 2, 642 bang[i].len) == 0) 643 break; 644 } 645 if (bang[i].name == NULL) { 646 file_error(ms, 0, 647 "Unknown !: entry `%s'", line); 648 (*errs)++; 649 continue; 650 } 651 if (*marraycount == 0) { 652 file_error(ms, 0, 653 "No current entry for :!%s type", 654 bang[i].name); 655 (*errs)++; 656 continue; 657 } 658 if ((*bang[i].fun)(ms, 659 &(*marray)[*marraycount - 1], 660 line + bang[i].len + 2) != 0) { 661 (*errs)++; 662 continue; 663 } 664 continue; 665 } 666 if (parse(ms, marray, marraycount, line, lineno, 667 action) != 0) 668 (*errs)++; 669 } 670 671 (void)fclose(f); 672 } 673} 674 675/* 676 * parse a file or directory of files 677 * const char *fn: name of magic file or directory 678 */ 679private int 680apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 681 const char *fn, int action) 682{ 683 int errs = 0; 684 struct magic_entry *marray; 685 uint32_t marraycount, i, mentrycount = 0, starttest; 686 size_t slen; 687 char subfn[MAXPATHLEN]; 688 struct stat st; 689 DIR *dir; 690 struct dirent *d; 691 692 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 693 694 maxmagic = MAXMAGIS; 695 if ((marray = CAST(struct magic_entry *, calloc(maxmagic, 696 sizeof(*marray)))) == NULL) { 697 file_oomem(ms, maxmagic * sizeof(*marray)); 698 return -1; 699 } 700 marraycount = 0; 701 702 /* print silly verbose header for USG compat. 
*/ 703 if (action == FILE_CHECK) 704 (void)fprintf(stderr, "%s\n", usg_hdr); 705 706 /* load directory or file */ 707 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 708 dir = opendir(fn); 709 if (dir) { 710 while ((d = readdir(dir)) != NULL) { 711 snprintf(subfn, sizeof(subfn), "%s/%s", 712 fn, d->d_name); 713 if (stat(subfn, &st) == 0 && 714 S_ISREG(st.st_mode)) { 715 load_1(ms, action, subfn, &errs, 716 &marray, &marraycount); 717 } 718 } 719 closedir(dir); 720 } else 721 errs++; 722 } else 723 load_1(ms, action, fn, &errs, &marray, &marraycount); 724 if (errs) 725 goto out; 726 727 /* Set types of tests */ 728 for (i = 0; i < marraycount; ) { 729 if (marray[i].mp->cont_level != 0) { 730 i++; 731 continue; 732 } 733 734 starttest = i; 735 do { 736 static const char text[] = "text"; 737 static const char binary[] = "binary"; 738 static const size_t len = sizeof(text); 739 set_test_type(marray[starttest].mp, marray[i].mp); 740 if ((ms->flags & MAGIC_DEBUG) == 0) 741 continue; 742 (void)fprintf(stderr, "%s%s%s: %s\n", 743 marray[i].mp->mimetype, 744 marray[i].mp->mimetype[0] == '\0' ? "" : "; ", 745 marray[i].mp->desc[0] ? marray[i].mp->desc : 746 "(no description)", 747 marray[i].mp->flag & BINTEST ? binary : text); 748 if (marray[i].mp->flag & BINTEST) { 749 char *p = strstr(marray[i].mp->desc, text); 750 if (p && (p == marray[i].mp->desc || 751 isspace((unsigned char)p[-1])) && 752 (p + len - marray[i].mp->desc == 753 MAXstring || (p[len] == '\0' || 754 isspace((unsigned char)p[len])))) 755 (void)fprintf(stderr, "*** Possible " 756 "binary test for text type\n"); 757 } 758 } while (++i < marraycount && marray[i].mp->cont_level != 0); 759 } 760 761 qsort(marray, marraycount, sizeof(*marray), apprentice_sort); 762 763 /* 764 * Make sure that any level 0 "default" line is last (if one exists). 
765 */ 766 for (i = 0; i < marraycount; i++) { 767 if (marray[i].mp->cont_level == 0 && 768 marray[i].mp->type == FILE_DEFAULT) { 769 while (++i < marraycount) 770 if (marray[i].mp->cont_level == 0) 771 break; 772 if (i != marraycount) { 773 ms->line = marray[i].mp->lineno; /* XXX - Ugh! */ 774 file_magwarn(ms, 775 "level 0 \"default\" did not sort last"); 776 } 777 break; 778 } 779 } 780 781 for (i = 0; i < marraycount; i++) 782 mentrycount += marray[i].cont_count; 783 784 slen = sizeof(**magicp) * mentrycount; 785 if ((*magicp = CAST(struct magic *, malloc(slen))) == NULL) { 786 file_oomem(ms, slen); 787 errs++; 788 goto out; 789 } 790 791 mentrycount = 0; 792 for (i = 0; i < marraycount; i++) { 793 (void)memcpy(*magicp + mentrycount, marray[i].mp, 794 marray[i].cont_count * sizeof(**magicp)); 795 mentrycount += marray[i].cont_count; 796 } 797out: 798 for (i = 0; i < marraycount; i++) 799 free(marray[i].mp); 800 free(marray); 801 if (errs) { 802 *magicp = NULL; 803 *nmagicp = 0; 804 return errs; 805 } else { 806 *nmagicp = mentrycount; 807 return 0; 808 } 809 810} 811 812/* 813 * extend the sign bit if the comparison is to be signed 814 */ 815protected uint64_t 816file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 817{ 818 if (!(m->flag & UNSIGNED)) { 819 switch(m->type) { 820 /* 821 * Do not remove the casts below. They are 822 * vital. When later compared with the data, 823 * the sign extension must have happened. 
824 */ 825 case FILE_BYTE: 826 v = (char) v; 827 break; 828 case FILE_SHORT: 829 case FILE_BESHORT: 830 case FILE_LESHORT: 831 v = (short) v; 832 break; 833 case FILE_DATE: 834 case FILE_BEDATE: 835 case FILE_LEDATE: 836 case FILE_MEDATE: 837 case FILE_LDATE: 838 case FILE_BELDATE: 839 case FILE_LELDATE: 840 case FILE_MELDATE: 841 case FILE_LONG: 842 case FILE_BELONG: 843 case FILE_LELONG: 844 case FILE_MELONG: 845 case FILE_FLOAT: 846 case FILE_BEFLOAT: 847 case FILE_LEFLOAT: 848 v = (int32_t) v; 849 break; 850 case FILE_QUAD: 851 case FILE_BEQUAD: 852 case FILE_LEQUAD: 853 case FILE_QDATE: 854 case FILE_QLDATE: 855 case FILE_BEQDATE: 856 case FILE_BEQLDATE: 857 case FILE_LEQDATE: 858 case FILE_LEQLDATE: 859 case FILE_DOUBLE: 860 case FILE_BEDOUBLE: 861 case FILE_LEDOUBLE: 862 v = (int64_t) v; 863 break; 864 case FILE_STRING: 865 case FILE_PSTRING: 866 case FILE_BESTRING16: 867 case FILE_LESTRING16: 868 case FILE_REGEX: 869 case FILE_SEARCH: 870 case FILE_DEFAULT: 871 break; 872 default: 873 if (ms->flags & MAGIC_CHECK) 874 file_magwarn(ms, "cannot happen: m->type=%d\n", 875 m->type); 876 return ~0U; 877 } 878 } 879 return v; 880} 881 882private int 883string_modifier_check(struct magic_set *ms, struct magic *m) 884{ 885 if ((ms->flags & MAGIC_CHECK) == 0) 886 return 0; 887 888 switch (m->type) { 889 case FILE_BESTRING16: 890 case FILE_LESTRING16: 891 if (m->str_flags != 0) { 892 file_magwarn(ms, 893 "no modifiers allowed for 16-bit strings\n"); 894 return -1; 895 } 896 break; 897 case FILE_STRING: 898 case FILE_PSTRING: 899 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 900 file_magwarn(ms, 901 "'/%c' only allowed on regex and search\n", 902 CHAR_REGEX_OFFSET_START); 903 return -1; 904 } 905 break; 906 case FILE_SEARCH: 907 if (m->str_range == 0) { 908 file_magwarn(ms, 909 "missing range; defaulting to %d\n", 910 STRING_DEFAULT_RANGE); 911 m->str_range = STRING_DEFAULT_RANGE; 912 return -1; 913 } 914 break; 915 case FILE_REGEX: 916 if ((m->str_flags & 
STRING_COMPACT_BLANK) != 0) { 917 file_magwarn(ms, "'/%c' not allowed on regex\n", 918 CHAR_COMPACT_BLANK); 919 return -1; 920 } 921 if ((m->str_flags & STRING_COMPACT_OPTIONAL_BLANK) != 0) { 922 file_magwarn(ms, "'/%c' not allowed on regex\n", 923 CHAR_COMPACT_OPTIONAL_BLANK); 924 return -1; 925 } 926 break; 927 default: 928 file_magwarn(ms, "coding error: m->type=%d\n", 929 m->type); 930 return -1; 931 } 932 return 0; 933} 934 935private int 936get_op(char c) 937{ 938 switch (c) { 939 case '&': 940 return FILE_OPAND; 941 case '|': 942 return FILE_OPOR; 943 case '^': 944 return FILE_OPXOR; 945 case '+': 946 return FILE_OPADD; 947 case '-': 948 return FILE_OPMINUS; 949 case '*': 950 return FILE_OPMULTIPLY; 951 case '/': 952 return FILE_OPDIVIDE; 953 case '%': 954 return FILE_OPMODULO; 955 default: 956 return -1; 957 } 958} 959 960#ifdef ENABLE_CONDITIONALS 961private int 962get_cond(const char *l, const char **t) 963{ 964 static const struct cond_tbl_s { 965 char name[8]; 966 size_t len; 967 int cond; 968 } cond_tbl[] = { 969 { "if", 2, COND_IF }, 970 { "elif", 4, COND_ELIF }, 971 { "else", 4, COND_ELSE }, 972 { "", 0, COND_NONE }, 973 }; 974 const struct cond_tbl_s *p; 975 976 for (p = cond_tbl; p->len; p++) { 977 if (strncmp(l, p->name, p->len) == 0 && 978 isspace((unsigned char)l[p->len])) { 979 if (t) 980 *t = l + p->len; 981 break; 982 } 983 } 984 return p->cond; 985} 986 987private int 988check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 989{ 990 int last_cond; 991 last_cond = ms->c.li[cont_level].last_cond; 992 993 switch (cond) { 994 case COND_IF: 995 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 996 if (ms->flags & MAGIC_CHECK) 997 file_magwarn(ms, "syntax error: `if'"); 998 return -1; 999 } 1000 last_cond = COND_IF; 1001 break; 1002 1003 case COND_ELIF: 1004 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1005 if (ms->flags & MAGIC_CHECK) 1006 file_magwarn(ms, "syntax error: `elif'"); 1007 return -1; 1008 } 1009 last_cond = 
COND_ELIF; 1010 break; 1011 1012 case COND_ELSE: 1013 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1014 if (ms->flags & MAGIC_CHECK) 1015 file_magwarn(ms, "syntax error: `else'"); 1016 return -1; 1017 } 1018 last_cond = COND_NONE; 1019 break; 1020 1021 case COND_NONE: 1022 last_cond = COND_NONE; 1023 break; 1024 } 1025 1026 ms->c.li[cont_level].last_cond = last_cond; 1027 return 0; 1028} 1029#endif /* ENABLE_CONDITIONALS */ 1030 1031/* 1032 * parse one line from magic file, put into magic[index++] if valid 1033 */ 1034private int 1035parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, 1036 const char *line, size_t lineno, int action) 1037{ 1038#ifdef ENABLE_CONDITIONALS 1039 static uint32_t last_cont_level = 0; 1040#endif 1041 size_t i; 1042 struct magic_entry *me; 1043 struct magic *m; 1044 const char *l = line; 1045 char *t; 1046 int op; 1047 uint32_t cont_level; 1048 1049 cont_level = 0; 1050 1051 while (*l == '>') { 1052 ++l; /* step over */ 1053 cont_level++; 1054 } 1055#ifdef ENABLE_CONDITIONALS 1056 if (cont_level == 0 || cont_level > last_cont_level) 1057 if (file_check_mem(ms, cont_level) == -1) 1058 return -1; 1059 last_cont_level = cont_level; 1060#endif 1061 1062#define ALLOC_CHUNK (size_t)10 1063#define ALLOC_INCR (size_t)200 1064 1065 if (cont_level != 0) { 1066 if (*nmentryp == 0) { 1067 file_error(ms, 0, "No current entry for continuation"); 1068 return -1; 1069 } 1070 me = &(*mentryp)[*nmentryp - 1]; 1071 if (me->cont_count == me->max_count) { 1072 struct magic *nm; 1073 size_t cnt = me->max_count + ALLOC_CHUNK; 1074 if ((nm = CAST(struct magic *, realloc(me->mp, 1075 sizeof(*nm) * cnt))) == NULL) { 1076 file_oomem(ms, sizeof(*nm) * cnt); 1077 return -1; 1078 } 1079 me->mp = m = nm; 1080 me->max_count = cnt; 1081 } 1082 m = &me->mp[me->cont_count++]; 1083 (void)memset(m, 0, sizeof(*m)); 1084 m->cont_level = cont_level; 1085 } else { 1086 if (*nmentryp == maxmagic) { 1087 struct magic_entry *mp; 1088 1089 maxmagic 
+= ALLOC_INCR; 1090 if ((mp = CAST(struct magic_entry *, 1091 realloc(*mentryp, sizeof(*mp) * maxmagic))) == 1092 NULL) { 1093 file_oomem(ms, sizeof(*mp) * maxmagic); 1094 return -1; 1095 } 1096 (void)memset(&mp[*nmentryp], 0, sizeof(*mp) * 1097 ALLOC_INCR); 1098 *mentryp = mp; 1099 } 1100 me = &(*mentryp)[*nmentryp]; 1101 if (me->mp == NULL) { 1102 size_t len = sizeof(*m) * ALLOC_CHUNK; 1103 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1104 file_oomem(ms, len); 1105 return -1; 1106 } 1107 me->mp = m; 1108 me->max_count = ALLOC_CHUNK; 1109 } else 1110 m = me->mp; 1111 (void)memset(m, 0, sizeof(*m)); 1112 m->factor_op = FILE_FACTOR_OP_NONE; 1113 m->cont_level = 0; 1114 me->cont_count = 1; 1115 } 1116 m->lineno = lineno; 1117 1118 if (*l == '&') { /* m->cont_level == 0 checked below. */ 1119 ++l; /* step over */ 1120 m->flag |= OFFADD; 1121 } 1122 if (*l == '(') { 1123 ++l; /* step over */ 1124 m->flag |= INDIR; 1125 if (m->flag & OFFADD) 1126 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1127 1128 if (*l == '&') { /* m->cont_level == 0 checked below */ 1129 ++l; /* step over */ 1130 m->flag |= OFFADD; 1131 } 1132 } 1133 /* Indirect offsets are not valid at level 0. 
*/ 1134 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) 1135 if (ms->flags & MAGIC_CHECK) 1136 file_magwarn(ms, "relative offset at level 0"); 1137 1138 /* get offset, then skip over it */ 1139 m->offset = (uint32_t)strtoul(l, &t, 0); 1140 if (l == t) 1141 if (ms->flags & MAGIC_CHECK) 1142 file_magwarn(ms, "offset `%s' invalid", l); 1143 l = t; 1144 1145 if (m->flag & INDIR) { 1146 m->in_type = FILE_LONG; 1147 m->in_offset = 0; 1148 /* 1149 * read [.lbs][+-]nnnnn) 1150 */ 1151 if (*l == '.') { 1152 l++; 1153 switch (*l) { 1154 case 'l': 1155 m->in_type = FILE_LELONG; 1156 break; 1157 case 'L': 1158 m->in_type = FILE_BELONG; 1159 break; 1160 case 'm': 1161 m->in_type = FILE_MELONG; 1162 break; 1163 case 'h': 1164 case 's': 1165 m->in_type = FILE_LESHORT; 1166 break; 1167 case 'H': 1168 case 'S': 1169 m->in_type = FILE_BESHORT; 1170 break; 1171 case 'c': 1172 case 'b': 1173 case 'C': 1174 case 'B': 1175 m->in_type = FILE_BYTE; 1176 break; 1177 case 'e': 1178 case 'f': 1179 case 'g': 1180 m->in_type = FILE_LEDOUBLE; 1181 break; 1182 case 'E': 1183 case 'F': 1184 case 'G': 1185 m->in_type = FILE_BEDOUBLE; 1186 break; 1187 default: 1188 if (ms->flags & MAGIC_CHECK) 1189 file_magwarn(ms, 1190 "indirect offset type `%c' invalid", 1191 *l); 1192 break; 1193 } 1194 l++; 1195 } 1196 1197 m->in_op = 0; 1198 if (*l == '~') { 1199 m->in_op |= FILE_OPINVERSE; 1200 l++; 1201 } 1202 if ((op = get_op(*l)) != -1) { 1203 m->in_op |= op; 1204 l++; 1205 } 1206 if (*l == '(') { 1207 m->in_op |= FILE_OPINDIRECT; 1208 l++; 1209 } 1210 if (isdigit((unsigned char)*l) || *l == '-') { 1211 m->in_offset = (int32_t)strtol(l, &t, 0); 1212 if (l == t) 1213 if (ms->flags & MAGIC_CHECK) 1214 file_magwarn(ms, 1215 "in_offset `%s' invalid", l); 1216 l = t; 1217 } 1218 if (*l++ != ')' || 1219 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 1220 if (ms->flags & MAGIC_CHECK) 1221 file_magwarn(ms, 1222 "missing ')' in indirect offset"); 1223 } 1224 EATAB; 1225 1226#ifdef ENABLE_CONDITIONALS 
1227 m->cond = get_cond(l, &l); 1228 if (check_cond(ms, m->cond, cont_level) == -1) 1229 return -1; 1230 1231 EATAB; 1232#endif 1233 1234 if (*l == 'u') { 1235 ++l; 1236 m->flag |= UNSIGNED; 1237 } 1238 1239 m->type = get_type(l, &l); 1240 if (m->type == FILE_INVALID) { 1241 if (ms->flags & MAGIC_CHECK) 1242 file_magwarn(ms, "type `%s' invalid", l); 1243 return -1; 1244 } 1245 1246 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1247 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 1248 1249 m->mask_op = 0; 1250 if (*l == '~') { 1251 if (!IS_STRING(m->type)) 1252 m->mask_op |= FILE_OPINVERSE; 1253 else if (ms->flags & MAGIC_CHECK) 1254 file_magwarn(ms, "'~' invalid for string types"); 1255 ++l; 1256 } 1257 m->str_range = 0; 1258 m->str_flags = 0; 1259 m->num_mask = 0; 1260 if ((op = get_op(*l)) != -1) { 1261 if (!IS_STRING(m->type)) { 1262 uint64_t val; 1263 ++l; 1264 m->mask_op |= op; 1265 val = (uint64_t)strtoull(l, &t, 0); 1266 l = t; 1267 m->num_mask = file_signextend(ms, m, val); 1268 eatsize(&l); 1269 } 1270 else if (op == FILE_OPDIVIDE) { 1271 int have_range = 0; 1272 while (!isspace((unsigned char)*++l)) { 1273 switch (*l) { 1274 case '0': case '1': case '2': 1275 case '3': case '4': case '5': 1276 case '6': case '7': case '8': 1277 case '9': 1278 if (have_range && 1279 (ms->flags & MAGIC_CHECK)) 1280 file_magwarn(ms, 1281 "multiple ranges"); 1282 have_range = 1; 1283 m->str_range = strtoul(l, &t, 0); 1284 if (m->str_range == 0) 1285 file_magwarn(ms, 1286 "zero range"); 1287 l = t - 1; 1288 break; 1289 case CHAR_COMPACT_BLANK: 1290 m->str_flags |= STRING_COMPACT_BLANK; 1291 break; 1292 case CHAR_COMPACT_OPTIONAL_BLANK: 1293 m->str_flags |= 1294 STRING_COMPACT_OPTIONAL_BLANK; 1295 break; 1296 case CHAR_IGNORE_LOWERCASE: 1297 m->str_flags |= STRING_IGNORE_LOWERCASE; 1298 break; 1299 case CHAR_IGNORE_UPPERCASE: 1300 m->str_flags |= STRING_IGNORE_UPPERCASE; 1301 break; 1302 case CHAR_REGEX_OFFSET_START: 1303 m->str_flags |= 
REGEX_OFFSET_START; 1304 break; 1305 default: 1306 if (ms->flags & MAGIC_CHECK) 1307 file_magwarn(ms, 1308 "string extension `%c' invalid", 1309 *l); 1310 return -1; 1311 } 1312 /* allow multiple '/' for readability */ 1313 if (l[1] == '/' && 1314 !isspace((unsigned char)l[2])) 1315 l++; 1316 } 1317 if (string_modifier_check(ms, m) == -1) 1318 return -1; 1319 } 1320 else { 1321 if (ms->flags & MAGIC_CHECK) 1322 file_magwarn(ms, "invalid string op: %c", *t); 1323 return -1; 1324 } 1325 } 1326 /* 1327 * We used to set mask to all 1's here, instead let's just not do 1328 * anything if mask = 0 (unless you have a better idea) 1329 */ 1330 EATAB; 1331 1332 switch (*l) { 1333 case '>': 1334 case '<': 1335 m->reln = *l; 1336 ++l; 1337 if (*l == '=') { 1338 if (ms->flags & MAGIC_CHECK) { 1339 file_magwarn(ms, "%c= not supported", 1340 m->reln); 1341 return -1; 1342 } 1343 ++l; 1344 } 1345 break; 1346 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 1347 case '&': 1348 case '^': 1349 case '=': 1350 m->reln = *l; 1351 ++l; 1352 if (*l == '=') { 1353 /* HP compat: ignore &= etc. */ 1354 ++l; 1355 } 1356 break; 1357 case '!': 1358 m->reln = *l; 1359 ++l; 1360 break; 1361 default: 1362 m->reln = '='; /* the default relation */ 1363 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 1364 isspace((unsigned char)l[1])) || !l[1])) { 1365 m->reln = *l; 1366 ++l; 1367 } 1368 break; 1369 } 1370 /* 1371 * Grab the value part, except for an 'x' reln. 1372 */ 1373 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 1374 return -1; 1375 1376 /* 1377 * TODO finish this macro and start using it! 
1378 * #define offsetcheck {if (offset > HOWMANY-1) 1379 * magwarn("offset too big"); } 1380 */ 1381 1382 /* 1383 * Now get last part - the description 1384 */ 1385 EATAB; 1386 if (l[0] == '\b') { 1387 ++l; 1388 m->flag |= NOSPACE; 1389 } else if ((l[0] == '\\') && (l[1] == 'b')) { 1390 ++l; 1391 ++l; 1392 m->flag |= NOSPACE; 1393 } 1394 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 1395 continue; 1396 if (i == sizeof(m->desc)) { 1397 m->desc[sizeof(m->desc) - 1] = '\0'; 1398 if (ms->flags & MAGIC_CHECK) 1399 file_magwarn(ms, "description `%s' truncated", m->desc); 1400 } 1401 1402 /* 1403 * We only do this check while compiling, or if any of the magic 1404 * files were not compiled. 1405 */ 1406 if (ms->flags & MAGIC_CHECK) { 1407 if (check_format(ms, m) == -1) 1408 return -1; 1409 } 1410#ifndef COMPILE_ONLY 1411 if (action == FILE_CHECK) { 1412 file_mdump(m); 1413 } 1414#endif 1415 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 1416 if (m->cont_level == 0) 1417 ++(*nmentryp); /* make room for next */ 1418 return 0; 1419} 1420 1421/* 1422 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 1423 * if valid 1424 */ 1425private int 1426parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) 1427{ 1428 const char *l = line; 1429 char *el; 1430 unsigned long factor; 1431 struct magic *m = &me->mp[0]; 1432 1433 if (m->factor_op != FILE_FACTOR_OP_NONE) { 1434 file_magwarn(ms, 1435 "Current entry already has a strength type: %c %d", 1436 m->factor_op, m->factor); 1437 return -1; 1438 } 1439 EATAB; 1440 switch (*l) { 1441 case FILE_FACTOR_OP_NONE: 1442 case FILE_FACTOR_OP_PLUS: 1443 case FILE_FACTOR_OP_MINUS: 1444 case FILE_FACTOR_OP_TIMES: 1445 case FILE_FACTOR_OP_DIV: 1446 m->factor_op = *l++; 1447 break; 1448 default: 1449 file_magwarn(ms, "Unknown factor op `%c'", *l); 1450 return -1; 1451 } 1452 EATAB; 1453 factor = strtoul(l, &el, 0); 1454 if (factor > 255) { 1455 file_magwarn(ms, "Too 
large factor `%lu'", factor); 1456 goto out; 1457 } 1458 if (*el && !isspace((unsigned char)*el)) { 1459 file_magwarn(ms, "Bad factor `%s'", l); 1460 goto out; 1461 } 1462 m->factor = (uint8_t)factor; 1463 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 1464 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 1465 m->factor_op, m->factor); 1466 goto out; 1467 } 1468 return 0; 1469out: 1470 m->factor_op = FILE_FACTOR_OP_NONE; 1471 m->factor = 0; 1472 return -1; 1473} 1474 1475/* 1476 * parse a MIME annotation line from magic file, put into magic[index - 1] 1477 * if valid 1478 */ 1479private int 1480parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) 1481{ 1482 size_t i; 1483 const char *l = line; 1484 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 1485 1486 if (m->mimetype[0] != '\0') { 1487 file_magwarn(ms, "Current entry already has a MIME type `%s'," 1488 " new type `%s'", m->mimetype, l); 1489 return -1; 1490 } 1491 1492 EATAB; 1493 for (i = 0; 1494 *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l)) 1495 || strchr("-+/.", *l)) && i < sizeof(m->mimetype); 1496 m->mimetype[i++] = *l++) 1497 continue; 1498 if (i == sizeof(m->mimetype)) { 1499 m->desc[sizeof(m->mimetype) - 1] = '\0'; 1500 if (ms->flags & MAGIC_CHECK) 1501 file_magwarn(ms, "MIME type `%s' truncated %zu", 1502 m->mimetype, i); 1503 } else 1504 m->mimetype[i] = '\0'; 1505 1506 if (i > 0) 1507 return 0; 1508 else 1509 return -1; 1510} 1511 1512private int 1513check_format_type(const char *ptr, int type) 1514{ 1515 int quad = 0; 1516 if (*ptr == '\0') { 1517 /* Missing format string; bad */ 1518 return -1; 1519 } 1520 1521 switch (type) { 1522 case FILE_FMT_QUAD: 1523 quad = 1; 1524 /*FALLTHROUGH*/ 1525 case FILE_FMT_NUM: 1526 if (*ptr == '-') 1527 ptr++; 1528 if (*ptr == '.') 1529 ptr++; 1530 while (isdigit((unsigned char)*ptr)) ptr++; 1531 if (*ptr == '.') 1532 ptr++; 1533 while (isdigit((unsigned char)*ptr)) ptr++; 
1534 if (quad) { 1535 if (*ptr++ != 'l') 1536 return -1; 1537 if (*ptr++ != 'l') 1538 return -1; 1539 } 1540 1541 switch (*ptr++) { 1542 case 'l': 1543 switch (*ptr++) { 1544 case 'i': 1545 case 'd': 1546 case 'u': 1547 case 'x': 1548 case 'X': 1549 return 0; 1550 default: 1551 return -1; 1552 } 1553 1554 case 'h': 1555 switch (*ptr++) { 1556 case 'h': 1557 switch (*ptr++) { 1558 case 'i': 1559 case 'd': 1560 case 'u': 1561 case 'x': 1562 case 'X': 1563 return 0; 1564 default: 1565 return -1; 1566 } 1567 case 'd': 1568 return 0; 1569 default: 1570 return -1; 1571 } 1572 1573 case 'i': 1574 case 'c': 1575 case 'd': 1576 case 'u': 1577 case 'x': 1578 case 'X': 1579 return 0; 1580 1581 default: 1582 return -1; 1583 } 1584 1585 case FILE_FMT_FLOAT: 1586 case FILE_FMT_DOUBLE: 1587 if (*ptr == '-') 1588 ptr++; 1589 if (*ptr == '.') 1590 ptr++; 1591 while (isdigit((unsigned char)*ptr)) ptr++; 1592 if (*ptr == '.') 1593 ptr++; 1594 while (isdigit((unsigned char)*ptr)) ptr++; 1595 1596 switch (*ptr++) { 1597 case 'e': 1598 case 'E': 1599 case 'f': 1600 case 'F': 1601 case 'g': 1602 case 'G': 1603 return 0; 1604 1605 default: 1606 return -1; 1607 } 1608 1609 1610 case FILE_FMT_STR: 1611 if (*ptr == '-') 1612 ptr++; 1613 while (isdigit((unsigned char )*ptr)) 1614 ptr++; 1615 if (*ptr == '.') { 1616 ptr++; 1617 while (isdigit((unsigned char )*ptr)) 1618 ptr++; 1619 } 1620 1621 switch (*ptr++) { 1622 case 's': 1623 return 0; 1624 default: 1625 return -1; 1626 } 1627 1628 default: 1629 /* internal error */ 1630 abort(); 1631 } 1632 /*NOTREACHED*/ 1633 return -1; 1634} 1635 1636/* 1637 * Check that the optional printf format in description matches 1638 * the type of the magic. 
1639 */ 1640private int 1641check_format(struct magic_set *ms, struct magic *m) 1642{ 1643 char *ptr; 1644 1645 for (ptr = m->desc; *ptr; ptr++) 1646 if (*ptr == '%') 1647 break; 1648 if (*ptr == '\0') { 1649 /* No format string; ok */ 1650 return 1; 1651 } 1652 1653 assert(file_nformats == file_nnames); 1654 1655 if (m->type >= file_nformats) { 1656 file_magwarn(ms, "Internal error inconsistency between " 1657 "m->type and format strings"); 1658 return -1; 1659 } 1660 if (file_formats[m->type] == FILE_FMT_NONE) { 1661 file_magwarn(ms, "No format string for `%s' with description " 1662 "`%s'", m->desc, file_names[m->type]); 1663 return -1; 1664 } 1665 1666 ptr++; 1667 if (check_format_type(ptr, file_formats[m->type]) == -1) { 1668 /* 1669 * TODO: this error message is unhelpful if the format 1670 * string is not one character long 1671 */ 1672 file_magwarn(ms, "Printf format `%c' is not valid for type " 1673 "`%s' in description `%s'", 1674 ptr && *ptr ? *ptr : '?', 1675 file_names[m->type], m->desc); 1676 return -1; 1677 } 1678 1679 for (; *ptr; ptr++) { 1680 if (*ptr == '%') { 1681 file_magwarn(ms, 1682 "Too many format strings (should have at most one) " 1683 "for `%s' with description `%s'", 1684 file_names[m->type], m->desc); 1685 return -1; 1686 } 1687 } 1688 return 0; 1689} 1690 1691/* 1692 * Read a numeric value from a pointer, into the value union of a magic 1693 * pointer, according to the magic type. Update the string pointer to point 1694 * just after the number read. Return 0 for success, non-zero for failure. 
1695 */ 1696private int 1697getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 1698{ 1699 int slen; 1700 1701 switch (m->type) { 1702 case FILE_BESTRING16: 1703 case FILE_LESTRING16: 1704 case FILE_STRING: 1705 case FILE_PSTRING: 1706 case FILE_REGEX: 1707 case FILE_SEARCH: 1708 *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen, action); 1709 if (*p == NULL) { 1710 if (ms->flags & MAGIC_CHECK) 1711 file_magwarn(ms, "cannot get string from `%s'", 1712 m->value.s); 1713 return -1; 1714 } 1715 m->vallen = slen; 1716 if (m->type == FILE_PSTRING) 1717 m->vallen++; 1718 return 0; 1719 case FILE_FLOAT: 1720 case FILE_BEFLOAT: 1721 case FILE_LEFLOAT: 1722 if (m->reln != 'x') { 1723 char *ep; 1724#ifdef HAVE_STRTOF 1725 m->value.f = strtof(*p, &ep); 1726#else 1727 m->value.f = (float)strtod(*p, &ep); 1728#endif 1729 *p = ep; 1730 } 1731 return 0; 1732 case FILE_DOUBLE: 1733 case FILE_BEDOUBLE: 1734 case FILE_LEDOUBLE: 1735 if (m->reln != 'x') { 1736 char *ep; 1737 m->value.d = strtod(*p, &ep); 1738 *p = ep; 1739 } 1740 return 0; 1741 default: 1742 if (m->reln != 'x') { 1743 char *ep; 1744 m->value.q = file_signextend(ms, m, 1745 (uint64_t)strtoull(*p, &ep, 0)); 1746 *p = ep; 1747 eatsize(p); 1748 } 1749 return 0; 1750 } 1751} 1752 1753/* 1754 * Convert a string containing C character escapes. Stop at an unescaped 1755 * space or tab. 1756 * Copy the converted version to "p", returning its length in *slen. 1757 * Return updated scan pointer as function result. 
1758 */ 1759private const char * 1760getstr(struct magic_set *ms, const char *s, char *p, int plen, int *slen, int action) 1761{ 1762 const char *origs = s; 1763 char *origp = p; 1764 char *pmax = p + plen - 1; 1765 int c; 1766 int val; 1767 1768 while ((c = *s++) != '\0') { 1769 if (isspace((unsigned char) c)) 1770 break; 1771 if (p >= pmax) { 1772 file_error(ms, 0, "string too long: `%s'", origs); 1773 return NULL; 1774 } 1775 if (c == '\\') { 1776 switch(c = *s++) { 1777 1778 case '\0': 1779 if (action == FILE_COMPILE) 1780 file_magwarn(ms, "incomplete escape"); 1781 goto out; 1782 1783 case '\t': 1784 if (action == FILE_COMPILE) { 1785 file_magwarn(ms, 1786 "escaped tab found, use \\t instead"); 1787 action++; 1788 } 1789 /*FALLTHROUGH*/ 1790 default: 1791 if (action == FILE_COMPILE) { 1792 if (isprint((unsigned char)c)) 1793 file_magwarn(ms, 1794 "no need to escape `%c'", c); 1795 else 1796 file_magwarn(ms, 1797 "unknown escape sequence: \\%03o", c); 1798 } 1799 /*FALLTHROUGH*/ 1800 /* space, perhaps force people to use \040? 
*/ 1801 case ' ': 1802#if 0 1803 /* 1804 * Other things people escape, but shouldn't need to, 1805 * so we disallow them 1806 */ 1807 case '\'': 1808 case '"': 1809 case '?': 1810#endif 1811 /* Relations */ 1812 case '>': 1813 case '<': 1814 case '&': 1815 case '^': 1816 case '=': 1817 case '!': 1818 /* and baskslash itself */ 1819 case '\\': 1820 *p++ = (char) c; 1821 break; 1822 1823 case 'a': 1824 *p++ = '\a'; 1825 break; 1826 1827 case 'b': 1828 *p++ = '\b'; 1829 break; 1830 1831 case 'f': 1832 *p++ = '\f'; 1833 break; 1834 1835 case 'n': 1836 *p++ = '\n'; 1837 break; 1838 1839 case 'r': 1840 *p++ = '\r'; 1841 break; 1842 1843 case 't': 1844 *p++ = '\t'; 1845 break; 1846 1847 case 'v': 1848 *p++ = '\v'; 1849 break; 1850 1851 /* \ and up to 3 octal digits */ 1852 case '0': 1853 case '1': 1854 case '2': 1855 case '3': 1856 case '4': 1857 case '5': 1858 case '6': 1859 case '7': 1860 val = c - '0'; 1861 c = *s++; /* try for 2 */ 1862 if (c >= '0' && c <= '7') { 1863 val = (val << 3) | (c - '0'); 1864 c = *s++; /* try for 3 */ 1865 if (c >= '0' && c <= '7') 1866 val = (val << 3) | (c-'0'); 1867 else 1868 --s; 1869 } 1870 else 1871 --s; 1872 *p++ = (char)val; 1873 break; 1874 1875 /* \x and up to 2 hex digits */ 1876 case 'x': 1877 val = 'x'; /* Default if no digits */ 1878 c = hextoint(*s++); /* Get next char */ 1879 if (c >= 0) { 1880 val = c; 1881 c = hextoint(*s++); 1882 if (c >= 0) 1883 val = (val << 4) + c; 1884 else 1885 --s; 1886 } else 1887 --s; 1888 *p++ = (char)val; 1889 break; 1890 } 1891 } else 1892 *p++ = (char)c; 1893 } 1894out: 1895 *p = '\0'; 1896 *slen = p - origp; 1897 return s; 1898} 1899 1900 1901/* Single hex char to int; -1 if not a hex char. 
/*
 * Single hex char to int; -1 if not a hex char.
 *
 * Only '0'-'9', 'a'-'f' and 'A'-'F' are accepted.  The comparison is done on
 * the int value itself, so anything outside those ranges (including negative
 * values from a signed char) yields -1.
 */
private int
hextoint(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}


/*
 * Print a string containing C character escapes.
 *
 *	fp	stream to write to
 *	s	bytes to print
 *	len	number of bytes to print, or ~0U / (size_t)-1 to print up
 *		to (not including) the terminating NUL
 *
 * Bytes 040..0176 are written as they are; \a \b \f \n \r \t \v are written
 * as those two-character escapes and everything else as a backslash followed
 * by three octal digits.
 */
protected void
file_showstr(FILE *fp, const char *s, size_t len)
{
	/* both spellings of "no length given" select NUL-terminated mode */
	const int nul_terminated = (len == ~0U || len == ~(size_t)0);
	char	c;

	for (;;) {
		/*
		 * Decide whether another byte is wanted before fetching it,
		 * so a counted buffer is never read past its last byte.
		 */
		if (nul_terminated) {
			c = *s++;
			if (c == '\0')
				break;
		} else {
			if (len-- == 0)
				break;
			c = *s++;
		}

		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {
		case '\a':
			(void) fputc('a', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			(void) fprintf(fp, "%.3o", (unsigned int)(c & 0377));
			break;
		}
	}
}

/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * Advances *p over an optional 'u' followed by at most one of the letters
 * l, s, h, b, c; both are matched without regard to case.  *p is left
 * unchanged when no size spec is present.
 */
private void
eatsize(const char **p)
{
	const char *l = *p;

	if (tolower((unsigned char)*l) == 'u')
		l++;

	switch (tolower((unsigned char)*l)) {
	case 'l':    /* long */
	case 's':    /* short */
	case 'h':    /* short */
	case 'b':    /* char/byte */
	case 'c':    /* char/byte */
		l++;
		/*FALLTHROUGH*/
	default:
		break;
	}

	*p = l;
}

/*
 * handle a compiled file.
 */
2004 */ 2005private int 2006apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 2007 const char *fn) 2008{ 2009 int fd; 2010 struct stat st; 2011 uint32_t *ptr; 2012 uint32_t version; 2013 int needsbyteswap; 2014 char *dbname = NULL; 2015 void *mm = NULL; 2016 2017 mkdbname(fn, &dbname, 0); 2018 if (dbname == NULL) 2019 goto error2; 2020 2021 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 2022 goto error2; 2023 2024 if (fstat(fd, &st) == -1) { 2025 file_error(ms, errno, "cannot stat `%s'", dbname); 2026 goto error1; 2027 } 2028 if (st.st_size < 8) { 2029 file_error(ms, 0, "file `%s' is too small", dbname); 2030 goto error1; 2031 } 2032 2033#ifdef QUICK 2034 if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, 2035 MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { 2036 file_error(ms, errno, "cannot map `%s'", dbname); 2037 goto error1; 2038 } 2039#define RET 2 2040#else 2041 if ((mm = CAST(void *, malloc((size_t)st.st_size))) == NULL) { 2042 file_oomem(ms, (size_t)st.st_size); 2043 goto error1; 2044 } 2045 if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) { 2046 file_badread(ms); 2047 goto error1; 2048 } 2049#define RET 1 2050#endif 2051 *magicp = CAST(struct magic *, mm); 2052 (void)close(fd); 2053 fd = -1; 2054 ptr = (uint32_t *)(void *)*magicp; 2055 if (*ptr != MAGICNO) { 2056 if (swap4(*ptr) != MAGICNO) { 2057 file_error(ms, 0, "bad magic in `%s'", dbname); 2058 goto error1; 2059 } 2060 needsbyteswap = 1; 2061 } else 2062 needsbyteswap = 0; 2063 if (needsbyteswap) 2064 version = swap4(ptr[1]); 2065 else 2066 version = ptr[1]; 2067 if (version != VERSIONNO) { 2068 file_error(ms, 0, "File %d.%d supports only %d version magic " 2069 "files. 
`%s' is version %d", FILE_VERSION_MAJOR, patchlevel, 2070 VERSIONNO, dbname, version); 2071 goto error1; 2072 } 2073 *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)); 2074 if (*nmagicp > 0) 2075 (*nmagicp)--; 2076 (*magicp)++; 2077 if (needsbyteswap) 2078 byteswap(*magicp, *nmagicp); 2079 free(dbname); 2080 return RET; 2081 2082error1: 2083 if (fd != -1) 2084 (void)close(fd); 2085 if (mm) { 2086#ifdef QUICK 2087 (void)munmap((void *)mm, (size_t)st.st_size); 2088#else 2089 free(mm); 2090#endif 2091 } else { 2092 *magicp = NULL; 2093 *nmagicp = 0; 2094 } 2095error2: 2096 free(dbname); 2097 return -1; 2098} 2099 2100private const uint32_t ar[] = { 2101 MAGICNO, VERSIONNO 2102}; 2103/* 2104 * handle an mmaped file. 2105 */ 2106private int 2107apprentice_compile(struct magic_set *ms, struct magic **magicp, 2108 uint32_t *nmagicp, const char *fn) 2109{ 2110 int fd; 2111 char *dbname; 2112 int rv = -1; 2113 2114 mkdbname(fn, &dbname, 1); 2115 2116 if (dbname == NULL) 2117 goto out; 2118 2119 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) { 2120 file_error(ms, errno, "cannot open `%s'", dbname); 2121 goto out; 2122 } 2123 2124 if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { 2125 file_error(ms, errno, "error writing `%s'", dbname); 2126 goto out; 2127 } 2128 2129 if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET) 2130 != sizeof(struct magic)) { 2131 file_error(ms, errno, "error seeking `%s'", dbname); 2132 goto out; 2133 } 2134 2135 if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp)) 2136 != (ssize_t)(sizeof(struct magic) * *nmagicp)) { 2137 file_error(ms, errno, "error writing `%s'", dbname); 2138 goto out; 2139 } 2140 2141 (void)close(fd); 2142 rv = 0; 2143out: 2144 free(dbname); 2145 return rv; 2146} 2147 2148private const char ext[] = ".mgc"; 2149/* 2150 * make a dbname 2151 */ 2152private void 2153mkdbname(const char *fn, char **buf, int strip) 2154{ 2155 const char *p; 2156 if (strip) { 2157 if ((p = strrchr(fn, '/')) 
!= NULL) 2158 fn = ++p; 2159 } 2160 2161 if ((p = strstr(fn, ext)) != NULL && p[sizeof(ext) - 1] == '\0') 2162 *buf = strdup(fn); 2163 else 2164 (void)asprintf(buf, "%s%s", fn, ext); 2165 2166 if (buf && *buf && strlen(*buf) > MAXPATHLEN) { 2167 free(*buf); 2168 *buf = NULL; 2169 } 2170} 2171 2172/* 2173 * Byteswap an mmap'ed file if needed 2174 */ 2175private void 2176byteswap(struct magic *magic, uint32_t nmagic) 2177{ 2178 uint32_t i; 2179 for (i = 0; i < nmagic; i++) 2180 bs1(&magic[i]); 2181} 2182 2183/* 2184 * swap a short 2185 */ 2186private uint16_t 2187swap2(uint16_t sv) 2188{ 2189 uint16_t rv; 2190 uint8_t *s = (uint8_t *)(void *)&sv; 2191 uint8_t *d = (uint8_t *)(void *)&rv; 2192 d[0] = s[1]; 2193 d[1] = s[0]; 2194 return rv; 2195} 2196 2197/* 2198 * swap an int 2199 */ 2200private uint32_t 2201swap4(uint32_t sv) 2202{ 2203 uint32_t rv; 2204 uint8_t *s = (uint8_t *)(void *)&sv; 2205 uint8_t *d = (uint8_t *)(void *)&rv; 2206 d[0] = s[3]; 2207 d[1] = s[2]; 2208 d[2] = s[1]; 2209 d[3] = s[0]; 2210 return rv; 2211} 2212 2213/* 2214 * swap a quad 2215 */ 2216private uint64_t 2217swap8(uint64_t sv) 2218{ 2219 uint64_t rv; 2220 uint8_t *s = (uint8_t *)(void *)&sv; 2221 uint8_t *d = (uint8_t *)(void *)&rv; 2222#if 0 2223 d[0] = s[3]; 2224 d[1] = s[2]; 2225 d[2] = s[1]; 2226 d[3] = s[0]; 2227 d[4] = s[7]; 2228 d[5] = s[6]; 2229 d[6] = s[5]; 2230 d[7] = s[4]; 2231#else 2232 d[0] = s[7]; 2233 d[1] = s[6]; 2234 d[2] = s[5]; 2235 d[3] = s[4]; 2236 d[4] = s[3]; 2237 d[5] = s[2]; 2238 d[6] = s[1]; 2239 d[7] = s[0]; 2240#endif 2241 return rv; 2242} 2243 2244/* 2245 * byteswap a single magic entry 2246 */ 2247private void 2248bs1(struct magic *m) 2249{ 2250 m->cont_level = swap2(m->cont_level); 2251 m->offset = swap4((uint32_t)m->offset); 2252 m->in_offset = swap4((uint32_t)m->in_offset); 2253 m->lineno = swap4((uint32_t)m->lineno); 2254 if (IS_STRING(m->type)) { 2255 m->str_range = swap4(m->str_range); 2256 m->str_flags = swap4(m->str_flags); 2257 } 2258 else { 2259 
m->value.q = swap8(m->value.q); 2260 m->num_mask = swap8(m->num_mask); 2261 } 2262} 2263