1/* $NetBSD: funcs.c,v 1.4 2012/02/22 17:53:51 christos Exp $ */ 2 3/* 4 * Copyright (c) Christos Zoulas 2003. 5 * All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice immediately at the beginning of the file, without modification, 12 * this list of conditions, and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 21 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29#include "file.h" 30 31#ifndef lint 32#if 0 33FILE_RCSID("@(#)$File: funcs.c,v 1.60 2011/12/08 12:38:24 rrt Exp $") 34#else 35__RCSID("$NetBSD: funcs.c,v 1.4 2012/02/22 17:53:51 christos Exp $"); 36#endif 37#endif /* lint */ 38 39#include "magic.h" 40#include <stdarg.h> 41#include <stdlib.h> 42#include <string.h> 43#include <ctype.h> 44#if defined(HAVE_WCHAR_H) 45#include <wchar.h> 46#endif 47#if defined(HAVE_WCTYPE_H) 48#include <wctype.h> 49#endif 50#if defined(HAVE_LIMITS_H) 51#include <limits.h> 52#endif 53 54#ifndef SIZE_MAX 55#define SIZE_MAX ((size_t)~0) 56#endif 57 58/* 59 * Like printf, only we append to a buffer. 60 */ 61protected int 62file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 63{ 64 int len; 65 char *buf, *newstr; 66 67 len = vasprintf(&buf, fmt, ap); 68 if (len < 0) 69 goto out; 70 71 if (ms->o.buf != NULL) { 72 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 73 free(buf); 74 if (len < 0) 75 goto out; 76 free(ms->o.buf); 77 buf = newstr; 78 } 79 ms->o.buf = buf; 80 return 0; 81out: 82 file_error(ms, errno, "vasprintf failed"); 83 return -1; 84} 85 86protected int 87file_printf(struct magic_set *ms, const char *fmt, ...) 88{ 89 int rv; 90 va_list ap; 91 92 va_start(ap, fmt); 93 rv = file_vprintf(ms, fmt, ap); 94 va_end(ap); 95 return rv; 96} 97 98/* 99 * error - print best error message possible 100 */ 101/*VARARGS*/ 102private void 103file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 104 size_t lineno) 105{ 106 /* Only the first error is ok */ 107 if (ms->event_flags & EVENT_HAD_ERR) 108 return; 109 if (lineno != 0) { 110 free(ms->o.buf); 111 ms->o.buf = NULL; 112 file_printf(ms, "line %" SIZE_T_FORMAT "u: ", lineno); 113 } 114 file_vprintf(ms, f, va); 115 if (error > 0) 116 file_printf(ms, " (%s)", strerror(error)); 117 ms->event_flags |= EVENT_HAD_ERR; 118 ms->error = error; 119} 120 121/*VARARGS*/ 122protected void 123file_error(struct magic_set *ms, int error, const char *f, ...) 124{ 125 va_list va; 126 va_start(va, f); 127 file_error_core(ms, error, f, va, 0); 128 va_end(va); 129} 130 131/* 132 * Print an error with magic line number. 133 */ 134/*VARARGS*/ 135protected void 136file_magerror(struct magic_set *ms, const char *f, ...) 137{ 138 va_list va; 139 va_start(va, f); 140 file_error_core(ms, 0, f, va, ms->line); 141 va_end(va); 142} 143 144protected void 145file_oomem(struct magic_set *ms, size_t len) 146{ 147 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 148 len); 149} 150 151protected void 152file_badseek(struct magic_set *ms) 153{ 154 file_error(ms, errno, "error seeking"); 155} 156 157protected void 158file_badread(struct magic_set *ms) 159{ 160 file_error(ms, errno, "error reading"); 161} 162 163#ifndef COMPILE_ONLY 164/*ARGSUSED*/ 165protected int 166file_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((__unused__)), 167 const void *buf, size_t nb) 168{ 169 int m = 0, rv = 0, looks_text = 0; 170 int mime = ms->flags & MAGIC_MIME; 171 const unsigned char *ubuf = CAST(const unsigned char *, buf); 172 unichar *u8buf = NULL; 173 size_t ulen; 174 const char *code = NULL; 175 const char *code_mime = "binary"; 176 const char *type = NULL; 177 178 179 180 if (nb == 0) { 181 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 182 file_printf(ms, mime ? "application/x-empty" : 183 "empty") == -1) 184 return -1; 185 return 1; 186 } else if (nb == 1) { 187 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 188 file_printf(ms, mime ? "application/octet-stream" : 189 "very short file (no magic)") == -1) 190 return -1; 191 return 1; 192 } 193 194 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 195 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 196 &code, &code_mime, &type); 197 } 198 199#ifdef __EMX__ 200 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 201 switch (file_os2_apptype(ms, inname, buf, nb)) { 202 case -1: 203 return -1; 204 case 0: 205 break; 206 default: 207 return 1; 208 } 209 } 210#endif 211#if HAVE_FORK 212 /* try compression stuff */ 213 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 214 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 215 if ((ms->flags & MAGIC_DEBUG) != 0) 216 (void)fprintf(stderr, "zmagic %d\n", m); 217 goto done; 218 } 219#endif 220 /* Check if we have a tar file */ 221 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) 222 if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 223 if ((ms->flags & MAGIC_DEBUG) != 0) 224 (void)fprintf(stderr, "tar %d\n", m); 225 goto done; 226 } 227 228 /* Check if we have a CDF file */ 229 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 230 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 231 if ((ms->flags & MAGIC_DEBUG) != 0) 232 (void)fprintf(stderr, "cdf %d\n", m); 233 goto done; 234 } 235 236 /* try soft magic tests */ 237 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 238 if ((m = file_softmagic(ms, ubuf, nb, BINTEST, 239 looks_text)) != 0) { 240 if ((ms->flags & MAGIC_DEBUG) != 0) 241 (void)fprintf(stderr, "softmagic %d\n", m); 242#ifdef BUILTIN_ELF 243 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 244 nb > 5 && fd != -1) { 245 /* 246 * We matched something in the file, so this 247 * *might* be an ELF file, and the file is at 248 * least 5 bytes long, so if it's an ELF file 249 * it has at least one byte past the ELF magic 250 * number - try extracting information from the 251 * ELF headers that cannot easily * be 252 * extracted with rules in the magic file. 253 */ 254 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 255 if ((ms->flags & MAGIC_DEBUG) != 0) 256 (void)fprintf(stderr, 257 "elf %d\n", m); 258 } 259#endif 260 goto done; 261 } 262 263 /* try text properties */ 264 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 265 266 if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) { 267 if ((ms->flags & MAGIC_DEBUG) != 0) 268 (void)fprintf(stderr, "ascmagic %d\n", m); 269 goto done; 270 } 271 272 /* try to discover text encoding */ 273 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 274 if (looks_text == 0) 275 if ((m = file_ascmagic_with_encoding( ms, ubuf, 276 nb, u8buf, ulen, code, type, looks_text)) 277 != 0) { 278 if ((ms->flags & MAGIC_DEBUG) != 0) 279 (void)fprintf(stderr, 280 "ascmagic/enc %d\n", m); 281 goto done; 282 } 283 } 284 } 285 286 /* give up */ 287 m = 1; 288 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 289 file_printf(ms, mime ? "application/octet-stream" : "data") == -1) { 290 rv = -1; 291 } 292 done: 293 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 294 if (ms->flags & MAGIC_MIME_TYPE) 295 if (file_printf(ms, "; charset=") == -1) 296 rv = -1; 297 if (file_printf(ms, "%s", code_mime) == -1) 298 rv = -1; 299 } 300 free(u8buf); 301 if (rv) 302 return rv; 303 304 return m; 305} 306#endif 307 308protected int 309file_reset(struct magic_set *ms) 310{ 311 if (ms->mlist == NULL) { 312 file_error(ms, 0, "no magic files loaded"); 313 return -1; 314 } 315 if (ms->o.buf) { 316 free(ms->o.buf); 317 ms->o.buf = NULL; 318 } 319 if (ms->o.pbuf) { 320 free(ms->o.pbuf); 321 ms->o.pbuf = NULL; 322 } 323 ms->event_flags &= ~EVENT_HAD_ERR; 324 ms->error = -1; 325 return 0; 326} 327 328#define OCTALIFY(n, o) \ 329 /*LINTED*/ \ 330 (void)(*(n)++ = '\\', \ 331 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 332 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 333 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 334 (o)++) 335 336protected const char * 337file_getbuffer(struct magic_set *ms) 338{ 339 char *pbuf, *op, *np; 340 size_t psize, len; 341 342 if (ms->event_flags & EVENT_HAD_ERR) 343 return NULL; 344 345 if (ms->flags & MAGIC_RAW) 346 return ms->o.buf; 347 348 if (ms->o.buf == NULL) 349 return NULL; 350 351 /* * 4 is for octal representation, + 1 is for NUL */ 352 len = strlen(ms->o.buf); 353 if (len > (SIZE_MAX - 1) / 4) { 354 file_oomem(ms, len); 355 return NULL; 356 } 357 psize = len * 4 + 1; 358 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 359 file_oomem(ms, psize); 360 return NULL; 361 } 362 ms->o.pbuf = pbuf; 363 364#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 365 { 366 mbstate_t state; 367 wchar_t nextchar; 368 int mb_conv = 1; 369 size_t bytesconsumed; 370 char *eop; 371 (void)memset(&state, 0, sizeof(mbstate_t)); 372 373 np = ms->o.pbuf; 374 op = ms->o.buf; 375 eop = op + len; 376 377 while (op < eop) { 378 bytesconsumed = mbrtowc(&nextchar, op, 379 (size_t)(eop - op), &state); 380 if (bytesconsumed == (size_t)(-1) || 381 bytesconsumed == (size_t)(-2)) { 382 mb_conv = 0; 383 break; 384 } 385 386 if (iswprint(nextchar)) { 387 (void)memcpy(np, op, bytesconsumed); 388 op += bytesconsumed; 389 np += bytesconsumed; 390 } else { 391 while (bytesconsumed-- > 0) 392 OCTALIFY(np, op); 393 } 394 } 395 *np = '\0'; 396 397 /* Parsing succeeded as a multi-byte sequence */ 398 if (mb_conv != 0) 399 return ms->o.pbuf; 400 } 401#endif 402 403 for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 404 if (isprint((unsigned char)*op)) { 405 *np++ = *op++; 406 } else { 407 OCTALIFY(np, op); 408 } 409 } 410 *np = '\0'; 411 return ms->o.pbuf; 412} 413 414protected int 415file_check_mem(struct magic_set *ms, unsigned int level) 416{ 417 size_t len; 418 419 if (level >= ms->c.len) { 420 len = (ms->c.len += 20) * sizeof(*ms->c.li); 421 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 422 malloc(len) : 423 realloc(ms->c.li, len)); 424 if (ms->c.li == NULL) { 425 file_oomem(ms, len); 426 return -1; 427 } 428 } 429 ms->c.li[level].got_match = 0; 430#ifdef ENABLE_CONDITIONALS 431 ms->c.li[level].last_match = 0; 432 ms->c.li[level].last_cond = COND_NONE; 433#endif /* ENABLE_CONDITIONALS */ 434 return 0; 435} 436 437protected size_t 438file_printedlen(const struct magic_set *ms) 439{ 440 return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 441} 442 443protected int 444file_replace(struct magic_set *ms, const char *pat, const char *rep) 445{ 446 regex_t rx; 447 int rc; 448 449 rc = regcomp(&rx, pat, REG_EXTENDED); 450 if (rc) { 451 char errmsg[512]; 452 (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); 453 file_magerror(ms, "regex error %d, (%s)", rc, errmsg); 454 return -1; 455 } else { 456 regmatch_t rm; 457 int nm = 0; 458 while (regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 459 ms->o.buf[rm.rm_so] = '\0'; 460 if (file_printf(ms, "%s%s", rep, 461 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 462 return -1; 463 nm++; 464 } 465 regfree(&rx); 466 return nm; 467 } 468} 469