1/*-
2 * Copyright (c) 2008 Christos Zoulas
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
15 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
16 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
18 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26#include "file.h"
27
28#ifndef lint
29FILE_RCSID("@(#)$File: readcdf.c,v 1.49 2014/12/04 15:56:46 christos Exp $")
30#endif
31
32#include <assert.h>
33#include <stdlib.h>
34#include <unistd.h>
35#include <string.h>
36#include <time.h>
37#include <ctype.h>
38
39#include "cdf.h"
40#include "magic.h"
41
/* True (1) when none of the MAGIC_MIME bits are set in ms->flags. */
#define NOTMIME(ms) (!((ms)->flags & MAGIC_MIME))
43
/*
 * Pattern -> value pair.  Tables of these are scanned in order by
 * cdf_app_to_mime(), which returns the value of the first entry whose
 * pattern occurs in the string being examined.  Every table ends with
 * an all-NULL sentinel entry.
 */
struct nv {
	const char *pattern;
	const char *mime;
};

/* Application-name patterns and the MIME subtype reported for them. */
static const struct nv app2mime[] = {
	{ .pattern = "Word",			 .mime = "msword" },
	{ .pattern = "Excel",			 .mime = "vnd.ms-excel" },
	{ .pattern = "Powerpoint",		 .mime = "vnd.ms-powerpoint" },
	{ .pattern = "Crystal Reports",		 .mime = "x-rpt" },
	{ .pattern = "Advanced Installer",	 .mime = "vnd.ms-msi" },
	{ .pattern = "InstallShield",		 .mime = "vnd.ms-msi" },
	{ .pattern = "Microsoft Patch Compiler", .mime = "vnd.ms-msi" },
	{ .pattern = "NAnt",			 .mime = "vnd.ms-msi" },
	{ .pattern = "Windows Installer",	 .mime = "vnd.ms-msi" },
	{ .pattern = NULL,			 .mime = NULL },
};

/* Directory-entry name patterns and the MIME subtype reported for them. */
static const struct nv name2mime[] = {
	{ .pattern = "WordDocument",	 .mime = "msword" },
	{ .pattern = "PowerPoint",	 .mime = "vnd.ms-powerpoint" },
	{ .pattern = "DigitalSignature", .mime = "vnd.ms-msi" },
	{ .pattern = NULL,		 .mime = NULL },
};

/*
 * Directory-entry name patterns and the human-readable description
 * reported for them (stored in the `mime' member).
 */
static const struct nv name2desc[] = {
	{ .pattern = "WordDocument",	 .mime = "Microsoft Office Word" },
	{ .pattern = "PowerPoint",	 .mime = "Microsoft PowerPoint" },
	{ .pattern = "DigitalSignature", .mime = "Microsoft Installer" },
	{ .pattern = NULL,		 .mime = NULL },
};
69
/*
 * Class id -> value pair.  The 128-bit class id is held as two 64-bit
 * words.  Tables of these are searched by cdf_clsid_to_mime() and end
 * with an entry whose `mime' member is NULL.
 */
struct cv {
	uint64_t clsid[2];
	const char *mime;
};

/* Class ids and the MIME subtype reported for them. */
static const struct cv clsid2mime[] = {
	{
		.clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
		.mime = "x-msi",
	},
	{
		.clsid = { 0, 0 },
		.mime = NULL,
	},
};

/*
 * Class ids and the human-readable description reported for them
 * (stored in the `mime' member).
 */
static const struct cv clsid2desc[] = {
	{
		.clsid = { 0x00000000000c1084ULL, 0x46000000000000c0ULL },
		.mime = "MSI Installer",
	},
	{
		.clsid = { 0, 0 },
		.mime = NULL,
	},
};
90
91private const char *
92cdf_clsid_to_mime(const uint64_t clsid[2], const struct cv *cv)
93{
94	size_t i;
95	for (i = 0; cv[i].mime != NULL; i++) {
96		if (clsid[0] == cv[i].clsid[0] && clsid[1] == cv[i].clsid[1])
97			return cv[i].mime;
98	}
99	return NULL;
100}
101
102private const char *
103cdf_app_to_mime(const char *vbuf, const struct nv *nv)
104{
105	size_t i;
106	const char *rv = NULL;
107#ifdef USE_C_LOCALE
108	locale_t old_lc_ctype, c_lc_ctype;
109
110	c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
111	assert(c_lc_ctype != NULL);
112	old_lc_ctype = uselocale(c_lc_ctype);
113	assert(old_lc_ctype != NULL);
114#endif
115	for (i = 0; nv[i].pattern != NULL; i++)
116		if (strcasestr(vbuf, nv[i].pattern) != NULL) {
117			rv = nv[i].mime;
118			break;
119		}
120#ifdef USE_C_LOCALE
121	(void)uselocale(old_lc_ctype);
122	freelocale(c_lc_ctype);
123#endif
124	return rv;
125}
126
127private int
128cdf_file_property_info(struct magic_set *ms, const cdf_property_info_t *info,
129    size_t count, const cdf_directory_t *root_storage)
130{
131        size_t i;
132        cdf_timestamp_t tp;
133        struct timespec ts;
134        char buf[64];
135        const char *str = NULL;
136        const char *s;
137        int len;
138
139        if (!NOTMIME(ms) && root_storage)
140		str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
141		    clsid2mime);
142
143        for (i = 0; i < count; i++) {
144                cdf_print_property_name(buf, sizeof(buf), info[i].pi_id);
145                switch (info[i].pi_type) {
146                case CDF_NULL:
147                        break;
148                case CDF_SIGNED16:
149                        if (NOTMIME(ms) && file_printf(ms, ", %s: %hd", buf,
150                            info[i].pi_s16) == -1)
151                                return -1;
152                        break;
153                case CDF_SIGNED32:
154                        if (NOTMIME(ms) && file_printf(ms, ", %s: %d", buf,
155                            info[i].pi_s32) == -1)
156                                return -1;
157                        break;
158                case CDF_UNSIGNED32:
159                        if (NOTMIME(ms) && file_printf(ms, ", %s: %u", buf,
160                            info[i].pi_u32) == -1)
161                                return -1;
162                        break;
163                case CDF_FLOAT:
164                        if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
165                            info[i].pi_f) == -1)
166                                return -1;
167                        break;
168                case CDF_DOUBLE:
169                        if (NOTMIME(ms) && file_printf(ms, ", %s: %g", buf,
170                            info[i].pi_d) == -1)
171                                return -1;
172                        break;
173                case CDF_LENGTH32_STRING:
174                case CDF_LENGTH32_WSTRING:
175                        len = info[i].pi_str.s_len;
176                        if (len > 1) {
177                                char vbuf[1024];
178                                size_t j, k = 1;
179
180                                if (info[i].pi_type == CDF_LENGTH32_WSTRING)
181                                    k++;
182                                s = info[i].pi_str.s_buf;
183                                for (j = 0; j < sizeof(vbuf) && len--; s += k) {
184                                        if (*s == '\0')
185                                                break;
186                                        if (isprint((unsigned char)*s))
187                                                vbuf[j++] = *s;
188                                }
189                                if (j == sizeof(vbuf))
190                                        --j;
191                                vbuf[j] = '\0';
192                                if (NOTMIME(ms)) {
193                                        if (vbuf[0]) {
194                                                if (file_printf(ms, ", %s: %s",
195                                                    buf, vbuf) == -1)
196                                                        return -1;
197                                        }
198                                } else if (str == NULL && info[i].pi_id ==
199				    CDF_PROPERTY_NAME_OF_APPLICATION) {
200					str = cdf_app_to_mime(vbuf, app2mime);
201				}
202			}
203                        break;
204                case CDF_FILETIME:
205                        tp = info[i].pi_tp;
206                        if (tp != 0) {
207				char tbuf[64];
208                                if (tp < 1000000000000000LL) {
209                                        cdf_print_elapsed_time(tbuf,
210                                            sizeof(tbuf), tp);
211                                        if (NOTMIME(ms) && file_printf(ms,
212                                            ", %s: %s", buf, tbuf) == -1)
213                                                return -1;
214                                } else {
215                                        char *c, *ec;
216                                        cdf_timestamp_to_timespec(&ts, tp);
217                                        c = cdf_ctime(&ts.tv_sec, tbuf);
218                                        if (c != NULL &&
219					    (ec = strchr(c, '\n')) != NULL)
220						*ec = '\0';
221
222                                        if (NOTMIME(ms) && file_printf(ms,
223                                            ", %s: %s", buf, c) == -1)
224                                                return -1;
225                                }
226                        }
227                        break;
228                case CDF_CLIPBOARD:
229                        break;
230                default:
231                        return -1;
232                }
233        }
234        if (!NOTMIME(ms)) {
235		if (str == NULL)
236			return 0;
237                if (file_printf(ms, "application/%s", str) == -1)
238                        return -1;
239        }
240        return 1;
241}
242
243private int
244cdf_file_catalog(struct magic_set *ms, const cdf_header_t *h,
245    const cdf_stream_t *sst)
246{
247	cdf_catalog_t *cat;
248	size_t i;
249	char buf[256];
250	cdf_catalog_entry_t *ce;
251
252        if (NOTMIME(ms)) {
253		if (file_printf(ms, "Microsoft Thumbs.db [") == -1)
254			return -1;
255		if (cdf_unpack_catalog(h, sst, &cat) == -1)
256			return -1;
257		ce = cat->cat_e;
258		/* skip first entry since it has a , or paren */
259		for (i = 1; i < cat->cat_num; i++)
260			if (file_printf(ms, "%s%s",
261			    cdf_u16tos8(buf, ce[i].ce_namlen, ce[i].ce_name),
262			    i == cat->cat_num - 1 ? "]" : ", ") == -1) {
263				free(cat);
264				return -1;
265			}
266		free(cat);
267	} else {
268		if (file_printf(ms, "application/CDFV2") == -1)
269			return -1;
270	}
271	return 1;
272}
273
274private int
275cdf_file_summary_info(struct magic_set *ms, const cdf_header_t *h,
276    const cdf_stream_t *sst, const cdf_directory_t *root_storage)
277{
278        cdf_summary_info_header_t si;
279        cdf_property_info_t *info;
280        size_t count;
281        int m;
282
283        if (cdf_unpack_summary_info(sst, h, &si, &info, &count) == -1)
284                return -1;
285
286        if (NOTMIME(ms)) {
287		const char *str;
288
289                if (file_printf(ms, "Composite Document File V2 Document")
290		    == -1)
291                        return -1;
292
293                if (file_printf(ms, ", %s Endian",
294                    si.si_byte_order == 0xfffe ?  "Little" : "Big") == -1)
295                        return -2;
296                switch (si.si_os) {
297                case 2:
298                        if (file_printf(ms, ", Os: Windows, Version %d.%d",
299                            si.si_os_version & 0xff,
300                            (uint32_t)si.si_os_version >> 8) == -1)
301                                return -2;
302                        break;
303                case 1:
304                        if (file_printf(ms, ", Os: MacOS, Version %d.%d",
305                            (uint32_t)si.si_os_version >> 8,
306                            si.si_os_version & 0xff) == -1)
307                                return -2;
308                        break;
309                default:
310                        if (file_printf(ms, ", Os %d, Version: %d.%d", si.si_os,
311                            si.si_os_version & 0xff,
312                            (uint32_t)si.si_os_version >> 8) == -1)
313                                return -2;
314                        break;
315                }
316		if (root_storage) {
317			str = cdf_clsid_to_mime(root_storage->d_storage_uuid,
318			    clsid2desc);
319			if (str) {
320				if (file_printf(ms, ", %s", str) == -1)
321					return -2;
322			}
323		}
324	}
325
326        m = cdf_file_property_info(ms, info, count, root_storage);
327        free(info);
328
329        return m == -1 ? -2 : m;
330}
331
#ifdef notdef
/*
 * Format a class id, held as two 64-bit words, into buf as five
 * dash-separated hexadecimal groups of 8, 4, 4, 4 and 12 digits.
 *
 * buf:  destination buffer.
 * len:  size of buf in bytes; snprintf() truncates to fit.
 * uuid: the two 64-bit words of the class id.
 *
 * Returns buf.
 *
 * The last group covers the low 48 bits of uuid[1].  The mask used to
 * be 52 bits wide, which repeated bits 48-51 (already printed in the
 * fourth group) and could widen the field to 13 digits.
 */
private char *
format_clsid(char *buf, size_t len, const uint64_t uuid[2]) {
	(void)snprintf(buf, len, "%.8" PRIx64 "-%.4" PRIx64 "-%.4" PRIx64
	    "-%.4" PRIx64 "-%.12" PRIx64,
	    (uuid[0] >> 32) & (uint64_t)0x00000000ffffffffULL,
	    (uuid[0] >> 16) & (uint64_t)0x000000000000ffffULL,
	    (uuid[0] >>  0) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >> 48) & (uint64_t)0x000000000000ffffULL,
	    (uuid[1] >>  0) & (uint64_t)0x0000ffffffffffffULL);
	return buf;
}
#endif
345
346protected int
347file_trycdf(struct magic_set *ms, int fd, const unsigned char *buf,
348    size_t nbytes)
349{
350        cdf_info_t info;
351        cdf_header_t h;
352        cdf_sat_t sat, ssat;
353        cdf_stream_t sst, scn;
354        cdf_dir_t dir;
355        int i;
356        const char *expn = "";
357        const char *corrupt = "corrupt: ";
358        const cdf_directory_t *root_storage;
359
360        info.i_fd = fd;
361        info.i_buf = buf;
362        info.i_len = nbytes;
363        if (ms->flags & MAGIC_APPLE)
364                return 0;
365        if (cdf_read_header(&info, &h) == -1)
366                return 0;
367#ifdef CDF_DEBUG
368        cdf_dump_header(&h);
369#endif
370
371        if ((i = cdf_read_sat(&info, &h, &sat)) == -1) {
372                expn = "Can't read SAT";
373                goto out0;
374        }
375#ifdef CDF_DEBUG
376        cdf_dump_sat("SAT", &sat, CDF_SEC_SIZE(&h));
377#endif
378
379        if ((i = cdf_read_ssat(&info, &h, &sat, &ssat)) == -1) {
380                expn = "Can't read SSAT";
381                goto out1;
382        }
383#ifdef CDF_DEBUG
384        cdf_dump_sat("SSAT", &ssat, CDF_SHORT_SEC_SIZE(&h));
385#endif
386
387        if ((i = cdf_read_dir(&info, &h, &sat, &dir)) == -1) {
388                expn = "Can't read directory";
389                goto out2;
390        }
391
392        if ((i = cdf_read_short_stream(&info, &h, &sat, &dir, &sst,
393	    &root_storage)) == -1) {
394                expn = "Cannot read short stream";
395                goto out3;
396        }
397#ifdef CDF_DEBUG
398        cdf_dump_dir(&info, &h, &sat, &ssat, &sst, &dir);
399#endif
400#ifdef notdef
401	if (root_storage) {
402		if (NOTMIME(ms)) {
403			char clsbuf[128];
404			if (file_printf(ms, "CLSID %s, ",
405			    format_clsid(clsbuf, sizeof(clsbuf),
406			    root_storage->d_storage_uuid)) == -1)
407				return -1;
408		}
409	}
410#endif
411
412	if ((i = cdf_read_user_stream(&info, &h, &sat, &ssat, &sst, &dir,
413	    "FileHeader", &scn)) != -1) {
414#define HWP5_SIGNATURE "HWP Document File"
415		if (scn.sst_dirlen >= sizeof(HWP5_SIGNATURE) - 1
416		    && memcmp(scn.sst_tab, HWP5_SIGNATURE,
417		    sizeof(HWP5_SIGNATURE) - 1) == 0) {
418		    if (NOTMIME(ms)) {
419			if (file_printf(ms,
420			    "Hangul (Korean) Word Processor File 5.x") == -1)
421			    return -1;
422		    } else {
423			if (file_printf(ms, "application/x-hwp") == -1)
424			    return -1;
425		    }
426		    i = 1;
427		    goto out5;
428		} else {
429		    free(scn.sst_tab);
430		    scn.sst_tab = NULL;
431		    scn.sst_len = 0;
432		    scn.sst_dirlen = 0;
433		}
434	}
435
436        if ((i = cdf_read_summary_info(&info, &h, &sat, &ssat, &sst, &dir,
437            &scn)) == -1) {
438                if (errno == ESRCH) {
439			if ((i = cdf_read_catalog(&info, &h, &sat, &ssat, &sst,
440			    &dir, &scn)) == -1) {
441				corrupt = expn;
442				if ((i = cdf_read_encrypted_package(&info, &h,
443				    &sat, &ssat, &sst, &dir, &scn)) == -1)
444					expn = "No summary info";
445				else {
446					expn = "Encrypted";
447					i = -1;
448				}
449				goto out4;
450			}
451#ifdef CDF_DEBUG
452			cdf_dump_catalog(&h, &scn);
453#endif
454			if ((i = cdf_file_catalog(ms, &h, &scn))
455			    < 0)
456				expn = "Can't expand catalog";
457                } else {
458                        expn = "Cannot read summary info";
459                }
460                goto out4;
461        }
462#ifdef CDF_DEBUG
463        cdf_dump_summary_info(&h, &scn);
464#endif
465        if ((i = cdf_file_summary_info(ms, &h, &scn, root_storage)) < 0)
466            expn = "Can't expand summary_info";
467
468	if (i == 0) {
469		const char *str = NULL;
470		cdf_directory_t *d;
471		char name[__arraycount(d->d_name)];
472		size_t j, k;
473
474		for (j = 0; str == NULL && j < dir.dir_len; j++) {
475			d = &dir.dir_tab[j];
476			for (k = 0; k < sizeof(name); k++)
477				name[k] = (char)cdf_tole2(d->d_name[k]);
478			str = cdf_app_to_mime(name,
479			    NOTMIME(ms) ? name2desc : name2mime);
480		}
481		if (NOTMIME(ms)) {
482			if (str != NULL) {
483				if (file_printf(ms, "%s", str) == -1)
484					return -1;
485				i = 1;
486			}
487		} else {
488			if (str == NULL)
489				str = "vnd.ms-office";
490			if (file_printf(ms, "application/%s", str) == -1)
491				return -1;
492			i = 1;
493		}
494	}
495out5:
496        free(scn.sst_tab);
497out4:
498        free(sst.sst_tab);
499out3:
500        free(dir.dir_tab);
501out2:
502        free(ssat.sat_tab);
503out1:
504        free(sat.sat_tab);
505out0:
506	if (i == -1) {
507	    if (NOTMIME(ms)) {
508		if (file_printf(ms,
509		    "Composite Document File V2 Document") == -1)
510		    return -1;
511		if (*expn)
512		    if (file_printf(ms, ", %s%s", corrupt, expn) == -1)
513			return -1;
514	    } else {
515		if (file_printf(ms, "application/CDFV2-%s",
516		    *corrupt ? "corrupt" : "encrypted") == -1)
517		    return -1;
518	    }
519	    i = 1;
520	}
521        return i;
522}
523