1/*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_ERRNO_H
29#include <errno.h>
30#endif
31#ifdef HAVE_LIMITS_H
32#include <limits.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_entry.h"
43#include "archive_entry_locale.h"
44#include "archive_private.h"
45#include "archive_read_private.h"
46#include "archive_endian.h"
47
48
#define MAXMATCH		256	/* Maximum match length. */
#define MINMATCH		3	/* Minimum match length. */
/*
 * Literal table format:
 * +0              +256                      +510
 * +---------------+-------------------------+
 * | literal code  |       match length      |
 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
 * +---------------+-------------------------+
 *  <---          LT_BITLEN_SIZE         --->
 */
/*
 * Literal table size: one slot per literal byte value (UCHAR_MAX + 1)
 * followed by one slot per match length in MINMATCH ... MAXMATCH.
 */
#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
/* Position table size.
 * Note: this is used for both the position table and the pre-literal
 * table. */
#define PT_BITLEN_SIZE		(3 + 16)
65
/* Storage type of the bit-reader cache and its width in bits. */
#define CACHE_TYPE		uint64_t
#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))

/*
 * An index table is used for Huffman decoding: it is faster than walking
 * the coding tree (a binary tree), but a large index table causes many
 * L1 cache read misses.
 * NOTE(review): HTBL_BITS is presumably the width of that direct-access
 * table -- confirm against the table-building code.
 */
#define HTBL_BITS	10

/*
 * Bit stream reader.
 */
struct lzh_br {
	/* Cache buffer. */
	CACHE_TYPE	 cache_buffer;
	/* Number of bits currently available in cache_buffer. */
	int		 cache_avail;
};

/* Node of the binary tree table used for bits beyond the direct table. */
struct htree_t {
	uint16_t left;
	uint16_t right;
};

/*
 * Huffman coding.
 */
struct huffman {
	int		 len_size;
	int		 len_avail;
	int		 len_bits;
	int		 freq[17];
	unsigned char	*bitlen;

	int		 max_bits;
	int		 shift_bits;
	int		 tbl_bits;
	int		 tree_used;
	int		 tree_avail;
	/* Direct access table. */
	uint16_t	*tbl;
	/* Binary tree table for extra bits over the direct access. */
	struct htree_t	*tree;
};

/*
 * LZH decoder state.
 */
struct lzh_dec {
	/* Decoding status. */
	int			 state;

	/*
	 * Sliding window over the most recently decoded data:
	 * 8Ki (lh5), 32Ki (lh6) or 64Ki (lh7) bytes.
	 */
	int			 w_size;
	int			 w_mask;
	/* Window buffer; it is used as a loop (ring) buffer. */
	unsigned char		*w_buff;
	/* Insert position in the window. */
	int			 w_pos;
	/* Position in the window from which decoded data can be copied. */
	int			 copy_pos;
	/* Number of bytes that can be copied from the window. */
	int			 copy_len;

	/* Bit stream reader. */
	struct lzh_br		 br;

	/* Huffman tables: lt and pt. */
	struct huffman		 lt, pt;

	int			 blocks_avail;
	int			 pos_pt_len_size;
	int			 pos_pt_len_bits;
	int			 literal_pt_len_size;
	int			 literal_pt_len_bits;
	int			 reading_position;
	int			 loop;
	int			 error;
};
137
/*
 * I/O descriptor handed to the LZH decoder.
 * NOTE(review): the next_in/avail_in/total_in naming looks like the usual
 * "input cursor / bytes left / running total" stream convention; confirm
 * against lzh_decode() before relying on it.
 */
struct lzh_stream {
	/* Input side. */
	const unsigned char	*next_in;
	int			 avail_in;
	int64_t			 total_in;

	/* Output side. */
	const unsigned char	*ref_ptr;
	int			 avail_out;
	int64_t			 total_out;

	/* Decoder state belonging to this stream. */
	struct lzh_dec		*ds;
};
147
148struct lha {
149	/* entry_bytes_remaining is the number of bytes we expect.	    */
150	int64_t                  entry_offset;
151	int64_t                  entry_bytes_remaining;
152	int64_t			 entry_unconsumed;
153	uint16_t		 entry_crc_calculated;
154
155	size_t			 header_size;	/* header size		    */
156	unsigned char		 level;		/* header level		    */
157	char			 method[3];	/* compress type	    */
158	int64_t			 compsize;	/* compressed data size	    */
159	int64_t			 origsize;	/* original file size	    */
160	int			 setflag;
161#define BIRTHTIME_IS_SET	1
162#define ATIME_IS_SET		2
163#define UNIX_MODE_IS_SET	4
164#define CRC_IS_SET		8
165	time_t			 birthtime;
166	long			 birthtime_tv_nsec;
167	time_t			 mtime;
168	long			 mtime_tv_nsec;
169	time_t			 atime;
170	long			 atime_tv_nsec;
171	mode_t			 mode;
172	int64_t			 uid;
173	int64_t			 gid;
174	struct archive_string 	 uname;
175	struct archive_string 	 gname;
176	uint16_t		 header_crc;
177	uint16_t		 crc;
178	/* dirname and filename could be in different codepages */
179	struct archive_string_conv *sconv_dir;
180	struct archive_string_conv *sconv_fname;
181	struct archive_string_conv *opt_sconv;
182
183	struct archive_string 	 dirname;
184	struct archive_string 	 filename;
185	struct archive_wstring	 ws;
186
187	unsigned char		 dos_attr;
188
189	/* Flag to mark progress that an archive was read their first header.*/
190	char			 found_first_header;
191	/* Flag to mark that indicates an empty directory. */
192	char			 directory;
193
194	/* Flags to mark progress of decompression. */
195	char			 decompress_init;
196	char			 end_of_entry;
197	char			 end_of_entry_cleanup;
198	char			 entry_is_compressed;
199
200	char			 format_name[64];
201
202	struct lzh_stream	 strm;
203};
204
/*
 * LHA header common member offset.
 *
 * These offsets are read before the header level is dispatched on, so
 * they are the ones used by the signature check and by the level switch
 * in the header reader.
 */
#define H_METHOD_OFFSET	2	/* Compress type. */
#define H_ATTR_OFFSET	19	/* DOS attribute. */
#define H_LEVEL_OFFSET	20	/* Header Level.  */
#define H_SIZE		22	/* Minimum header size. */
212
/* Format callbacks handed to __archive_read_register_format(). */
static int      archive_read_format_lha_bid(struct archive_read *, int);
static int      archive_read_format_lha_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_lha_read_header(struct archive_read *,
		    struct archive_entry *);
static int	archive_read_format_lha_read_data(struct archive_read *,
		    const void **, size_t *, int64_t *);
static int	archive_read_format_lha_read_data_skip(struct archive_read *);
static int	archive_read_format_lha_cleanup(struct archive_read *);

/* Header parsing helpers. */
static void	lha_replace_path_separator(struct lha *,
		    struct archive_entry *);
static int	lha_read_file_header_0(struct archive_read *, struct lha *);
static int	lha_read_file_header_1(struct archive_read *, struct lha *);
static int	lha_read_file_header_2(struct archive_read *, struct lha *);
static int	lha_read_file_header_3(struct archive_read *, struct lha *);
static int	lha_read_file_extended_header(struct archive_read *,
		    struct lha *, uint16_t *, int, size_t, size_t *);
static size_t	lha_check_header_format(const void *);
static int	lha_skip_sfx(struct archive_read *);
static time_t	lha_dos_time(const unsigned char *);
static time_t	lha_win_time(uint64_t, long *);
static unsigned char	lha_calcsum(unsigned char, const void *,
		    int, size_t);
static int	lha_parse_linkname(struct archive_wstring *,
		    struct archive_wstring *);
/* Entry data readers and CRC-16. */
static int	lha_read_data_none(struct archive_read *, const void **,
		    size_t *, int64_t *);
static int	lha_read_data_lzh(struct archive_read *, const void **,
		    size_t *, int64_t *);
static void	lha_crc16_init(void);
static uint16_t lha_crc16(uint16_t, const void *, size_t);
/* LZH decoder, bit reader and Huffman table helpers. */
static int	lzh_decode_init(struct lzh_stream *, const char *);
static void	lzh_decode_free(struct lzh_stream *);
static int	lzh_decode(struct lzh_stream *, int);
static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
static int	lzh_huffman_init(struct huffman *, size_t, int);
static void	lzh_huffman_free(struct huffman *);
static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
static int	lzh_make_fake_table(struct huffman *, uint16_t);
static int	lzh_make_huffman_table(struct huffman *);
static inline int lzh_decode_huffman(struct huffman *, unsigned);
static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
256
257
258int
259archive_read_support_format_lha(struct archive *_a)
260{
261	struct archive_read *a = (struct archive_read *)_a;
262	struct lha *lha;
263	int r;
264
265	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
266	    ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
267
268	lha = (struct lha *)calloc(1, sizeof(*lha));
269	if (lha == NULL) {
270		archive_set_error(&a->archive, ENOMEM,
271		    "Can't allocate lha data");
272		return (ARCHIVE_FATAL);
273	}
274	archive_string_init(&lha->ws);
275
276	r = __archive_read_register_format(a,
277	    lha,
278	    "lha",
279	    archive_read_format_lha_bid,
280	    archive_read_format_lha_options,
281	    archive_read_format_lha_read_header,
282	    archive_read_format_lha_read_data,
283	    archive_read_format_lha_read_data_skip,
284	    NULL,
285	    archive_read_format_lha_cleanup,
286	    NULL,
287	    NULL);
288
289	if (r != ARCHIVE_OK)
290		free(lha);
291	return (ARCHIVE_OK);
292}
293
294static size_t
295lha_check_header_format(const void *h)
296{
297	const unsigned char *p = h;
298	size_t next_skip_bytes;
299
300	switch (p[H_METHOD_OFFSET+3]) {
301	/*
302	 * "-lh0-" ... "-lh7-" "-lhd-"
303	 * "-lzs-" "-lz5-"
304	 */
305	case '0': case '1': case '2': case '3':
306	case '4': case '5': case '6': case '7':
307	case 'd':
308	case 's':
309		next_skip_bytes = 4;
310
311		/* b0 == 0 means the end of an LHa archive file.	*/
312		if (p[0] == 0)
313			break;
314		if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
315		    ||  p[H_METHOD_OFFSET+4] != '-')
316			break;
317
318		if (p[H_METHOD_OFFSET+2] == 'h') {
319			/* "-lh?-" */
320			if (p[H_METHOD_OFFSET+3] == 's')
321				break;
322			if (p[H_LEVEL_OFFSET] == 0)
323				return (0);
324			if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
325				return (0);
326		}
327		if (p[H_METHOD_OFFSET+2] == 'z') {
328			/* LArc extensions: -lzs-,-lz4- and -lz5- */
329			if (p[H_LEVEL_OFFSET] != 0)
330				break;
331			if (p[H_METHOD_OFFSET+3] == 's'
332			    || p[H_METHOD_OFFSET+3] == '4'
333			    || p[H_METHOD_OFFSET+3] == '5')
334				return (0);
335		}
336		break;
337	case 'h': next_skip_bytes = 1; break;
338	case 'z': next_skip_bytes = 1; break;
339	case 'l': next_skip_bytes = 2; break;
340	case '-': next_skip_bytes = 3; break;
341	default : next_skip_bytes = 4; break;
342	}
343
344	return (next_skip_bytes);
345}
346
347static int
348archive_read_format_lha_bid(struct archive_read *a, int best_bid)
349{
350	const char *p;
351	const void *buff;
352	ssize_t bytes_avail, offset, window;
353	size_t next;
354
355	/* If there's already a better bid than we can ever
356	   make, don't bother testing. */
357	if (best_bid > 30)
358		return (-1);
359
360	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
361		return (-1);
362
363	if (lha_check_header_format(p) == 0)
364		return (30);
365
366	if (p[0] == 'M' && p[1] == 'Z') {
367		/* PE file */
368		offset = 0;
369		window = 4096;
370		while (offset < (1024 * 20)) {
371			buff = __archive_read_ahead(a, offset + window,
372			    &bytes_avail);
373			if (buff == NULL) {
374				/* Remaining bytes are less than window. */
375				window >>= 1;
376				if (window < (H_SIZE + 3))
377					return (0);
378				continue;
379			}
380			p = (const char *)buff + offset;
381			while (p + H_SIZE < (const char *)buff + bytes_avail) {
382				if ((next = lha_check_header_format(p)) == 0)
383					return (30);
384				p += next;
385			}
386			offset = p - (const char *)buff;
387		}
388	}
389	return (0);
390}
391
392static int
393archive_read_format_lha_options(struct archive_read *a,
394    const char *key, const char *val)
395{
396	struct lha *lha;
397	int ret = ARCHIVE_FAILED;
398
399	lha = (struct lha *)(a->format->data);
400	if (strcmp(key, "hdrcharset")  == 0) {
401		if (val == NULL || val[0] == 0)
402			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
403			    "lha: hdrcharset option needs a character-set name");
404		else {
405			lha->opt_sconv =
406			    archive_string_conversion_from_charset(
407				&a->archive, val, 0);
408			if (lha->opt_sconv != NULL)
409				ret = ARCHIVE_OK;
410			else
411				ret = ARCHIVE_FATAL;
412		}
413		return (ret);
414	}
415
416	/* Note: The "warn" return is just to inform the options
417	 * supervisor that we didn't handle it.  It will generate
418	 * a suitable error if no one used this option. */
419	return (ARCHIVE_WARN);
420}
421
422static int
423lha_skip_sfx(struct archive_read *a)
424{
425	const void *h;
426	const char *p, *q;
427	size_t next, skip;
428	ssize_t bytes, window;
429
430	window = 4096;
431	for (;;) {
432		h = __archive_read_ahead(a, window, &bytes);
433		if (h == NULL) {
434			/* Remaining bytes are less than window. */
435			window >>= 1;
436			if (window < (H_SIZE + 3))
437				goto fatal;
438			continue;
439		}
440		if (bytes < H_SIZE)
441			goto fatal;
442		p = h;
443		q = p + bytes;
444
445		/*
446		 * Scan ahead until we find something that looks
447		 * like the lha header.
448		 */
449		while (p + H_SIZE < q) {
450			if ((next = lha_check_header_format(p)) == 0) {
451				skip = p - (const char *)h;
452				__archive_read_consume(a, skip);
453				return (ARCHIVE_OK);
454			}
455			p += next;
456		}
457		skip = p - (const char *)h;
458		__archive_read_consume(a, skip);
459	}
460fatal:
461	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
462	    "Couldn't find out LHa header");
463	return (ARCHIVE_FATAL);
464}
465
466static int
467truncated_error(struct archive_read *a)
468{
469	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
470	    "Truncated LHa header");
471	return (ARCHIVE_FATAL);
472}
473
474static int
475archive_read_format_lha_read_header(struct archive_read *a,
476    struct archive_entry *entry)
477{
478	struct archive_wstring linkname;
479	struct archive_wstring pathname;
480	struct lha *lha;
481	const unsigned char *p;
482	const char *signature;
483	int err;
484	struct archive_mstring conv_buffer;
485	const wchar_t *conv_buffer_p;
486
487	lha_crc16_init();
488
489	a->archive.archive_format = ARCHIVE_FORMAT_LHA;
490	if (a->archive.archive_format_name == NULL)
491		a->archive.archive_format_name = "lha";
492
493	lha = (struct lha *)(a->format->data);
494	lha->decompress_init = 0;
495	lha->end_of_entry = 0;
496	lha->end_of_entry_cleanup = 0;
497	lha->entry_unconsumed = 0;
498
499	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
500		/*
501		 * LHa archiver added 0 to the tail of its archive file as
502		 * the mark of the end of the archive.
503		 */
504		signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
505		if (signature == NULL || signature[0] == 0)
506			return (ARCHIVE_EOF);
507		return (truncated_error(a));
508	}
509
510	signature = (const char *)p;
511	if (lha->found_first_header == 0 &&
512	    signature[0] == 'M' && signature[1] == 'Z') {
513                /* This is an executable?  Must be self-extracting... 	*/
514		err = lha_skip_sfx(a);
515		if (err < ARCHIVE_WARN)
516			return (err);
517
518		if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
519			return (truncated_error(a));
520		signature = (const char *)p;
521	}
522	/* signature[0] == 0 means the end of an LHa archive file. */
523	if (signature[0] == 0)
524		return (ARCHIVE_EOF);
525
526	/*
527	 * Check the header format and method type.
528	 */
529	if (lha_check_header_format(p) != 0) {
530		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
531		    "Bad LHa file");
532		return (ARCHIVE_FATAL);
533	}
534
535	/* We've found the first header. */
536	lha->found_first_header = 1;
537	/* Set a default value and common data */
538	lha->header_size = 0;
539	lha->level = p[H_LEVEL_OFFSET];
540	lha->method[0] = p[H_METHOD_OFFSET+1];
541	lha->method[1] = p[H_METHOD_OFFSET+2];
542	lha->method[2] = p[H_METHOD_OFFSET+3];
543	if (memcmp(lha->method, "lhd", 3) == 0)
544		lha->directory = 1;
545	else
546		lha->directory = 0;
547	if (memcmp(lha->method, "lh0", 3) == 0 ||
548	    memcmp(lha->method, "lz4", 3) == 0)
549		lha->entry_is_compressed = 0;
550	else
551		lha->entry_is_compressed = 1;
552
553	lha->compsize = 0;
554	lha->origsize = 0;
555	lha->setflag = 0;
556	lha->birthtime = 0;
557	lha->birthtime_tv_nsec = 0;
558	lha->mtime = 0;
559	lha->mtime_tv_nsec = 0;
560	lha->atime = 0;
561	lha->atime_tv_nsec = 0;
562	lha->mode = (lha->directory)? 0777 : 0666;
563	lha->uid = 0;
564	lha->gid = 0;
565	archive_string_empty(&lha->dirname);
566	archive_string_empty(&lha->filename);
567	lha->dos_attr = 0;
568	if (lha->opt_sconv != NULL) {
569		lha->sconv_dir = lha->opt_sconv;
570		lha->sconv_fname = lha->opt_sconv;
571	} else {
572		lha->sconv_dir = NULL;
573		lha->sconv_fname = NULL;
574	}
575
576	switch (p[H_LEVEL_OFFSET]) {
577	case 0:
578		err = lha_read_file_header_0(a, lha);
579		break;
580	case 1:
581		err = lha_read_file_header_1(a, lha);
582		break;
583	case 2:
584		err = lha_read_file_header_2(a, lha);
585		break;
586	case 3:
587		err = lha_read_file_header_3(a, lha);
588		break;
589	default:
590		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
591		    "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
592		err = ARCHIVE_FATAL;
593		break;
594	}
595	if (err < ARCHIVE_WARN)
596		return (err);
597
598
599	if (!lha->directory && archive_strlen(&lha->filename) == 0)
600		/* The filename has not been set */
601		return (truncated_error(a));
602
603	/*
604	 * Make a pathname from a dirname and a filename, after converting to Unicode.
605	 * This is because codepages might differ between dirname and filename.
606	*/
607	archive_string_init(&pathname);
608	archive_string_init(&linkname);
609	archive_string_init(&conv_buffer.aes_mbs);
610	archive_string_init(&conv_buffer.aes_mbs_in_locale);
611	archive_string_init(&conv_buffer.aes_utf8);
612	archive_string_init(&conv_buffer.aes_wcs);
613	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
614		archive_set_error(&a->archive,
615			ARCHIVE_ERRNO_FILE_FORMAT,
616			"Pathname cannot be converted "
617			"from %s to Unicode.",
618			archive_string_conversion_charset_name(lha->sconv_dir));
619		err = ARCHIVE_FATAL;
620	} else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
621		err = ARCHIVE_FATAL;
622	if (err == ARCHIVE_FATAL) {
623		archive_mstring_clean(&conv_buffer);
624		archive_wstring_free(&pathname);
625		archive_wstring_free(&linkname);
626		return (err);
627	}
628	archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
629
630	archive_string_empty(&conv_buffer.aes_mbs);
631	archive_string_empty(&conv_buffer.aes_mbs_in_locale);
632	archive_string_empty(&conv_buffer.aes_utf8);
633	archive_wstring_empty(&conv_buffer.aes_wcs);
634	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
635		archive_set_error(&a->archive,
636			ARCHIVE_ERRNO_FILE_FORMAT,
637			"Pathname cannot be converted "
638			"from %s to Unicode.",
639			archive_string_conversion_charset_name(lha->sconv_fname));
640		err = ARCHIVE_FATAL;
641	}
642	else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
643		err = ARCHIVE_FATAL;
644	if (err == ARCHIVE_FATAL) {
645		archive_mstring_clean(&conv_buffer);
646		archive_wstring_free(&pathname);
647		archive_wstring_free(&linkname);
648		return (err);
649	}
650	archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
651	archive_mstring_clean(&conv_buffer);
652
653	if ((lha->mode & AE_IFMT) == AE_IFLNK) {
654		/*
655	 	 * Extract the symlink-name if it's included in the pathname.
656	 	 */
657		if (!lha_parse_linkname(&linkname, &pathname)) {
658			/* We couldn't get the symlink-name. */
659			archive_set_error(&a->archive,
660		    	    ARCHIVE_ERRNO_FILE_FORMAT,
661			    "Unknown symlink-name");
662			archive_wstring_free(&pathname);
663			archive_wstring_free(&linkname);
664			return (ARCHIVE_FAILED);
665		}
666	} else {
667		/*
668		 * Make sure a file-type is set.
669		 * The mode has been overridden if it is in the extended data.
670		 */
671		lha->mode = (lha->mode & ~AE_IFMT) |
672		    ((lha->directory)? AE_IFDIR: AE_IFREG);
673	}
674	if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
675	    (lha->dos_attr & 1) != 0)
676		lha->mode &= ~(0222);/* read only. */
677
678	/*
679	 * Set basic file parameters.
680	 */
681	archive_entry_copy_pathname_w(entry, pathname.s);
682	archive_wstring_free(&pathname);
683	if (archive_strlen(&linkname) > 0) {
684		archive_entry_copy_symlink_w(entry, linkname.s);
685	} else
686		archive_entry_set_symlink(entry, NULL);
687	archive_wstring_free(&linkname);
688	/*
689	 * When a header level is 0, there is a possibility that
690	 * a pathname and a symlink has '\' character, a directory
691	 * separator in DOS/Windows. So we should convert it to '/'.
692	 */
693	if (p[H_LEVEL_OFFSET] == 0)
694		lha_replace_path_separator(lha, entry);
695
696	archive_entry_set_mode(entry, lha->mode);
697	archive_entry_set_uid(entry, lha->uid);
698	archive_entry_set_gid(entry, lha->gid);
699	if (archive_strlen(&lha->uname) > 0)
700		archive_entry_set_uname(entry, lha->uname.s);
701	if (archive_strlen(&lha->gname) > 0)
702		archive_entry_set_gname(entry, lha->gname.s);
703	if (lha->setflag & BIRTHTIME_IS_SET) {
704		archive_entry_set_birthtime(entry, lha->birthtime,
705		    lha->birthtime_tv_nsec);
706		archive_entry_set_ctime(entry, lha->birthtime,
707		    lha->birthtime_tv_nsec);
708	} else {
709		archive_entry_unset_birthtime(entry);
710		archive_entry_unset_ctime(entry);
711	}
712	archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
713	if (lha->setflag & ATIME_IS_SET)
714		archive_entry_set_atime(entry, lha->atime,
715		    lha->atime_tv_nsec);
716	else
717		archive_entry_unset_atime(entry);
718	if (lha->directory || archive_entry_symlink(entry) != NULL)
719		archive_entry_unset_size(entry);
720	else
721		archive_entry_set_size(entry, lha->origsize);
722
723	/*
724	 * Prepare variables used to read a file content.
725	 */
726	lha->entry_bytes_remaining = lha->compsize;
727	if (lha->entry_bytes_remaining < 0) {
728		archive_set_error(&a->archive,
729		    ARCHIVE_ERRNO_FILE_FORMAT,
730		    "Invalid LHa entry size");
731		return (ARCHIVE_FATAL);
732	}
733	lha->entry_offset = 0;
734	lha->entry_crc_calculated = 0;
735
736	/*
737	 * This file does not have a content.
738	 */
739	if (lha->directory || lha->compsize == 0)
740		lha->end_of_entry = 1;
741
742	snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-",
743	    lha->method[0], lha->method[1], lha->method[2]);
744	a->archive.archive_format_name = lha->format_name;
745
746	return (err);
747}
748
749/*
750 * Replace a DOS path separator '\' by a character '/'.
751 * Some multi-byte character set have  a character '\' in its second byte.
752 */
753static void
754lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
755{
756	const wchar_t *wp;
757	size_t i;
758
759	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
760		archive_wstrcpy(&(lha->ws), wp);
761		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
762			if (lha->ws.s[i] == L'\\')
763				lha->ws.s[i] = L'/';
764		}
765		archive_entry_copy_pathname_w(entry, lha->ws.s);
766	}
767
768	if ((wp = archive_entry_symlink_w(entry)) != NULL) {
769		archive_wstrcpy(&(lha->ws), wp);
770		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
771			if (lha->ws.s[i] == L'\\')
772				lha->ws.s[i] = L'/';
773		}
774		archive_entry_copy_symlink_w(entry, lha->ws.s);
775	}
776}
777
778/*
779 * Header 0 format
780 *
781 * +0              +1         +2               +7                  +11
782 * +---------------+----------+----------------+-------------------+
783 * |header size(*1)|header sum|compression type|compressed size(*2)|
784 * +---------------+----------+----------------+-------------------+
785 *                             <---------------------(*1)----------*
786 *
787 * +11               +15       +17       +19            +20              +21
788 * +-----------------+---------+---------+--------------+----------------+
789 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
790 * +-----------------+---------+---------+--------------+----------------+
791 * *--------------------------------(*1)---------------------------------*
792 *
793 * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
794 * +---------------+---------+----------+----------------+------------------+
795 * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
796 * +---------------+---------+----------+----------------+------------------+
797 *                  <--(*3)->                             <------(*2)------>
798 * *----------------------(*1)-------------------------->
799 *
800 */
801#define H0_HEADER_SIZE_OFFSET	0
802#define H0_HEADER_SUM_OFFSET	1
803#define H0_COMP_SIZE_OFFSET	7
804#define H0_ORIG_SIZE_OFFSET	11
805#define H0_DOS_TIME_OFFSET	15
806#define H0_NAME_LEN_OFFSET	21
807#define H0_FILE_NAME_OFFSET	22
808#define H0_FIXED_SIZE		24
809static int
810lha_read_file_header_0(struct archive_read *a, struct lha *lha)
811{
812	const unsigned char *p;
813	int extdsize, namelen;
814	unsigned char headersum, sum_calculated;
815
816	if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
817		return (truncated_error(a));
818	lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
819	headersum = p[H0_HEADER_SUM_OFFSET];
820	lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
821	lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
822	lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
823	namelen = p[H0_NAME_LEN_OFFSET];
824	extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
825	if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
826		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
827		    "Invalid LHa header");
828		return (ARCHIVE_FATAL);
829	}
830	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
831		return (truncated_error(a));
832
833	archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
834	/* When extdsize == -2, A CRC16 value is not present in the header. */
835	if (extdsize >= 0) {
836		lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
837		lha->setflag |= CRC_IS_SET;
838	}
839	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
840
841	/* Read an extended header */
842	if (extdsize > 0) {
843		/* This extended data is set by 'LHa for UNIX' only.
844		 * Maybe fixed size.
845		 */
846		p += H0_FILE_NAME_OFFSET + namelen + 2;
847		if (p[0] == 'U' && extdsize == 12) {
848			/* p[1] is a minor version. */
849			lha->mtime = archive_le32dec(&p[2]);
850			lha->mode = archive_le16dec(&p[6]);
851			lha->uid = archive_le16dec(&p[8]);
852			lha->gid = archive_le16dec(&p[10]);
853			lha->setflag |= UNIX_MODE_IS_SET;
854		}
855	}
856	__archive_read_consume(a, lha->header_size);
857
858	if (sum_calculated != headersum) {
859		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
860		    "LHa header sum error");
861		return (ARCHIVE_FATAL);
862	}
863
864	return (ARCHIVE_OK);
865}
866
867/*
868 * Header 1 format
869 *
870 * +0              +1         +2               +7            +11
871 * +---------------+----------+----------------+-------------+
872 * |header size(*1)|header sum|compression type|skip size(*2)|
873 * +---------------+----------+----------------+-------------+
874 *                             <---------------(*1)----------*
875 *
876 * +11               +15       +17       +19            +20              +21
877 * +-----------------+---------+---------+--------------+----------------+
878 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
879 * +-----------------+---------+---------+--------------+----------------+
880 * *-------------------------------(*1)----------------------------------*
881 *
882 * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
883 * +---------------+---------+----------+-----------+-----------+
884 * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
885 * +---------------+---------+----------+-----------+-----------+
886 *                  <--(*3)->
887 * *----------------------------(*1)----------------------------*
888 *
889 * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
890 * +----------------+---------------------+------------------------+
891 * |next header size| extended header(*5) |     compressed data    |
892 * +----------------+---------------------+------------------------+
893 * *------(*1)-----> <--------------------(*2)-------------------->
894 */
895#define H1_HEADER_SIZE_OFFSET	0
896#define H1_HEADER_SUM_OFFSET	1
897#define H1_COMP_SIZE_OFFSET	7
898#define H1_ORIG_SIZE_OFFSET	11
899#define H1_DOS_TIME_OFFSET	15
900#define H1_NAME_LEN_OFFSET	21
901#define H1_FILE_NAME_OFFSET	22
902#define H1_FIXED_SIZE		27
903static int
904lha_read_file_header_1(struct archive_read *a, struct lha *lha)
905{
906	const unsigned char *p;
907	size_t extdsize;
908	int i, err, err2;
909	int namelen, padding;
910	unsigned char headersum, sum_calculated;
911
912	err = ARCHIVE_OK;
913
914	if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
915		return (truncated_error(a));
916
917	lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
918	headersum = p[H1_HEADER_SUM_OFFSET];
919	/* Note: An extended header size is included in a compsize. */
920	lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
921	lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
922	lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
923	namelen = p[H1_NAME_LEN_OFFSET];
924	/* Calculate a padding size. The result will be normally 0 only(?) */
925	padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
926
927	if (namelen > 230 || padding < 0)
928		goto invalid;
929
930	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
931		return (truncated_error(a));
932
933	for (i = 0; i < namelen; i++) {
934		if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
935			goto invalid;/* Invalid filename. */
936	}
937	archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
938	lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
939	lha->setflag |= CRC_IS_SET;
940
941	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
942	/* Consume used bytes but not include `next header size' data
943	 * since it will be consumed in lha_read_file_extended_header(). */
944	__archive_read_consume(a, lha->header_size - 2);
945
946	/* Read extended headers */
947	err2 = lha_read_file_extended_header(a, lha, NULL, 2,
948	    (size_t)(lha->compsize + 2), &extdsize);
949	if (err2 < ARCHIVE_WARN)
950		return (err2);
951	if (err2 < err)
952		err = err2;
953	/* Get a real compressed file size. */
954	lha->compsize -= extdsize - 2;
955
956	if (lha->compsize < 0)
957		goto invalid;	/* Invalid compressed file size */
958
959	if (sum_calculated != headersum) {
960		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
961		    "LHa header sum error");
962		return (ARCHIVE_FATAL);
963	}
964	return (err);
965invalid:
966	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
967	    "Invalid LHa header");
968	return (ARCHIVE_FATAL);
969}
970
971/*
972 * Header 2 format
973 *
974 * +0              +2               +7                  +11               +15
975 * +---------------+----------------+-------------------+-----------------+
976 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
977 * +---------------+----------------+-------------------+-----------------+
978 *  <--------------------------------(*1)---------------------------------*
979 *
980 * +15               +19          +20              +21        +23         +24
981 * +-----------------+------------+----------------+----------+-----------+
 * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
983 * +-----------------+------------+----------------+----------+-----------+
984 * *---------------------------------(*1)---------------------------------*
985 *
986 * +24              +26                 +26+(*3)      +26+(*3)+(*4)
987 * +----------------+-------------------+-------------+-------------------+
988 * |next header size|extended header(*3)| padding(*4) |  compressed data  |
989 * +----------------+-------------------+-------------+-------------------+
990 * *--------------------------(*1)-------------------> <------(*2)------->
991 *
992 */
993#define H2_HEADER_SIZE_OFFSET	0
994#define H2_COMP_SIZE_OFFSET	7
995#define H2_ORIG_SIZE_OFFSET	11
996#define H2_TIME_OFFSET		15
997#define H2_CRC_OFFSET		21
998#define H2_FIXED_SIZE		24
999static int
1000lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1001{
1002	const unsigned char *p;
1003	size_t extdsize;
1004	int err, padding;
1005	uint16_t header_crc;
1006
1007	if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1008		return (truncated_error(a));
1009
1010	lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1011	lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1012	lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1013	lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1014	lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1015	lha->setflag |= CRC_IS_SET;
1016
1017	if (lha->header_size < H2_FIXED_SIZE) {
1018		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1019		    "Invalid LHa header size");
1020		return (ARCHIVE_FATAL);
1021	}
1022
1023	header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1024	__archive_read_consume(a, H2_FIXED_SIZE);
1025
1026	/* Read extended headers */
1027	err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1028		  lha->header_size - H2_FIXED_SIZE, &extdsize);
1029	if (err < ARCHIVE_WARN)
1030		return (err);
1031
1032	/* Calculate a padding size. The result will be normally 0 or 1. */
1033	padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1034	if (padding > 0) {
1035		if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1036			return (truncated_error(a));
1037		header_crc = lha_crc16(header_crc, p, padding);
1038		__archive_read_consume(a, padding);
1039	}
1040
1041	if (header_crc != lha->header_crc) {
1042#ifndef DONT_FAIL_ON_CRC_ERROR
1043		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1044		    "LHa header CRC error");
1045		return (ARCHIVE_FATAL);
1046#endif
1047	}
1048	return (err);
1049}
1050
1051/*
1052 * Header 3 format
1053 *
1054 * +0           +2               +7                  +11               +15
1055 * +------------+----------------+-------------------+-----------------+
1056 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1057 * +------------+----------------+-------------------+-----------------+
1058 *  <-------------------------------(*1)-------------------------------*
1059 *
1060 * +15               +19          +20              +21        +23         +24
1061 * +-----------------+------------+----------------+----------+-----------+
1062 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1063 * +-----------------+------------+----------------+----------+-----------+
1064 * *--------------------------------(*1)----------------------------------*
1065 *
1066 * +24             +28              +32                 +32+(*3)
1067 * +---------------+----------------+-------------------+-----------------+
1068 * |header size(*1)|next header size|extended header(*3)| compressed data |
1069 * +---------------+----------------+-------------------+-----------------+
1070 * *------------------------(*1)-----------------------> <------(*2)----->
1071 *
1072 */
1073#define H3_FIELD_LEN_OFFSET	0
1074#define H3_COMP_SIZE_OFFSET	7
1075#define H3_ORIG_SIZE_OFFSET	11
1076#define H3_TIME_OFFSET		15
1077#define H3_CRC_OFFSET		21
1078#define H3_HEADER_SIZE_OFFSET	24
1079#define H3_FIXED_SIZE		28
1080static int
1081lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1082{
1083	const unsigned char *p;
1084	size_t extdsize;
1085	int err;
1086	uint16_t header_crc;
1087
1088	if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1089		return (truncated_error(a));
1090
1091	if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1092		goto invalid;
1093	lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1094	lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1095	lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1096	lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1097	lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1098	lha->setflag |= CRC_IS_SET;
1099
1100	if (lha->header_size < H3_FIXED_SIZE + 4)
1101		goto invalid;
1102	header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1103	__archive_read_consume(a, H3_FIXED_SIZE);
1104
1105	/* Read extended headers */
1106	err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1107		  lha->header_size - H3_FIXED_SIZE, &extdsize);
1108	if (err < ARCHIVE_WARN)
1109		return (err);
1110
1111	if (header_crc != lha->header_crc) {
1112#ifndef DONT_FAIL_ON_CRC_ERROR
1113		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1114		    "LHa header CRC error");
1115		return (ARCHIVE_FATAL);
1116#endif
1117	}
1118	return (err);
1119invalid:
1120	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1121	    "Invalid LHa header");
1122	return (ARCHIVE_FATAL);
1123}
1124
1125/*
1126 * Extended header format
1127 *
1128 * +0             +2        +3  -- used in header 1 and 2
1129 * +0             +4        +5  -- used in header 3
1130 * +--------------+---------+-------------------+--------------+--
1131 * |ex-header size|header id|        data       |ex-header size| .......
1132 * +--------------+---------+-------------------+--------------+--
1133 *  <-------------( ex-header size)------------> <-- next extended header --*
1134 *
 * If the ex-header size is zero, it marks the end of the extended
 * headers.
1137 *
1138 */
1139static int
1140lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1141    uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1142{
1143	const void *h;
1144	const unsigned char *extdheader;
1145	size_t	extdsize;
1146	size_t	datasize;
1147	unsigned int i;
1148	unsigned char extdtype;
1149
1150#define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
1151#define EXT_FILENAME		0x01		/* Filename 		    */
1152#define EXT_DIRECTORY		0x02		/* Directory name	    */
1153#define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
1154#define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
1155#define EXT_FILESIZE		0x42		/* Large file size	    */
1156#define EXT_TIMEZONE		0x43		/* Time zone		    */
1157#define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
1158#define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
1159#define EXT_CODEPAGE		0x46		/* Codepage		    */
1160#define EXT_UNIX_MODE		0x50		/* File permission	    */
1161#define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
1162#define EXT_UNIX_GNAME		0x52		/* Group name		    */
1163#define EXT_UNIX_UNAME		0x53		/* User name		    */
1164#define EXT_UNIX_MTIME		0x54		/* Modified time	    */
1165#define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
1166#define EXT_NEW_ATTR		0xff		/* new attribute	    */
1167
1168	*total_size = sizefield_length;
1169
1170	for (;;) {
1171		/* Read an extended header size. */
1172		if ((h =
1173		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1174			return (truncated_error(a));
1175		/* Check if the size is the zero indicates the end of the
1176		 * extended header. */
1177		if (sizefield_length == sizeof(uint16_t))
1178			extdsize = archive_le16dec(h);
1179		else
1180			extdsize = archive_le32dec(h);
1181		if (extdsize == 0) {
1182			/* End of extended header */
1183			if (crc != NULL)
1184				*crc = lha_crc16(*crc, h, sizefield_length);
1185			__archive_read_consume(a, sizefield_length);
1186			return (ARCHIVE_OK);
1187		}
1188
1189		/* Sanity check to the extended header size. */
1190		if (((uint64_t)*total_size + extdsize) >
1191				    (uint64_t)limitsize ||
1192		    extdsize <= (size_t)sizefield_length)
1193			goto invalid;
1194
1195		/* Read the extended header. */
1196		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1197			return (truncated_error(a));
1198		*total_size += extdsize;
1199
1200		extdheader = (const unsigned char *)h;
1201		/* Get the extended header type. */
1202		extdtype = extdheader[sizefield_length];
1203		/* Calculate an extended data size. */
1204		datasize = extdsize - (1 + sizefield_length);
1205		/* Skip an extended header size field and type field. */
1206		extdheader += sizefield_length + 1;
1207
1208		if (crc != NULL && extdtype != EXT_HEADER_CRC)
1209			*crc = lha_crc16(*crc, h, extdsize);
1210		switch (extdtype) {
1211		case EXT_HEADER_CRC:
1212			/* We only use a header CRC. Following data will not
1213			 * be used. */
1214			if (datasize >= 2) {
1215				lha->header_crc = archive_le16dec(extdheader);
1216				if (crc != NULL) {
1217					static const char zeros[2] = {0, 0};
1218					*crc = lha_crc16(*crc, h,
1219					    extdsize - datasize);
1220					/* CRC value itself as zero */
1221					*crc = lha_crc16(*crc, zeros, 2);
1222					*crc = lha_crc16(*crc,
1223					    extdheader+2, datasize - 2);
1224				}
1225			}
1226			break;
1227		case EXT_FILENAME:
1228			if (datasize == 0) {
1229				/* maybe directory header */
1230				archive_string_empty(&lha->filename);
1231				break;
1232			}
1233			if (extdheader[0] == '\0')
1234				goto invalid;
1235			archive_strncpy(&lha->filename,
1236			    (const char *)extdheader, datasize);
1237			break;
1238		case EXT_UTF16_FILENAME:
1239			if (datasize == 0) {
1240				/* maybe directory header */
1241				archive_string_empty(&lha->filename);
1242				break;
1243			} else if (datasize & 1) {
1244				/* UTF-16 characters take always 2 or 4 bytes */
1245				goto invalid;
1246			}
1247			if (extdheader[0] == '\0')
1248				goto invalid;
1249			archive_string_empty(&lha->filename);
1250			archive_array_append(&lha->filename,
1251				(const char *)extdheader, datasize);
1252			/* Setup a string conversion for a filename. */
1253			lha->sconv_fname =
1254			    archive_string_conversion_from_charset(&a->archive,
1255			        "UTF-16LE", 1);
1256			if (lha->sconv_fname == NULL)
1257				return (ARCHIVE_FATAL);
1258			break;
1259		case EXT_DIRECTORY:
1260			if (datasize == 0 || extdheader[0] == '\0')
1261				/* no directory name data. exit this case. */
1262				goto invalid;
1263
1264			archive_strncpy(&lha->dirname,
1265		  	    (const char *)extdheader, datasize);
1266			/*
1267			 * Convert directory delimiter from 0xFF
1268			 * to '/' for local system.
1269	 		 */
1270			for (i = 0; i < lha->dirname.length; i++) {
1271				if ((unsigned char)lha->dirname.s[i] == 0xFF)
1272					lha->dirname.s[i] = '/';
1273			}
1274			/* Is last character directory separator? */
1275			if (lha->dirname.s[lha->dirname.length-1] != '/')
1276				/* invalid directory data */
1277				goto invalid;
1278			break;
1279		case EXT_UTF16_DIRECTORY:
1280			/* UTF-16 characters take always 2 or 4 bytes */
1281			if (datasize == 0 || (datasize & 1) ||
1282			    extdheader[0] == '\0') {
1283				/* no directory name data. exit this case. */
1284				goto invalid;
1285			}
1286
1287			archive_string_empty(&lha->dirname);
1288			archive_array_append(&lha->dirname,
1289				(const char *)extdheader, datasize);
1290			lha->sconv_dir =
1291			    archive_string_conversion_from_charset(&a->archive,
1292			        "UTF-16LE", 1);
1293			if (lha->sconv_dir == NULL)
1294				return (ARCHIVE_FATAL);
1295			else {
1296				/*
1297				 * Convert directory delimiter from 0xFFFF
1298				 * to '/' for local system.
1299				 */
1300				uint16_t dirSep;
1301				uint16_t d = 1;
1302				if (archive_be16dec(&d) == 1)
1303					dirSep = 0x2F00;
1304				else
1305					dirSep = 0x002F;
1306
1307				/* UTF-16LE character */
1308				uint16_t *utf16name =
1309				    (uint16_t *)lha->dirname.s;
1310				for (i = 0; i < lha->dirname.length / 2; i++) {
1311					if (utf16name[i] == 0xFFFF) {
1312						utf16name[i] = dirSep;
1313					}
1314				}
1315				/* Is last character directory separator? */
1316				if (utf16name[lha->dirname.length / 2 - 1] !=
1317				    dirSep) {
1318					/* invalid directory data */
1319					goto invalid;
1320				}
1321			}
1322			break;
1323		case EXT_DOS_ATTR:
1324			if (datasize == 2)
1325				lha->dos_attr = (unsigned char)
1326				    (archive_le16dec(extdheader) & 0xff);
1327			break;
1328		case EXT_TIMESTAMP:
1329			if (datasize == (sizeof(uint64_t) * 3)) {
1330				lha->birthtime = lha_win_time(
1331				    archive_le64dec(extdheader),
1332				    &lha->birthtime_tv_nsec);
1333				extdheader += sizeof(uint64_t);
1334				lha->mtime = lha_win_time(
1335				    archive_le64dec(extdheader),
1336				    &lha->mtime_tv_nsec);
1337				extdheader += sizeof(uint64_t);
1338				lha->atime = lha_win_time(
1339				    archive_le64dec(extdheader),
1340				    &lha->atime_tv_nsec);
1341				lha->setflag |= BIRTHTIME_IS_SET |
1342				    ATIME_IS_SET;
1343			}
1344			break;
1345		case EXT_FILESIZE:
1346			if (datasize == sizeof(uint64_t) * 2) {
1347				lha->compsize = archive_le64dec(extdheader);
1348				extdheader += sizeof(uint64_t);
1349				lha->origsize = archive_le64dec(extdheader);
1350				if (lha->compsize < 0 || lha->origsize < 0)
1351					goto invalid;
1352			}
1353			break;
1354		case EXT_CODEPAGE:
1355			/* Get an archived filename charset from codepage.
1356			 * This overwrites the charset specified by
1357			 * hdrcharset option. */
1358			if (datasize == sizeof(uint32_t)) {
1359				struct archive_string cp;
1360				const char *charset;
1361
1362				archive_string_init(&cp);
1363				switch (archive_le32dec(extdheader)) {
1364				case 65001: /* UTF-8 */
1365					charset = "UTF-8";
1366					break;
1367				default:
1368					archive_string_sprintf(&cp, "CP%d",
1369					    (int)archive_le32dec(extdheader));
1370					charset = cp.s;
1371					break;
1372				}
1373				lha->sconv_dir =
1374				    archive_string_conversion_from_charset(
1375					&(a->archive), charset, 1);
1376				lha->sconv_fname =
1377				    archive_string_conversion_from_charset(
1378					&(a->archive), charset, 1);
1379				archive_string_free(&cp);
1380				if (lha->sconv_dir == NULL)
1381					return (ARCHIVE_FATAL);
1382				if (lha->sconv_fname == NULL)
1383					return (ARCHIVE_FATAL);
1384			}
1385			break;
1386		case EXT_UNIX_MODE:
1387			if (datasize == sizeof(uint16_t)) {
1388				lha->mode = archive_le16dec(extdheader);
1389				lha->setflag |= UNIX_MODE_IS_SET;
1390			}
1391			break;
1392		case EXT_UNIX_GID_UID:
1393			if (datasize == (sizeof(uint16_t) * 2)) {
1394				lha->gid = archive_le16dec(extdheader);
1395				lha->uid = archive_le16dec(extdheader+2);
1396			}
1397			break;
1398		case EXT_UNIX_GNAME:
1399			if (datasize > 0)
1400				archive_strncpy(&lha->gname,
1401				    (const char *)extdheader, datasize);
1402			break;
1403		case EXT_UNIX_UNAME:
1404			if (datasize > 0)
1405				archive_strncpy(&lha->uname,
1406				    (const char *)extdheader, datasize);
1407			break;
1408		case EXT_UNIX_MTIME:
1409			if (datasize == sizeof(uint32_t))
1410				lha->mtime = archive_le32dec(extdheader);
1411			break;
1412		case EXT_OS2_NEW_ATTR:
1413			/* This extended header is OS/2 depend. */
1414			if (datasize == 16) {
1415				lha->dos_attr = (unsigned char)
1416				    (archive_le16dec(extdheader) & 0xff);
1417				lha->mode = archive_le16dec(extdheader+2);
1418				lha->gid = archive_le16dec(extdheader+4);
1419				lha->uid = archive_le16dec(extdheader+6);
1420				lha->birthtime = archive_le32dec(extdheader+8);
1421				lha->atime = archive_le32dec(extdheader+12);
1422				lha->setflag |= UNIX_MODE_IS_SET
1423				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1424			}
1425			break;
1426		case EXT_NEW_ATTR:
1427			if (datasize == 20) {
1428				lha->mode = (mode_t)archive_le32dec(extdheader);
1429				lha->gid = archive_le32dec(extdheader+4);
1430				lha->uid = archive_le32dec(extdheader+8);
1431				lha->birthtime = archive_le32dec(extdheader+12);
1432				lha->atime = archive_le32dec(extdheader+16);
1433				lha->setflag |= UNIX_MODE_IS_SET
1434				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1435			}
1436			break;
1437		case EXT_TIMEZONE:		/* Not supported */
1438			break;
1439		default:
1440			break;
1441		}
1442
1443		__archive_read_consume(a, extdsize);
1444	}
1445invalid:
1446	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1447	    "Invalid extended LHa header");
1448	return (ARCHIVE_FATAL);
1449}
1450
1451static int
1452lha_end_of_entry(struct archive_read *a)
1453{
1454	struct lha *lha = (struct lha *)(a->format->data);
1455	int r = ARCHIVE_EOF;
1456
1457	if (!lha->end_of_entry_cleanup) {
1458		if ((lha->setflag & CRC_IS_SET) &&
1459		    lha->crc != lha->entry_crc_calculated) {
1460			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1461			    "LHa data CRC error");
1462			r = ARCHIVE_WARN;
1463		}
1464
1465		/* End-of-entry cleanup done. */
1466		lha->end_of_entry_cleanup = 1;
1467	}
1468	return (r);
1469}
1470
1471static int
1472archive_read_format_lha_read_data(struct archive_read *a,
1473    const void **buff, size_t *size, int64_t *offset)
1474{
1475	struct lha *lha = (struct lha *)(a->format->data);
1476	int r;
1477
1478	if (lha->entry_unconsumed) {
1479		/* Consume as much as the decompressor actually used. */
1480		__archive_read_consume(a, lha->entry_unconsumed);
1481		lha->entry_unconsumed = 0;
1482	}
1483	if (lha->end_of_entry) {
1484		*offset = lha->entry_offset;
1485		*size = 0;
1486		*buff = NULL;
1487		return (lha_end_of_entry(a));
1488	}
1489
1490	if (lha->entry_is_compressed)
1491		r =  lha_read_data_lzh(a, buff, size, offset);
1492	else
1493		/* No compression. */
1494		r =  lha_read_data_none(a, buff, size, offset);
1495	return (r);
1496}
1497
1498/*
1499 * Read a file content in no compression.
1500 *
1501 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1502 * lha->end_of_entry if it consumes all of the data.
1503 */
1504static int
1505lha_read_data_none(struct archive_read *a, const void **buff,
1506    size_t *size, int64_t *offset)
1507{
1508	struct lha *lha = (struct lha *)(a->format->data);
1509	ssize_t bytes_avail;
1510
1511	if (lha->entry_bytes_remaining == 0) {
1512		*buff = NULL;
1513		*size = 0;
1514		*offset = lha->entry_offset;
1515		lha->end_of_entry = 1;
1516		return (ARCHIVE_OK);
1517	}
1518	/*
1519	 * Note: '1' here is a performance optimization.
1520	 * Recall that the decompression layer returns a count of
1521	 * available bytes; asking for more than that forces the
1522	 * decompressor to combine reads by copying data.
1523	 */
1524	*buff = __archive_read_ahead(a, 1, &bytes_avail);
1525	if (bytes_avail <= 0) {
1526		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1527		    "Truncated LHa file data");
1528		return (ARCHIVE_FATAL);
1529	}
1530	if (bytes_avail > lha->entry_bytes_remaining)
1531		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1532	lha->entry_crc_calculated =
1533	    lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1534	*size = bytes_avail;
1535	*offset = lha->entry_offset;
1536	lha->entry_offset += bytes_avail;
1537	lha->entry_bytes_remaining -= bytes_avail;
1538	if (lha->entry_bytes_remaining == 0)
1539		lha->end_of_entry = 1;
1540	lha->entry_unconsumed = bytes_avail;
1541	return (ARCHIVE_OK);
1542}
1543
1544/*
1545 * Read a file content in LZHUFF encoding.
1546 *
1547 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1548 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1549 * all of the data.
1550 */
1551static int
1552lha_read_data_lzh(struct archive_read *a, const void **buff,
1553    size_t *size, int64_t *offset)
1554{
1555	struct lha *lha = (struct lha *)(a->format->data);
1556	ssize_t bytes_avail;
1557	int r;
1558
1559	/* If we haven't yet read any data, initialize the decompressor. */
1560	if (!lha->decompress_init) {
1561		r = lzh_decode_init(&(lha->strm), lha->method);
1562		switch (r) {
1563		case ARCHIVE_OK:
1564			break;
1565		case ARCHIVE_FAILED:
1566        		/* Unsupported compression. */
1567			*buff = NULL;
1568			*size = 0;
1569			*offset = 0;
1570			archive_set_error(&a->archive,
1571			    ARCHIVE_ERRNO_FILE_FORMAT,
1572			    "Unsupported lzh compression method -%c%c%c-",
1573			    lha->method[0], lha->method[1], lha->method[2]);
1574			/* We know compressed size; just skip it. */
1575			archive_read_format_lha_read_data_skip(a);
1576			return (ARCHIVE_WARN);
1577		default:
1578			archive_set_error(&a->archive, ENOMEM,
1579			    "Couldn't allocate memory "
1580			    "for lzh decompression");
1581			return (ARCHIVE_FATAL);
1582		}
1583		/* We've initialized decompression for this stream. */
1584		lha->decompress_init = 1;
1585		lha->strm.avail_out = 0;
1586		lha->strm.total_out = 0;
1587	}
1588
1589	/*
1590	 * Note: '1' here is a performance optimization.
1591	 * Recall that the decompression layer returns a count of
1592	 * available bytes; asking for more than that forces the
1593	 * decompressor to combine reads by copying data.
1594	 */
1595	lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1596	if (bytes_avail <= 0) {
1597		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1598		    "Truncated LHa file body");
1599		return (ARCHIVE_FATAL);
1600	}
1601	if (bytes_avail > lha->entry_bytes_remaining)
1602		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1603
1604	lha->strm.avail_in = (int)bytes_avail;
1605	lha->strm.total_in = 0;
1606	lha->strm.avail_out = 0;
1607
1608	r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1609	switch (r) {
1610	case ARCHIVE_OK:
1611		break;
1612	case ARCHIVE_EOF:
1613		lha->end_of_entry = 1;
1614		break;
1615	default:
1616		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1617		    "Bad lzh data");
1618		return (ARCHIVE_FAILED);
1619	}
1620	lha->entry_unconsumed = lha->strm.total_in;
1621	lha->entry_bytes_remaining -= lha->strm.total_in;
1622
1623	if (lha->strm.avail_out) {
1624		*offset = lha->entry_offset;
1625		*size = lha->strm.avail_out;
1626		*buff = lha->strm.ref_ptr;
1627		lha->entry_crc_calculated =
1628		    lha_crc16(lha->entry_crc_calculated, *buff, *size);
1629		lha->entry_offset += *size;
1630	} else {
1631		*offset = lha->entry_offset;
1632		*size = 0;
1633		*buff = NULL;
1634		if (lha->end_of_entry)
1635			return (lha_end_of_entry(a));
1636	}
1637	return (ARCHIVE_OK);
1638}
1639
1640/*
1641 * Skip a file content.
1642 */
1643static int
1644archive_read_format_lha_read_data_skip(struct archive_read *a)
1645{
1646	struct lha *lha;
1647	int64_t bytes_skipped;
1648
1649	lha = (struct lha *)(a->format->data);
1650
1651	if (lha->entry_unconsumed) {
1652		/* Consume as much as the decompressor actually used. */
1653		__archive_read_consume(a, lha->entry_unconsumed);
1654		lha->entry_unconsumed = 0;
1655	}
1656
1657	/* if we've already read to end of data, we're done. */
1658	if (lha->end_of_entry_cleanup)
1659		return (ARCHIVE_OK);
1660
1661	/*
1662	 * If the length is at the beginning, we can skip the
1663	 * compressed data much more quickly.
1664	 */
1665	bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1666	if (bytes_skipped < 0)
1667		return (ARCHIVE_FATAL);
1668
1669	/* This entry is finished and done. */
1670	lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1671	return (ARCHIVE_OK);
1672}
1673
1674static int
1675archive_read_format_lha_cleanup(struct archive_read *a)
1676{
1677	struct lha *lha = (struct lha *)(a->format->data);
1678
1679	lzh_decode_free(&(lha->strm));
1680	archive_string_free(&(lha->dirname));
1681	archive_string_free(&(lha->filename));
1682	archive_string_free(&(lha->uname));
1683	archive_string_free(&(lha->gname));
1684	archive_wstring_free(&(lha->ws));
1685	free(lha);
1686	(a->format->data) = NULL;
1687	return (ARCHIVE_OK);
1688}
1689
1690/*
1691 * 'LHa for UNIX' utility has archived a symbolic-link name after
1692 * a pathname with '|' character.
1693 * This function extracts the symbolic-link name from the pathname.
1694 *
1695 * example.
1696 *   1. a symbolic-name is 'aaa/bb/cc'
1697 *   2. a filename is 'xxx/bbb'
1698 *  then an archived pathname is 'xxx/bbb|aaa/bb/cc'
1699 */
1700static int
1701lha_parse_linkname(struct archive_wstring *linkname,
1702    struct archive_wstring *pathname)
1703{
1704	wchar_t *	linkptr;
1705	size_t 	symlen;
1706
1707	linkptr = wcschr(pathname->s, L'|');
1708	if (linkptr != NULL) {
1709		symlen = wcslen(linkptr + 1);
1710		archive_wstrncpy(linkname, linkptr+1, symlen);
1711
1712		*linkptr = 0;
1713		pathname->length = wcslen(pathname->s);
1714
1715		return (1);
1716	}
1717	return (0);
1718}
1719
/*
 * Convert an MSDOS-style date/time into Unix-style time.
 *
 * `p' points at four bytes: a little-endian 16-bit time word followed by
 * a little-endian 16-bit date word.  The broken-down fields are handed to
 * mktime(), which also decides whether daylight saving time applies.
 */
static time_t
lha_dos_time(const unsigned char *p)
{
	struct tm broken;
	int dos_time = archive_le16dec(p);
	int dos_date = archive_le16dec(p+2);

	memset(&broken, 0, sizeof(broken));

	/* Date word: 7 bits year (from 1980), 4 bits month, 5 bits day. */
	broken.tm_year = ((dos_date >> 9) & 0x7f) + 80;	/* Since 1900. */
	broken.tm_mon = ((dos_date >> 5) & 0x0f) - 1;	/* 0-based.    */
	broken.tm_mday = dos_date & 0x1f;

	/* Time word: 5 bits hour, 6 bits minute, 5 bits seconds/2. */
	broken.tm_hour = (dos_time >> 11) & 0x1f;
	broken.tm_min = (dos_time >> 5) & 0x3f;
	broken.tm_sec = (dos_time << 1) & 0x3e;

	broken.tm_isdst = -1;
	return (mktime(&broken));
}
1740
/*
 * Convert an MS-Windows-style date/time into Unix-style time.
 *
 * `wintime' counts units of 100 nanoseconds.  Values before EPOC_TIME
 * (1970-01-01 00:00:00 UTC) yield 0.  When `ns' is not NULL it receives
 * the sub-second part in nanoseconds.
 */
static time_t
lha_win_time(uint64_t wintime, long *ns)
{
#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
	uint64_t since_epoch;

	/* Times before the Unix epoch collapse to zero. */
	if (wintime < EPOC_TIME) {
		if (ns != NULL)
			*ns = 0;
		return (0);
	}

	since_epoch = wintime - EPOC_TIME;	/* 1970-01-01 00:00:00 (UTC) */
	if (ns != NULL)
		*ns = (long)(since_epoch % 10000000) * 100;
	return (since_epoch / 10000000);
}
1758
/*
 * Add `size' bytes, starting `offset' bytes into the buffer `pp', to the
 * 8-bit checksum `sum' and return the result.  The sum wraps modulo 256.
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	const unsigned char *bytes = (const unsigned char *)pp + offset;
	size_t k;

	for (k = 0; k < size; k++)
		sum = (unsigned char)(sum + bytes[k]);
	return (sum);
}
1769
/*
 * Lookup tables for the CRC-16 used by LHA (reflected polynomial 0xA001).
 *
 *  crc16tbl[0][b] is the CRC state after feeding the single byte `b'
 *                 into a zero state.
 *  crc16tbl[1][b] is the CRC state after feeding `b' followed by a zero
 *                 byte; it lets lha_crc16() advance two bytes per step.
 */
static uint16_t crc16tbl[2][256];

/*
 * Fill in crc16tbl.  Only the first call does any work.  This must have
 * been called before lha_crc16() is used.
 */
static void
lha_crc16_init(void)
{
	unsigned int i;
	static int crc16init = 0;

	if (crc16init)
		return;
	crc16init = 1;

	/* Bit-by-bit CRC of every possible byte value. */
	for (i = 0; i < 256; i++) {
		unsigned int j;
		uint16_t crc = (uint16_t)i;
		for (j = 8; j; j--)
			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
		crc16tbl[0][i] = crc;
	}

	/* One further byte-step applied to each entry of the first table. */
	for (i = 0; i < 256; i++) {
		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
	}
}

/*
 * Update the CRC-16 `crc' with the `len' bytes at `pp' and return the new
 * value.  A zero `len' returns `crc' unchanged.
 *
 * The input is read strictly as bytes: each two-byte step composes a
 * little-endian 16-bit value from p[0] and p[1] instead of loading through
 * a uint16_t pointer.  The result therefore does not depend on the
 * alignment of `pp' or on the byte order of the host.
 */
static uint16_t
lha_crc16(uint16_t crc, const void *pp, size_t len)
{
	const unsigned char *p = (const unsigned char *)pp;

	/* Two bytes per step, using both tables. */
	for (; len >= 2; len -= 2, p += 2) {
		crc ^= (uint16_t)(p[0] | (p[1] << 8));
		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];
	}

	/* Odd trailing byte. */
	if (len)
		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p) & 0xff];
	return crc;
}
1858
1859/*
1860 * Initialize LZHUF decoder.
1861 *
1862 * Returns ARCHIVE_OK if initialization was successful.
1863 * Returns ARCHIVE_FAILED if method is unsupported.
1864 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1865 * error occurred.
1866 */
1867static int
1868lzh_decode_init(struct lzh_stream *strm, const char *method)
1869{
1870	struct lzh_dec *ds;
1871	int w_bits, w_size;
1872
1873	if (strm->ds == NULL) {
1874		strm->ds = calloc(1, sizeof(*strm->ds));
1875		if (strm->ds == NULL)
1876			return (ARCHIVE_FATAL);
1877	}
1878	ds = strm->ds;
1879	ds->error = ARCHIVE_FAILED;
1880	if (method == NULL || method[0] != 'l' || method[1] != 'h')
1881		return (ARCHIVE_FAILED);
1882	switch (method[2]) {
1883	case '5':
1884		w_bits = 13;/* 8KiB for window */
1885		break;
1886	case '6':
1887		w_bits = 15;/* 32KiB for window */
1888		break;
1889	case '7':
1890		w_bits = 16;/* 64KiB for window */
1891		break;
1892	default:
1893		return (ARCHIVE_FAILED);/* Not supported. */
1894	}
1895	ds->error = ARCHIVE_FATAL;
1896	/* Expand a window size up to 128 KiB for decompressing process
1897	 * performance whatever its original window size is. */
1898	ds->w_size = 1U << 17;
1899	ds->w_mask = ds->w_size -1;
1900	if (ds->w_buff == NULL) {
1901		ds->w_buff = malloc(ds->w_size);
1902		if (ds->w_buff == NULL)
1903			return (ARCHIVE_FATAL);
1904	}
1905	w_size = 1U << w_bits;
1906	memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1907	ds->w_pos = 0;
1908	ds->state = 0;
1909	ds->pos_pt_len_size = w_bits + 1;
1910	ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1911	ds->literal_pt_len_size = PT_BITLEN_SIZE;
1912	ds->literal_pt_len_bits = 5;
1913	ds->br.cache_buffer = 0;
1914	ds->br.cache_avail = 0;
1915
1916	if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1917	    != ARCHIVE_OK)
1918		return (ARCHIVE_FATAL);
1919	ds->lt.len_bits = 9;
1920	if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1921	    != ARCHIVE_OK)
1922		return (ARCHIVE_FATAL);
1923	ds->error = 0;
1924
1925	return (ARCHIVE_OK);
1926}
1927
1928/*
1929 * Release LZHUF decoder.
1930 */
1931static void
1932lzh_decode_free(struct lzh_stream *strm)
1933{
1934
1935	if (strm->ds == NULL)
1936		return;
1937	free(strm->ds->w_buff);
1938	lzh_huffman_free(&(strm->ds->lt));
1939	lzh_huffman_free(&(strm->ds->pt));
1940	free(strm->ds);
1941	strm->ds = NULL;
1942}
1943
/*
 * Bit stream reader.
 *
 * The macros below work on a bit cache `br': `cache_avail' is the number
 * of unread bits held in the low end of `cache_buffer', and the next bit
 * to read is the most significant of those unread bits.
 */
/* Check that the cache buffer holds at least `n' bits.  The argument is
 * parenthesized so that any expression can safely be passed as `n'. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Peek at the next `n' bits without consuming them.  The cache must hold
 * at least `n' bits. */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[n])
/* Peek at `n' bits when the cache holds fewer than `n': the unread bits are
 * shifted up into place and the missing low bits read as zero. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[n])
/* Read ahead to make sure the cache buffer has enough compressed data for
 * what we are about to use.
 *  True  : completed, there is enough data in the cache buffer, or
 *          lzh_br_fillup() managed to fill the cache.
 *  False : strm->next_in ran empty; the following bytes are required. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
/*  True  : the cache buffer has as many bits as we need (possibly only
 *          after a partial fill-up).
 *  False : there are not enough bits in the cache buffer; the following
 *          bytes have to be supplied if there are any. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Record that `n' bits have been consumed, or give `n' bits back. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))
1972
/*
 * cache_masks[n] keeps the low `n' bits of a 16-bit value.  Indexes 16
 * through 19 all select the full 16 bits.
 */
#define LZH_LOW_BITS(n)	((uint16_t)((1UL << (n)) - 1UL))
static const uint16_t cache_masks[] = {
	LZH_LOW_BITS(0),  LZH_LOW_BITS(1),  LZH_LOW_BITS(2),  LZH_LOW_BITS(3),
	LZH_LOW_BITS(4),  LZH_LOW_BITS(5),  LZH_LOW_BITS(6),  LZH_LOW_BITS(7),
	LZH_LOW_BITS(8),  LZH_LOW_BITS(9),  LZH_LOW_BITS(10), LZH_LOW_BITS(11),
	LZH_LOW_BITS(12), LZH_LOW_BITS(13), LZH_LOW_BITS(14), LZH_LOW_BITS(15),
	LZH_LOW_BITS(16), LZH_LOW_BITS(16), LZH_LOW_BITS(16), LZH_LOW_BITS(16)
};
#undef LZH_LOW_BITS
1980
1981/*
1982 * Shift away used bits in the cache data and fill it up with following bits.
1983 * Call this when cache buffer does not have enough bits you need.
1984 *
1985 * Returns 1 if the cache buffer is full.
1986 * Returns 0 if the cache buffer is not full; input buffer is empty.
1987 */
1988static int
1989lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1990{
1991	int n = CACHE_BITS - br->cache_avail;
1992
1993	for (;;) {
1994		const int x = n >> 3;
1995		if (strm->avail_in >= x) {
1996			switch (x) {
1997			case 8:
1998				br->cache_buffer =
1999				    ((uint64_t)strm->next_in[0]) << 56 |
2000				    ((uint64_t)strm->next_in[1]) << 48 |
2001				    ((uint64_t)strm->next_in[2]) << 40 |
2002				    ((uint64_t)strm->next_in[3]) << 32 |
2003				    ((uint32_t)strm->next_in[4]) << 24 |
2004				    ((uint32_t)strm->next_in[5]) << 16 |
2005				    ((uint32_t)strm->next_in[6]) << 8 |
2006				     (uint32_t)strm->next_in[7];
2007				strm->next_in += 8;
2008				strm->avail_in -= 8;
2009				br->cache_avail += 8 * 8;
2010				return (1);
2011			case 7:
2012				br->cache_buffer =
2013		 		   (br->cache_buffer << 56) |
2014				    ((uint64_t)strm->next_in[0]) << 48 |
2015				    ((uint64_t)strm->next_in[1]) << 40 |
2016				    ((uint64_t)strm->next_in[2]) << 32 |
2017				    ((uint64_t)strm->next_in[3]) << 24 |
2018				    ((uint64_t)strm->next_in[4]) << 16 |
2019				    ((uint64_t)strm->next_in[5]) << 8 |
2020				     (uint64_t)strm->next_in[6];
2021				strm->next_in += 7;
2022				strm->avail_in -= 7;
2023				br->cache_avail += 7 * 8;
2024				return (1);
2025			case 6:
2026				br->cache_buffer =
2027		 		   (br->cache_buffer << 48) |
2028				    ((uint64_t)strm->next_in[0]) << 40 |
2029				    ((uint64_t)strm->next_in[1]) << 32 |
2030				    ((uint64_t)strm->next_in[2]) << 24 |
2031				    ((uint64_t)strm->next_in[3]) << 16 |
2032				    ((uint64_t)strm->next_in[4]) << 8 |
2033				     (uint64_t)strm->next_in[5];
2034				strm->next_in += 6;
2035				strm->avail_in -= 6;
2036				br->cache_avail += 6 * 8;
2037				return (1);
2038			case 0:
2039				/* We have enough compressed data in
2040				 * the cache buffer.*/
2041				return (1);
2042			default:
2043				break;
2044			}
2045		}
2046		if (strm->avail_in == 0) {
2047			/* There is not enough compressed data to fill up the
2048			 * cache buffer. */
2049			return (0);
2050		}
2051		br->cache_buffer =
2052		   (br->cache_buffer << 8) | *strm->next_in++;
2053		strm->avail_in--;
2054		br->cache_avail += 8;
2055		n -= 8;
2056	}
2057}
2058
/*
 * Decode LZHUF.
 *
 * 1. Returns ARCHIVE_OK if the output buffer or the input buffer is empty.
 *    Please set an available buffer and call this function again.
 * 2. Returns ARCHIVE_EOF if decompression has been completed.
 * 3. Returns ARCHIVE_FAILED if an error occurred; the compressed data
 *    is broken or you did not set the 'last' flag properly.
 * 4. The 'last' flag is very important: you must set the flag to 1 when
 *    there is no more input data. The lha compressed data format does not
 *    provide a way to know that the compressed data has really finished.
 *    Note: the lha command utility checks whether the total number of
 *    output bytes has reached the uncompressed size recorded in its header;
 *    it does not care whether the decoding process finished properly.
 *    GNU ZIP can decompress another compressed file made by SCO LZH compress.
 *    It handles EOF by filling the read buffer with zeros until the decoding
 *    process meets 2 bytes of zeros when reading the size of the next chunk,
 *    so the zeros are treated as the mark of the end of the data although
 *    the zeros are dummies, not file data.
 */
/* The two halves of the decoder state machine driven by lzh_decode(). */
2079static int	lzh_read_blocks(struct lzh_stream *, int);
2080static int	lzh_decode_blocks(struct lzh_stream *, int);
/*
 * Decoder states, stored in ds->state.  lzh_decode() hands every state
 * below ST_GET_LITERAL to lzh_read_blocks() and all others to
 * lzh_decode_blocks(), so the numeric order of these values matters.
 *
 *  ST_RD_BLOCK       read the 16-bit block count
 *  ST_RD_PT_1        read the number of bit lengths of the pt table
 *  ST_RD_PT_2        handle an empty pt table / validate the count
 *  ST_RD_PT_3        read the first three bit lengths and the 2-bit
 *                    count of zero entries that follow them
 *  ST_RD_PT_4        read the remaining bit lengths, build the pt table
 *  ST_RD_LITERAL_1   read the number of bit lengths of the lt table
 *  ST_RD_LITERAL_2   handle an empty lt table / validate the count
 *  ST_RD_LITERAL_3   decode the lt bit lengths, build the lt table
 *  ST_RD_POS_DATA_1  switch pt to the position-table parameters and go
 *                    back to ST_RD_PT_1
 *  ST_GET_LITERAL    decode a literal or a match length
 *  ST_GET_POS_1      decode the position code of a match
 *  ST_GET_POS_2      read the extra bits of the position
 *  ST_COPY_DATA      copy the matched bytes inside the window
 */
2081#define ST_RD_BLOCK		0
2082#define ST_RD_PT_1		1
2083#define ST_RD_PT_2		2
2084#define ST_RD_PT_3		3
2085#define ST_RD_PT_4		4
2086#define ST_RD_LITERAL_1		5
2087#define ST_RD_LITERAL_2		6
2088#define ST_RD_LITERAL_3		7
2089#define ST_RD_POS_DATA_1	8
2090#define ST_GET_LITERAL		9
2091#define ST_GET_POS_1		10
2092#define ST_GET_POS_2		11
2093#define ST_COPY_DATA		12
2094
2095static int
2096lzh_decode(struct lzh_stream *strm, int last)
2097{
2098	struct lzh_dec *ds = strm->ds;
2099	int avail_in;
2100	int r;
2101
2102	if (ds->error)
2103		return (ds->error);
2104
2105	avail_in = strm->avail_in;
2106	do {
2107		if (ds->state < ST_GET_LITERAL)
2108			r = lzh_read_blocks(strm, last);
2109		else
2110			r = lzh_decode_blocks(strm, last);
2111	} while (r == 100);
2112	strm->total_in += avail_in - strm->avail_in;
2113	return (r);
2114}
2115
2116static void
2117lzh_emit_window(struct lzh_stream *strm, size_t s)
2118{
2119	strm->ref_ptr = strm->ds->w_buff;
2120	strm->avail_out = (int)s;
2121	strm->total_out += s;
2122}
2123
/*
 * Read the header that precedes a run of blocks: the 16-bit block count
 * followed by the huffman tables (pt for the literal bit lengths, lt, then
 * pt again for positions).
 *
 * This is a resumable state machine keyed on ds->state.  Whenever the input
 * runs dry and `last' is zero, the state to resume from is recorded and
 * ARCHIVE_OK is returned so that the caller can supply more input.
 *
 * Returns:
 *   100             the tables are ready (state ST_GET_LITERAL);
 *                   lzh_decode() keeps looping on this value.
 *   ARCHIVE_OK      more input is needed, or the bytes remaining in the
 *                   window were handed to the caller at end of input.
 *   ARCHIVE_EOF     the input ended at ST_RD_BLOCK with fewer than 8 bits
 *                   left over and nothing left in the window.
 *   ARCHIVE_FAILED  the data is invalid or truncated; ds->error is set.
 */
2124static int
2125lzh_read_blocks(struct lzh_stream *strm, int last)
2126{
2127	struct lzh_dec *ds = strm->ds;
2128	struct lzh_br *br = &(ds->br);
2129	int c = 0, i;
2130	unsigned rbits;
2131
2132	for (;;) {
2133		switch (ds->state) {
2134		case ST_RD_BLOCK:
2135			/*
2136			 * Read a block number indicates how many blocks
2137			 * we will handle. The block is composed of a
2138			 * literal and a match, sometimes a literal only
2139			 * in particular, there are no reference data at
2140			 * the beginning of the decompression.
2141			 */
2142			if (!lzh_br_read_ahead_0(strm, br, 16)) {
2143				if (!last)
2144					/* We need following data. */
2145					return (ARCHIVE_OK);
2146				if (lzh_br_has(br, 8)) {
2147					/*
2148					 * It seems there are extra bits.
2149					 *  1. Compressed data is broken.
2150					 *  2. `last' flag does not properly
2151					 *     set.
2152					 */
2153					goto failed;
2154				}
				/* Hand over whatever is still sitting in the
				 * window before EOF can be reported. */
2155				if (ds->w_pos > 0) {
2156					lzh_emit_window(strm, ds->w_pos);
2157					ds->w_pos = 0;
2158					return (ARCHIVE_OK);
2159				}
2160				/* End of compressed data; we have completely
2161				 * handled all compressed data. */
2162				return (ARCHIVE_EOF);
2163			}
2164			ds->blocks_avail = lzh_br_bits(br, 16);
			/* A block count of zero is rejected. */
2165			if (ds->blocks_avail == 0)
2166				goto failed;
2167			lzh_br_consume(br, 16);
2168			/*
2169			 * Read a literal table compressed in huffman
2170			 * coding.
2171			 */
2172			ds->pt.len_size = ds->literal_pt_len_size;
2173			ds->pt.len_bits = ds->literal_pt_len_bits;
2174			ds->reading_position = 0;
2175			/* FALL THROUGH */
2176		case ST_RD_PT_1:
2177			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2178			 * used in reading both a literal table and a
2179			 * position table. */
2180			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2181				if (last)
2182					goto failed;/* Truncated data. */
2183				ds->state = ST_RD_PT_1;
2184				return (ARCHIVE_OK);
2185			}
2186			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2187			lzh_br_consume(br, ds->pt.len_bits);
2188			/* FALL THROUGH */
2189		case ST_RD_PT_2:
2190			if (ds->pt.len_avail == 0) {
2191				/* There is no bitlen. */
2192				if (!lzh_br_read_ahead(strm, br,
2193				    ds->pt.len_bits)) {
2194					if (last)
2195						goto failed;/* Truncated data.*/
2196					ds->state = ST_RD_PT_2;
2197					return (ARCHIVE_OK);
2198				}
2199				if (!lzh_make_fake_table(&(ds->pt),
2200				    lzh_br_bits(br, ds->pt.len_bits)))
2201					goto failed;/* Invalid data. */
2202				lzh_br_consume(br, ds->pt.len_bits);
				/* reading_position tells which of the two pt
				 * passes has just been completed. */
2203				if (ds->reading_position)
2204					ds->state = ST_GET_LITERAL;
2205				else
2206					ds->state = ST_RD_LITERAL_1;
2207				break;
2208			} else if (ds->pt.len_avail > ds->pt.len_size)
2209				goto failed;/* Invalid data. */
2210			ds->loop = 0;
2211			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
			/* ST_RD_PT_3 is skipped when fewer than three bit
			 * lengths follow or when pt.len_size equals the
			 * position-table size. */
2212			if (ds->pt.len_avail < 3 ||
2213			    ds->pt.len_size == ds->pos_pt_len_size) {
2214				ds->state = ST_RD_PT_4;
2215				break;
2216			}
2217			/* FALL THROUGH */
2218		case ST_RD_PT_3:
2219			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2220			if (ds->loop < 3) {
2221				if (ds->loop < 0 || last)
2222					goto failed;/* Invalid data. */
2223				/* Not completed, get following data. */
2224				ds->state = ST_RD_PT_3;
2225				return (ARCHIVE_OK);
2226			}
2227			/* There are some null in bitlen of the literal. */
2228			if (!lzh_br_read_ahead(strm, br, 2)) {
2229				if (last)
2230					goto failed;/* Truncated data. */
2231				ds->state = ST_RD_PT_3;
2232				return (ARCHIVE_OK);
2233			}
			/* `c' (0..3) is the number of zero bit lengths that
			 * follow the third entry. */
2234			c = lzh_br_bits(br, 2);
2235			lzh_br_consume(br, 2);
2236			if (c > ds->pt.len_avail - 3)
2237				goto failed;/* Invalid data. */
2238			for (i = 3; c-- > 0 ;)
2239				ds->pt.bitlen[i++] = 0;
2240			ds->loop = i;
2241			/* FALL THROUGH */
2242		case ST_RD_PT_4:
2243			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2244			    ds->pt.len_avail);
2245			if (ds->loop < ds->pt.len_avail) {
2246				if (ds->loop < 0 || last)
2247					goto failed;/* Invalid data. */
2248				/* Not completed, get following data. */
2249				ds->state = ST_RD_PT_4;
2250				return (ARCHIVE_OK);
2251			}
2252			if (!lzh_make_huffman_table(&(ds->pt)))
2253				goto failed;/* Invalid data */
2254			if (ds->reading_position) {
2255				ds->state = ST_GET_LITERAL;
2256				break;
2257			}
2258			/* FALL THROUGH */
2259		case ST_RD_LITERAL_1:
2260			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2261				if (last)
2262					goto failed;/* Truncated data. */
2263				ds->state = ST_RD_LITERAL_1;
2264				return (ARCHIVE_OK);
2265			}
2266			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2267			lzh_br_consume(br, ds->lt.len_bits);
2268			/* FALL THROUGH */
2269		case ST_RD_LITERAL_2:
2270			if (ds->lt.len_avail == 0) {
2271				/* There is no bitlen. */
2272				if (!lzh_br_read_ahead(strm, br,
2273				    ds->lt.len_bits)) {
2274					if (last)
2275						goto failed;/* Truncated data.*/
2276					ds->state = ST_RD_LITERAL_2;
2277					return (ARCHIVE_OK);
2278				}
2279				if (!lzh_make_fake_table(&(ds->lt),
2280				    lzh_br_bits(br, ds->lt.len_bits)))
2281					goto failed;/* Invalid data */
2282				lzh_br_consume(br, ds->lt.len_bits);
2283				ds->state = ST_RD_POS_DATA_1;
2284				break;
2285			} else if (ds->lt.len_avail > ds->lt.len_size)
2286				goto failed;/* Invalid data */
2287			ds->loop = 0;
2288			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2289			/* FALL THROUGH */
2290		case ST_RD_LITERAL_3:
			/* Each lt bit length is itself huffman-coded with
			 * the pt table; ds->loop remembers how far we got. */
2291			i = ds->loop;
2292			while (i < ds->lt.len_avail) {
2293				if (!lzh_br_read_ahead(strm, br,
2294				    ds->pt.max_bits)) {
2295					if (last)
2296						goto failed;/* Truncated data.*/
2297					ds->loop = i;
2298					ds->state = ST_RD_LITERAL_3;
2299					return (ARCHIVE_OK);
2300				}
2301				rbits = lzh_br_bits(br, ds->pt.max_bits);
2302				c = lzh_decode_huffman(&(ds->pt), rbits);
2303				if (c > 2) {
2304					/* Note: 'c' will never be more than
2305					 * eighteen since it's limited by
2306					 * PT_BITLEN_SIZE, which is being set
2307					 * to ds->pt.len_size through
2308					 * ds->literal_pt_len_size. */
2309					lzh_br_consume(br, ds->pt.bitlen[c]);
2310					c -= 2;
2311					ds->lt.freq[c]++;
2312					ds->lt.bitlen[i++] = c;
2313				} else if (c == 0) {
2314					lzh_br_consume(br, ds->pt.bitlen[c]);
2315					ds->lt.bitlen[i++] = 0;
2316				} else {
2317					/* c == 1 or c == 2 */
					/* Run of zero bit lengths: code 1 is
					 * followed by 4 bits (run of 3..18),
					 * code 2 by 9 bits (run of 20..531). */
2318					int n = (c == 1)?4:9;
2319					if (!lzh_br_read_ahead(strm, br,
2320					     ds->pt.bitlen[c] + n)) {
2321						if (last) /* Truncated data. */
2322							goto failed;
2323						ds->loop = i;
2324						ds->state = ST_RD_LITERAL_3;
2325						return (ARCHIVE_OK);
2326					}
2327					lzh_br_consume(br, ds->pt.bitlen[c]);
2328					c = lzh_br_bits(br, n);
2329					lzh_br_consume(br, n);
2330					c += (n == 4)?3:20;
2331					if (i + c > ds->lt.len_avail)
2332						goto failed;/* Invalid data */
2333					memset(&(ds->lt.bitlen[i]), 0, c);
2334					i += c;
2335				}
2336			}
2337			if (i > ds->lt.len_avail ||
2338			    !lzh_make_huffman_table(&(ds->lt)))
2339				goto failed;/* Invalid data */
2340			/* FALL THROUGH */
2341		case ST_RD_POS_DATA_1:
2342			/*
2343			 * Read a position table compressed in huffman
2344			 * coding.
2345			 */
2346			ds->pt.len_size = ds->pos_pt_len_size;
2347			ds->pt.len_bits = ds->pos_pt_len_bits;
2348			ds->reading_position = 1;
2349			ds->state = ST_RD_PT_1;
2350			break;
2351		case ST_GET_LITERAL:
			/* All tables are complete; 100 makes lzh_decode()
			 * continue with lzh_decode_blocks(). */
2352			return (100);
2353		}
2354	}
2355failed:
	/* The error is sticky: lzh_decode() returns ds->error from now on. */
2356	return (ds->error = ARCHIVE_FAILED);
2357}
2358
/*
 * Decode the literal and match codes of the current run of blocks into
 * the window buffer.
 *
 * The decoder state that is touched on every code (bit cache, window
 * position, block counter, ...) is copied into locals on entry and written
 * back to `ds' only on the exits that need it.
 *
 * Returns:
 *   100             every block has been decoded; ds->state is reset to
 *                   ST_RD_BLOCK so that lzh_decode() reads the next header.
 *   ARCHIVE_OK      more input is needed, or the window filled up and was
 *                   handed to the caller through lzh_emit_window().
 *   ARCHIVE_FAILED  the data is invalid or truncated; ds->error is set.
 */
2359static int
2360lzh_decode_blocks(struct lzh_stream *strm, int last)
2361{
2362	struct lzh_dec *ds = strm->ds;
	/* Local copy of the bit reader; it is stored back into ds->br on the
	 * `100' and next_data exits, but not on failure. */
2363	struct lzh_br bre = ds->br;
2364	struct huffman *lt = &(ds->lt);
2365	struct huffman *pt = &(ds->pt);
2366	unsigned char *w_buff = ds->w_buff;
2367	unsigned char *lt_bitlen = lt->bitlen;
2368	unsigned char *pt_bitlen = pt->bitlen;
2369	int blocks_avail = ds->blocks_avail, c = 0;
2370	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2371	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2372	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2373	int state = ds->state;
2374
2375	for (;;) {
2376		switch (state) {
2377		case ST_GET_LITERAL:
2378			for (;;) {
2379				if (blocks_avail == 0) {
2380					/* We have decoded all blocks.
2381					 * Let's handle next blocks. */
2382					ds->state = ST_RD_BLOCK;
2383					ds->br = bre;
2384					ds->blocks_avail = 0;
2385					ds->w_pos = w_pos;
2386					ds->copy_pos = 0;
2387					return (100);
2388				}
2389
2390				/* lzh_br_read_ahead() always tries to fill the
2391				 * cache buffer up. In specific situation we
2392				 * are close to the end of the data, the cache
2393				 * buffer will not be full and thus we have to
2394				 * determine if the cache buffer has some bits
2395				 * as much as we need after lzh_br_read_ahead()
2396				 * failed. */
2397				if (!lzh_br_read_ahead(strm, &bre,
2398				    lt_max_bits)) {
2399					if (!last)
2400						goto next_data;
2401					/* Remaining bits are less than
2402					 * maximum bits(lt.max_bits) but maybe
2403					 * it still remains as much as we need,
2404					 * so we should try to use it with
2405					 * dummy bits. */
2406					c = lzh_decode_huffman(lt,
2407					      lzh_br_bits_forced(&bre,
2408					        lt_max_bits));
2409					lzh_br_consume(&bre, lt_bitlen[c]);
					/* A negative cache_avail means the
					 * code used bits that were not there. */
2410					if (!lzh_br_has(&bre, 0))
2411						goto failed;/* Over read. */
2412				} else {
2413					c = lzh_decode_huffman(lt,
2414					      lzh_br_bits(&bre, lt_max_bits));
2415					lzh_br_consume(&bre, lt_bitlen[c]);
2416				}
2417				blocks_avail--;
2418				if (c > UCHAR_MAX)
2419					/* Current block is a match data. */
2420					break;
2421				/*
2422				 * 'c' is exactly a literal code.
2423				 */
2424				/* Save a decoded code to reference it
2425				 * afterward. */
2426				w_buff[w_pos] = c;
2427				if (++w_pos >= w_size) {
					/* The window is full: hand it to the
					 * caller and resume at ST_GET_LITERAL. */
2428					w_pos = 0;
2429					lzh_emit_window(strm, w_size);
2430					goto next_data;
2431				}
2432			}
2433			/* 'c' is the length of a match pattern we have
2434			 * already extracted, which has be stored in
2435			 * window(ds->w_buff). */
2436			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2437			/* FALL THROUGH */
2438		case ST_GET_POS_1:
2439			/*
2440			 * Get a reference position.
2441			 */
2442			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2443				if (!last) {
2444					state = ST_GET_POS_1;
2445					ds->copy_len = copy_len;
2446					goto next_data;
2447				}
2448				copy_pos = lzh_decode_huffman(pt,
2449				    lzh_br_bits_forced(&bre, pt_max_bits));
2450				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2451				if (!lzh_br_has(&bre, 0))
2452					goto failed;/* Over read. */
2453			} else {
2454				copy_pos = lzh_decode_huffman(pt,
2455				    lzh_br_bits(&bre, pt_max_bits));
2456				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2457			}
2458			/* FALL THROUGH */
2459		case ST_GET_POS_2:
			/* Position codes 0 and 1 are used as they are; a
			 * larger code is followed by (code - 1) extra bits. */
2460			if (copy_pos > 1) {
2461				/* We need an additional adjustment number to
2462				 * the position. */
2463				int p = copy_pos - 1;
2464				if (!lzh_br_read_ahead(strm, &bre, p)) {
2465					if (last)
2466						goto failed;/* Truncated data.*/
2467					state = ST_GET_POS_2;
2468					ds->copy_len = copy_len;
2469					ds->copy_pos = copy_pos;
2470					goto next_data;
2471				}
2472				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2473				lzh_br_consume(&bre, p);
2474			}
2475			/* The position is actually a distance from the last
2476			 * code we had extracted and thus we have to convert
2477			 * it to a position of the window. */
2478			copy_pos = (w_pos - copy_pos - 1) & w_mask;
2479			/* FALL THROUGH */
2480		case ST_COPY_DATA:
2481			/*
2482			 * Copy `copy_len' bytes as extracted data from
2483			 * the window into the output buffer.
2484			 */
2485			for (;;) {
2486				int l;
2487
				/* Limit this pass so that neither the source
				 * nor the destination runs past the end of
				 * the window. */
2488				l = copy_len;
2489				if (copy_pos > w_pos) {
2490					if (l > w_size - copy_pos)
2491						l = w_size - copy_pos;
2492				} else {
2493					if (l > w_size - w_pos)
2494						l = w_size - w_pos;
2495				}
2496				if ((copy_pos + l < w_pos)
2497				    || (w_pos + l < copy_pos)) {
2498					/* No overlap. */
2499					memcpy(w_buff + w_pos,
2500					    w_buff + copy_pos, l);
2501				} else {
					/* The regions may overlap: copy
					 * forward byte by byte (two bytes per
					 * iteration) so that bytes written
					 * earlier in this copy can be read
					 * back as source. */
2502					const unsigned char *s;
2503					unsigned char *d;
2504					int li;
2505
2506					d = w_buff + w_pos;
2507					s = w_buff + copy_pos;
2508					for (li = 0; li < l-1;) {
2509						d[li] = s[li];li++;
2510						d[li] = s[li];li++;
2511					}
2512					if (li < l)
2513						d[li] = s[li];
2514				}
2515				w_pos += l;
2516				if (w_pos == w_size) {
					/* The window is full: emit it and
					 * remember whether part of the match
					 * is still to be copied. */
2517					w_pos = 0;
2518					lzh_emit_window(strm, w_size);
2519					if (copy_len <= l)
2520						state = ST_GET_LITERAL;
2521					else {
2522						state = ST_COPY_DATA;
2523						ds->copy_len = copy_len - l;
2524						ds->copy_pos =
2525						    (copy_pos + l) & w_mask;
2526					}
2527					goto next_data;
2528				}
2529				if (copy_len <= l)
2530					/* A copy of current pattern ended. */
2531					break;
2532				copy_len -= l;
2533				copy_pos = (copy_pos + l) & w_mask;
2534			}
2535			state = ST_GET_LITERAL;
2536			break;
2537		}
2538	}
2539failed:
	/* The error is sticky: lzh_decode() returns ds->error from now on. */
2540	return (ds->error = ARCHIVE_FAILED);
2541next_data:
	/* Save the working copies so that the next call resumes here. */
2542	ds->br = bre;
2543	ds->blocks_avail = blocks_avail;
2544	ds->state = state;
2545	ds->w_pos = w_pos;
2546	return (ARCHIVE_OK);
2547}
2548
2549static int
2550lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2551{
2552	int bits;
2553
2554	if (hf->bitlen == NULL) {
2555		hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2556		if (hf->bitlen == NULL)
2557			return (ARCHIVE_FATAL);
2558	}
2559	if (hf->tbl == NULL) {
2560		if (tbl_bits < HTBL_BITS)
2561			bits = tbl_bits;
2562		else
2563			bits = HTBL_BITS;
2564		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2565		if (hf->tbl == NULL)
2566			return (ARCHIVE_FATAL);
2567	}
2568	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2569		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2570		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2571		if (hf->tree == NULL)
2572			return (ARCHIVE_FATAL);
2573	}
2574	hf->len_size = (int)len_size;
2575	hf->tbl_bits = tbl_bits;
2576	return (ARCHIVE_OK);
2577}
2578
2579static void
2580lzh_huffman_free(struct huffman *hf)
2581{
2582	free(hf->bitlen);
2583	free(hf->tbl);
2584	free(hf->tree);
2585}
2586
/*
 * Lookup table for the unary-coded tail of a bit-length value.
 *
 * lzh_read_pt_bitlen() indexes it with the 10 bits that follow a leading
 * "111".  The entry is the decoded value (7..16), i.e. 7 plus the number
 * of further leading 1 bits in the index.  The all-ones index (0x3FF) maps
 * to 0, which the caller treats as invalid data.
 */
2587static const char bitlen_tbl[0x400] = {
2588	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2589	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2590	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2591	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2592	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2593	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2594	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2595	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2596	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2597	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2598	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2599	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2600	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2601	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2602	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2603	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2604	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2605	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2606	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2607	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2608	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2609	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2610	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2611	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2612	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2613	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2614	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2615	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2616	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2617	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2618	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2619	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2620	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2621	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2622	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2623	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2624	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2625	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2626	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2627	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2628	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2629	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2630	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2631	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2632	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2633	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2634	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2635	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2636	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2637	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2638	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2639	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2640	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2641	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2642	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2643	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2644	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2645	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2646	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2647	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2648	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2649	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2650	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2651	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
2652};
2653static int
2654lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2655{
2656	struct lzh_dec *ds = strm->ds;
2657	struct lzh_br *br = &(ds->br);
2658	int c, i;
2659
2660	for (i = start; i < end; ) {
2661		/*
2662		 *  bit pattern     the number we need
2663		 *     000           ->  0
2664		 *     001           ->  1
2665		 *     010           ->  2
2666		 *     ...
2667		 *     110           ->  6
2668		 *     1110          ->  7
2669		 *     11110         ->  8
2670		 *     ...
2671		 *     1111111111110 ->  16
2672		 */
2673		if (!lzh_br_read_ahead(strm, br, 3))
2674			return (i);
2675		if ((c = lzh_br_bits(br, 3)) == 7) {
2676			if (!lzh_br_read_ahead(strm, br, 13))
2677				return (i);
2678			c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2679			if (c)
2680				lzh_br_consume(br, c - 3);
2681			else
2682				return (-1);/* Invalid data. */
2683		} else
2684			lzh_br_consume(br, 3);
2685		ds->pt.bitlen[i++] = c;
2686		ds->pt.freq[c]++;
2687	}
2688	return (i);
2689}
2690
2691static int
2692lzh_make_fake_table(struct huffman *hf, uint16_t c)
2693{
2694	if (c >= hf->len_size)
2695		return (0);
2696	hf->tbl[0] = c;
2697	hf->max_bits = 0;
2698	hf->shift_bits = 0;
2699	hf->bitlen[hf->tbl[0]] = 0;
2700	return (1);
2701}
2702
2703/*
2704 * Make a huffman coding table.
 *
 * Builds the decoding structures of `hf' from the bit lengths in
 * hf->bitlen[0 .. len_avail-1] and the per-length counts in
 * hf->freq[1 .. 16]:
 *  - hf->tbl is a direct lookup table indexed by the leading
 *    min(max_bits, HTBL_BITS) bits of a code;
 *  - codes longer than HTBL_BITS continue in hf->tree; tree nodes are
 *    referenced from tbl and from other nodes by (node index + len_avail).
 * hf->max_bits, hf->shift_bits and hf->tree_used are set as well.
 *
 * Returns 1 on success.  Returns 0 if the bit lengths are rejected: they
 * do not exactly fill the code space, a code is longer than hf->tbl_bits,
 * or the table/tree cannot hold them consistently.
2705 */
2706static int
2707lzh_make_huffman_table(struct huffman *hf)
2708{
2709	uint16_t *tbl;
2710	const unsigned char *bitlen;
2711	int bitptn[17], weight[17];
2712	int i, maxbits = 0, ptn, tbl_size, w;
2713	int diffbits, len_avail;
2714
2715	/*
2716	 * Initialize bit patterns.
2717	 */
	/* bitptn[i] is the first 16-bit-aligned code of length i and
	 * weight[i] the span one such code covers (1 << (16 - i)). */
2718	ptn = 0;
2719	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2720		bitptn[i] = ptn;
2721		weight[i] = w;
2722		if (hf->freq[i]) {
2723			ptn += hf->freq[i] * w;
2724			maxbits = i;
2725		}
2726	}
	/* The codes must cover the 16-bit code space exactly. */
2727	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2728		return (0);/* Invalid */
2729
2730	hf->max_bits = maxbits;
2731
2732	/*
2733	 * Cut out extra bits which we won't house in the table.
2734	 * This preparation reduces the same calculation in the for-loop
2735	 * making the table.
2736	 */
2737	if (maxbits < 16) {
2738		int ebits = 16 - maxbits;
2739		for (i = 1; i <= maxbits; i++) {
2740			bitptn[i] >>= ebits;
2741			weight[i] >>= ebits;
2742		}
2743	}
2744	if (maxbits > HTBL_BITS) {
2745		unsigned htbl_max;
2746		uint16_t *p;
2747
2748		diffbits = maxbits - HTBL_BITS;
2749		for (i = 1; i <= HTBL_BITS; i++) {
2750			bitptn[i] >>= diffbits;
2751			weight[i] >>= diffbits;
2752		}
		/* Zero the direct-table slots that are not filled by codes
		 * of HTBL_BITS bits or fewer; a zero slot later means that
		 * no tree node has been attached to it yet. */
2753		htbl_max = bitptn[HTBL_BITS] +
2754		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2755		p = &(hf->tbl[htbl_max]);
2756		while (p < &hf->tbl[1U<<HTBL_BITS])
2757			*p++ = 0;
2758	} else
2759		diffbits = 0;
2760	hf->shift_bits = diffbits;
2761
2762	/*
2763	 * Make the table.
2764	 */
2765	tbl_size = 1 << HTBL_BITS;
2766	tbl = hf->tbl;
2767	bitlen = hf->bitlen;
2768	len_avail = hf->len_avail;
2769	hf->tree_used = 0;
2770	for (i = 0; i < len_avail; i++) {
2771		uint16_t *p;
2772		int len, cnt;
2773		uint16_t bit;
2774		int extlen;
2775		struct htree_t *ht;
2776
		/* Symbols with a zero bit length are not used. */
2777		if (bitlen[i] == 0)
2778			continue;
2779		/* Get a bit pattern */
2780		len = bitlen[i];
2781		ptn = bitptn[len];
2782		cnt = weight[len];
2783		if (len <= HTBL_BITS) {
2784			/* Calculate next bit pattern */
2785			if ((bitptn[len] = ptn + cnt) > tbl_size)
2786				return (0);/* Invalid */
2787			/* Update the table */
			/* Fill `cnt' consecutive slots with symbol i.  Large
			 * runs are seeded with eight entries at the top of
			 * the run and then replicated downwards by memcpy. */
2788			p = &(tbl[ptn]);
2789			if (cnt > 7) {
2790				uint16_t *pc;
2791
2792				cnt -= 8;
2793				pc = &p[cnt];
2794				pc[0] = (uint16_t)i;
2795				pc[1] = (uint16_t)i;
2796				pc[2] = (uint16_t)i;
2797				pc[3] = (uint16_t)i;
2798				pc[4] = (uint16_t)i;
2799				pc[5] = (uint16_t)i;
2800				pc[6] = (uint16_t)i;
2801				pc[7] = (uint16_t)i;
2802				if (cnt > 7) {
2803					cnt -= 8;
2804					memcpy(&p[cnt], pc,
2805						8 * sizeof(uint16_t));
2806					pc = &p[cnt];
2807					while (cnt > 15) {
2808						cnt -= 16;
2809						memcpy(&p[cnt], pc,
2810							16 * sizeof(uint16_t));
2811					}
2812				}
2813				if (cnt)
2814					memcpy(p, pc, cnt * sizeof(uint16_t));
2815			} else {
2816				while (cnt > 1) {
2817					p[--cnt] = (uint16_t)i;
2818					p[--cnt] = (uint16_t)i;
2819				}
2820				if (cnt)
2821					p[--cnt] = (uint16_t)i;
2822			}
2823			continue;
2824		}
2825
2826		/*
2827		 * A bit length is too big to be housed to a direct table,
2828		 * so we use a tree model for its extra bits.
2829		 */
2830		bitptn[len] = ptn + cnt;
2831		bit = 1U << (diffbits -1);
2832		extlen = len - HTBL_BITS;
2833
		/* Find or create the root node hanging off the direct-table
		 * slot selected by the leading HTBL_BITS bits. */
2834		p = &(tbl[ptn >> diffbits]);
2835		if (*p == 0) {
2836			*p = len_avail + hf->tree_used;
2837			ht = &(hf->tree[hf->tree_used++]);
2838			if (hf->tree_used > hf->tree_avail)
2839				return (0);/* Invalid */
2840			ht->left = 0;
2841			ht->right = 0;
2842		} else {
2843			if (*p < len_avail ||
2844			    *p >= (len_avail + hf->tree_used))
2845				return (0);/* Invalid */
2846			ht = &(hf->tree[*p - len_avail]);
2847		}
		/* Walk (creating nodes as needed) one level per extra bit
		 * except the last: a set bit follows `left', a clear bit
		 * follows `right'. */
2848		while (--extlen > 0) {
2849			if (ptn & bit) {
2850				if (ht->left < len_avail) {
2851					ht->left = len_avail + hf->tree_used;
2852					ht = &(hf->tree[hf->tree_used++]);
2853					if (hf->tree_used > hf->tree_avail)
2854						return (0);/* Invalid */
2855					ht->left = 0;
2856					ht->right = 0;
2857				} else {
2858					ht = &(hf->tree[ht->left - len_avail]);
2859				}
2860			} else {
2861				if (ht->right < len_avail) {
2862					ht->right = len_avail + hf->tree_used;
2863					ht = &(hf->tree[hf->tree_used++]);
2864					if (hf->tree_used > hf->tree_avail)
2865						return (0);/* Invalid */
2866					ht->left = 0;
2867					ht->right = 0;
2868				} else {
2869					ht = &(hf->tree[ht->right - len_avail]);
2870				}
2871			}
2872			bit >>= 1;
2873		}
		/* The last extra bit selects the leaf that stores symbol i;
		 * the leaf must still be unused. */
2874		if (ptn & bit) {
2875			if (ht->left != 0)
2876				return (0);/* Invalid */
2877			ht->left = (uint16_t)i;
2878		} else {
2879			if (ht->right != 0)
2880				return (0);/* Invalid */
2881			ht->right = (uint16_t)i;
2882		}
2883	}
2884	return (1);
2885}
2886
2887static int
2888lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2889{
2890	struct htree_t *ht;
2891	int extlen;
2892
2893	ht = hf->tree;
2894	extlen = hf->shift_bits;
2895	while (c >= hf->len_avail) {
2896		c -= hf->len_avail;
2897		if (extlen-- <= 0 || c >= hf->tree_used)
2898			return (0);
2899		if (rbits & (1U << extlen))
2900			c = ht[c].left;
2901		else
2902			c = ht[c].right;
2903	}
2904	return (c);
2905}
2906
2907static inline int
2908lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2909{
2910	int c;
2911	/*
2912	 * At first search an index table for a bit pattern.
2913	 * If it fails, search a huffman tree for.
2914	 */
2915	c = hf->tbl[rbits >> hf->shift_bits];
2916	if (c < hf->len_avail || hf->len_avail == 0)
2917		return (c);
2918	/* This bit pattern needs to be found out at a huffman tree. */
2919	return (lzh_decode_huffman_tree(hf, rbits, c));
2920}
2921
2922