archive_read_support_format_lha.c revision 358090
1/*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_ERRNO_H
29#include <errno.h>
30#endif
31#ifdef HAVE_LIMITS_H
32#include <limits.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_entry.h"
43#include "archive_entry_locale.h"
44#include "archive_private.h"
45#include "archive_read_private.h"
46#include "archive_endian.h"
47
48
/* Bounds on the length of a single LZ match. */
#define MINMATCH		3	/* Shortest match length. */
#define MAXMATCH		256	/* Longest match length. */

/*
 * Layout of the literal table:
 *
 * +0              +256                      +510
 * +---------------+-------------------------+
 * | literal code  |       match length      |
 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
 * +---------------+-------------------------+
 *  <---          LT_BITLEN_SIZE         --->
 *
 * i.e. one slot per byte value followed by one slot per match length.
 */
#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)

/*
 * Size of the position table.
 * Note: this size is used for both the position table and the
 * pre-literal table.
 */
#define PT_BITLEN_SIZE		(3 + 16)
65
/*
 * Bit stream reader: a small cache of not-yet-consumed input bits.
 */
#define CACHE_TYPE		uint64_t
#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
struct lzh_br {
	/* Cache buffer. */
	CACHE_TYPE	 cache_buffer;
	/* Number of bits currently available in cache_buffer. */
	int		 cache_avail;
};

/* Node of the binary tree used for codes longer than the direct table. */
struct htree_t {
	uint16_t left;
	uint16_t right;
};

/*
 * Huffman decoding state.
 *
 * Decoding goes through an index table first because that is faster than
 * walking a binary huffman tree; the table is kept small because a large
 * index table causes many L1 cache read misses.
 */
#define HTBL_BITS	10
struct huffman {
	int		 len_size;
	int		 len_avail;
	int		 len_bits;
	int		 freq[17];
	unsigned char	*bitlen;

	int		 max_bits;
	int		 shift_bits;
	int		 tbl_bits;
	int		 tree_used;
	int		 tree_avail;
	/* Direct access table. */
	uint16_t	*tbl;
	/* Binary tree table for the extra bits beyond the direct access. */
	struct htree_t	*tree;
};

/*
 * Complete state of one LZH decoder.
 */
struct lzh_dec {
	/* Decoding status. */
	int			 state;

	/*
	 * Sliding window over the most recently decoded data:
	 * 8Ki (lh5), 32Ki (lh6) or 64Ki (lh7) bytes.
	 */
	int			 w_size;
	int			 w_mask;
	/* Window storage, used as a loop (ring) buffer. */
	unsigned char		*w_buff;
	/* Position in the window where the next byte is inserted. */
	int			 w_pos;
	/* Position in the window from which decoded data can be copied. */
	int			 copy_pos;
	/* Number of bytes that can be copied from the window. */
	int			 copy_len;

	/* Bit stream reader. */
	struct lzh_br		 br;

	/*
	 * Huffman coding tables.
	 * NOTE(review): presumably lt is the literal table and pt the
	 * position/pre-literal table -- confirm against the decoder.
	 */
	struct huffman		 lt, pt;

	int			 blocks_avail;
	int			 pos_pt_len_size;
	int			 pos_pt_len_bits;
	int			 literal_pt_len_size;
	int			 literal_pt_len_bits;
	int			 reading_position;
	int			 loop;
	int			 error;
};
137
/*
 * Stream handle passed to the lzh_decode*() routines.
 *
 * NOTE(review): the member names follow the usual "next/avail/total"
 * stream convention (input cursor and remaining input, remaining output
 * room, running totals) -- confirm exact semantics against lzh_decode().
 */
struct lzh_stream {
	const unsigned char *next_in;
	int avail_in;
	int64_t total_in;
	const unsigned char *ref_ptr;
	int avail_out;
	int64_t total_out;
	/* Decoder state attached to this stream. */
	struct lzh_dec *ds;
};
147
148struct lha {
149	/* entry_bytes_remaining is the number of bytes we expect.	    */
150	int64_t                  entry_offset;
151	int64_t                  entry_bytes_remaining;
152	int64_t			 entry_unconsumed;
153	uint16_t		 entry_crc_calculated;
154
155	size_t			 header_size;	/* header size		    */
156	unsigned char		 level;		/* header level		    */
157	char			 method[3];	/* compress type	    */
158	int64_t			 compsize;	/* compressed data size	    */
159	int64_t			 origsize;	/* original file size	    */
160	int			 setflag;
161#define BIRTHTIME_IS_SET	1
162#define ATIME_IS_SET		2
163#define UNIX_MODE_IS_SET	4
164#define CRC_IS_SET		8
165	time_t			 birthtime;
166	long			 birthtime_tv_nsec;
167	time_t			 mtime;
168	long			 mtime_tv_nsec;
169	time_t			 atime;
170	long			 atime_tv_nsec;
171	mode_t			 mode;
172	int64_t			 uid;
173	int64_t			 gid;
174	struct archive_string 	 uname;
175	struct archive_string 	 gname;
176	uint16_t		 header_crc;
177	uint16_t		 crc;
178	/* dirname and filename could be in different codepages */
179	struct archive_string_conv *sconv_dir;
180	struct archive_string_conv *sconv_fname;
181	struct archive_string_conv *opt_sconv;
182
183	struct archive_string 	 dirname;
184	struct archive_string 	 filename;
185	struct archive_wstring	 ws;
186
187	unsigned char		 dos_attr;
188
189	/* Flag to mark progress that an archive was read their first header.*/
190	char			 found_first_header;
191	/* Flag to mark that indicates an empty directory. */
192	char			 directory;
193
194	/* Flags to mark progress of decompression. */
195	char			 decompress_init;
196	char			 end_of_entry;
197	char			 end_of_entry_cleanup;
198	char			 entry_is_compressed;
199
200	char			 format_name[64];
201
202	struct lzh_stream	 strm;
203};
204
/*
 * Offsets of the members that every LHa header level has in common,
 * and the minimum number of header bytes needed to inspect them.
 */
#define H_METHOD_OFFSET	2	/* Compression type string ("-lh5-" etc.). */
#define H_ATTR_OFFSET	19	/* DOS attribute byte. */
#define H_LEVEL_OFFSET	20	/* Header level byte. */
#define H_SIZE		22	/* Minimum header size. */
212
213static int      archive_read_format_lha_bid(struct archive_read *, int);
214static int      archive_read_format_lha_options(struct archive_read *,
215		    const char *, const char *);
216static int	archive_read_format_lha_read_header(struct archive_read *,
217		    struct archive_entry *);
218static int	archive_read_format_lha_read_data(struct archive_read *,
219		    const void **, size_t *, int64_t *);
220static int	archive_read_format_lha_read_data_skip(struct archive_read *);
221static int	archive_read_format_lha_cleanup(struct archive_read *);
222
223static void	lha_replace_path_separator(struct lha *,
224		    struct archive_entry *);
225static int	lha_read_file_header_0(struct archive_read *, struct lha *);
226static int	lha_read_file_header_1(struct archive_read *, struct lha *);
227static int	lha_read_file_header_2(struct archive_read *, struct lha *);
228static int	lha_read_file_header_3(struct archive_read *, struct lha *);
229static int	lha_read_file_extended_header(struct archive_read *,
230		    struct lha *, uint16_t *, int, size_t, size_t *);
231static size_t	lha_check_header_format(const void *);
232static int	lha_skip_sfx(struct archive_read *);
233static time_t	lha_dos_time(const unsigned char *);
234static time_t	lha_win_time(uint64_t, long *);
235static unsigned char	lha_calcsum(unsigned char, const void *,
236		    int, size_t);
237static int	lha_parse_linkname(struct archive_wstring *,
238		    struct archive_wstring *);
239static int	lha_read_data_none(struct archive_read *, const void **,
240		    size_t *, int64_t *);
241static int	lha_read_data_lzh(struct archive_read *, const void **,
242		    size_t *, int64_t *);
243static void	lha_crc16_init(void);
244static uint16_t lha_crc16(uint16_t, const void *, size_t);
245static int	lzh_decode_init(struct lzh_stream *, const char *);
246static void	lzh_decode_free(struct lzh_stream *);
247static int	lzh_decode(struct lzh_stream *, int);
248static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
249static int	lzh_huffman_init(struct huffman *, size_t, int);
250static void	lzh_huffman_free(struct huffman *);
251static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
252static int	lzh_make_fake_table(struct huffman *, uint16_t);
253static int	lzh_make_huffman_table(struct huffman *);
254static inline int lzh_decode_huffman(struct huffman *, unsigned);
255static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
256
257
258int
259archive_read_support_format_lha(struct archive *_a)
260{
261	struct archive_read *a = (struct archive_read *)_a;
262	struct lha *lha;
263	int r;
264
265	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
266	    ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
267
268	lha = (struct lha *)calloc(1, sizeof(*lha));
269	if (lha == NULL) {
270		archive_set_error(&a->archive, ENOMEM,
271		    "Can't allocate lha data");
272		return (ARCHIVE_FATAL);
273	}
274	archive_string_init(&lha->ws);
275
276	r = __archive_read_register_format(a,
277	    lha,
278	    "lha",
279	    archive_read_format_lha_bid,
280	    archive_read_format_lha_options,
281	    archive_read_format_lha_read_header,
282	    archive_read_format_lha_read_data,
283	    archive_read_format_lha_read_data_skip,
284	    NULL,
285	    archive_read_format_lha_cleanup,
286	    NULL,
287	    NULL);
288
289	if (r != ARCHIVE_OK)
290		free(lha);
291	return (ARCHIVE_OK);
292}
293
294static size_t
295lha_check_header_format(const void *h)
296{
297	const unsigned char *p = h;
298	size_t next_skip_bytes;
299
300	switch (p[H_METHOD_OFFSET+3]) {
301	/*
302	 * "-lh0-" ... "-lh7-" "-lhd-"
303	 * "-lzs-" "-lz5-"
304	 */
305	case '0': case '1': case '2': case '3':
306	case '4': case '5': case '6': case '7':
307	case 'd':
308	case 's':
309		next_skip_bytes = 4;
310
311		/* b0 == 0 means the end of an LHa archive file.	*/
312		if (p[0] == 0)
313			break;
314		if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
315		    ||  p[H_METHOD_OFFSET+4] != '-')
316			break;
317
318		if (p[H_METHOD_OFFSET+2] == 'h') {
319			/* "-lh?-" */
320			if (p[H_METHOD_OFFSET+3] == 's')
321				break;
322			if (p[H_LEVEL_OFFSET] == 0)
323				return (0);
324			if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
325				return (0);
326		}
327		if (p[H_METHOD_OFFSET+2] == 'z') {
328			/* LArc extensions: -lzs-,-lz4- and -lz5- */
329			if (p[H_LEVEL_OFFSET] != 0)
330				break;
331			if (p[H_METHOD_OFFSET+3] == 's'
332			    || p[H_METHOD_OFFSET+3] == '4'
333			    || p[H_METHOD_OFFSET+3] == '5')
334				return (0);
335		}
336		break;
337	case 'h': next_skip_bytes = 1; break;
338	case 'z': next_skip_bytes = 1; break;
339	case 'l': next_skip_bytes = 2; break;
340	case '-': next_skip_bytes = 3; break;
341	default : next_skip_bytes = 4; break;
342	}
343
344	return (next_skip_bytes);
345}
346
347static int
348archive_read_format_lha_bid(struct archive_read *a, int best_bid)
349{
350	const char *p;
351	const void *buff;
352	ssize_t bytes_avail, offset, window;
353	size_t next;
354
355	/* If there's already a better bid than we can ever
356	   make, don't bother testing. */
357	if (best_bid > 30)
358		return (-1);
359
360	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
361		return (-1);
362
363	if (lha_check_header_format(p) == 0)
364		return (30);
365
366	if (p[0] == 'M' && p[1] == 'Z') {
367		/* PE file */
368		offset = 0;
369		window = 4096;
370		while (offset < (1024 * 20)) {
371			buff = __archive_read_ahead(a, offset + window,
372			    &bytes_avail);
373			if (buff == NULL) {
374				/* Remaining bytes are less than window. */
375				window >>= 1;
376				if (window < (H_SIZE + 3))
377					return (0);
378				continue;
379			}
380			p = (const char *)buff + offset;
381			while (p + H_SIZE < (const char *)buff + bytes_avail) {
382				if ((next = lha_check_header_format(p)) == 0)
383					return (30);
384				p += next;
385			}
386			offset = p - (const char *)buff;
387		}
388	}
389	return (0);
390}
391
392static int
393archive_read_format_lha_options(struct archive_read *a,
394    const char *key, const char *val)
395{
396	struct lha *lha;
397	int ret = ARCHIVE_FAILED;
398
399	lha = (struct lha *)(a->format->data);
400	if (strcmp(key, "hdrcharset")  == 0) {
401		if (val == NULL || val[0] == 0)
402			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
403			    "lha: hdrcharset option needs a character-set name");
404		else {
405			lha->opt_sconv =
406			    archive_string_conversion_from_charset(
407				&a->archive, val, 0);
408			if (lha->opt_sconv != NULL)
409				ret = ARCHIVE_OK;
410			else
411				ret = ARCHIVE_FATAL;
412		}
413		return (ret);
414	}
415
416	/* Note: The "warn" return is just to inform the options
417	 * supervisor that we didn't handle it.  It will generate
418	 * a suitable error if no one used this option. */
419	return (ARCHIVE_WARN);
420}
421
422static int
423lha_skip_sfx(struct archive_read *a)
424{
425	const void *h;
426	const char *p, *q;
427	size_t next, skip;
428	ssize_t bytes, window;
429
430	window = 4096;
431	for (;;) {
432		h = __archive_read_ahead(a, window, &bytes);
433		if (h == NULL) {
434			/* Remaining bytes are less than window. */
435			window >>= 1;
436			if (window < (H_SIZE + 3))
437				goto fatal;
438			continue;
439		}
440		if (bytes < H_SIZE)
441			goto fatal;
442		p = h;
443		q = p + bytes;
444
445		/*
446		 * Scan ahead until we find something that looks
447		 * like the lha header.
448		 */
449		while (p + H_SIZE < q) {
450			if ((next = lha_check_header_format(p)) == 0) {
451				skip = p - (const char *)h;
452				__archive_read_consume(a, skip);
453				return (ARCHIVE_OK);
454			}
455			p += next;
456		}
457		skip = p - (const char *)h;
458		__archive_read_consume(a, skip);
459	}
460fatal:
461	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
462	    "Couldn't find out LHa header");
463	return (ARCHIVE_FATAL);
464}
465
466static int
467truncated_error(struct archive_read *a)
468{
469	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
470	    "Truncated LHa header");
471	return (ARCHIVE_FATAL);
472}
473
474static int
475archive_read_format_lha_read_header(struct archive_read *a,
476    struct archive_entry *entry)
477{
478	struct archive_wstring linkname;
479	struct archive_wstring pathname;
480	struct lha *lha;
481	const unsigned char *p;
482	const char *signature;
483	int err;
484	struct archive_mstring conv_buffer;
485	const wchar_t *conv_buffer_p;
486
487	lha_crc16_init();
488
489	a->archive.archive_format = ARCHIVE_FORMAT_LHA;
490	if (a->archive.archive_format_name == NULL)
491		a->archive.archive_format_name = "lha";
492
493	lha = (struct lha *)(a->format->data);
494	lha->decompress_init = 0;
495	lha->end_of_entry = 0;
496	lha->end_of_entry_cleanup = 0;
497	lha->entry_unconsumed = 0;
498
499	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
500		/*
501		 * LHa archiver added 0 to the tail of its archive file as
502		 * the mark of the end of the archive.
503		 */
504		signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
505		if (signature == NULL || signature[0] == 0)
506			return (ARCHIVE_EOF);
507		return (truncated_error(a));
508	}
509
510	signature = (const char *)p;
511	if (lha->found_first_header == 0 &&
512	    signature[0] == 'M' && signature[1] == 'Z') {
513                /* This is an executable?  Must be self-extracting... 	*/
514		err = lha_skip_sfx(a);
515		if (err < ARCHIVE_WARN)
516			return (err);
517
518		if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
519			return (truncated_error(a));
520		signature = (const char *)p;
521	}
522	/* signature[0] == 0 means the end of an LHa archive file. */
523	if (signature[0] == 0)
524		return (ARCHIVE_EOF);
525
526	/*
527	 * Check the header format and method type.
528	 */
529	if (lha_check_header_format(p) != 0) {
530		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
531		    "Bad LHa file");
532		return (ARCHIVE_FATAL);
533	}
534
535	/* We've found the first header. */
536	lha->found_first_header = 1;
537	/* Set a default value and common data */
538	lha->header_size = 0;
539	lha->level = p[H_LEVEL_OFFSET];
540	lha->method[0] = p[H_METHOD_OFFSET+1];
541	lha->method[1] = p[H_METHOD_OFFSET+2];
542	lha->method[2] = p[H_METHOD_OFFSET+3];
543	if (memcmp(lha->method, "lhd", 3) == 0)
544		lha->directory = 1;
545	else
546		lha->directory = 0;
547	if (memcmp(lha->method, "lh0", 3) == 0 ||
548	    memcmp(lha->method, "lz4", 3) == 0)
549		lha->entry_is_compressed = 0;
550	else
551		lha->entry_is_compressed = 1;
552
553	lha->compsize = 0;
554	lha->origsize = 0;
555	lha->setflag = 0;
556	lha->birthtime = 0;
557	lha->birthtime_tv_nsec = 0;
558	lha->mtime = 0;
559	lha->mtime_tv_nsec = 0;
560	lha->atime = 0;
561	lha->atime_tv_nsec = 0;
562	lha->mode = (lha->directory)? 0777 : 0666;
563	lha->uid = 0;
564	lha->gid = 0;
565	archive_string_empty(&lha->dirname);
566	archive_string_empty(&lha->filename);
567	lha->dos_attr = 0;
568	if (lha->opt_sconv != NULL) {
569		lha->sconv_dir = lha->opt_sconv;
570		lha->sconv_fname = lha->opt_sconv;
571	} else {
572		lha->sconv_dir = NULL;
573		lha->sconv_fname = NULL;
574	}
575
576	switch (p[H_LEVEL_OFFSET]) {
577	case 0:
578		err = lha_read_file_header_0(a, lha);
579		break;
580	case 1:
581		err = lha_read_file_header_1(a, lha);
582		break;
583	case 2:
584		err = lha_read_file_header_2(a, lha);
585		break;
586	case 3:
587		err = lha_read_file_header_3(a, lha);
588		break;
589	default:
590		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
591		    "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
592		err = ARCHIVE_FATAL;
593		break;
594	}
595	if (err < ARCHIVE_WARN)
596		return (err);
597
598
599	if (!lha->directory && archive_strlen(&lha->filename) == 0)
600		/* The filename has not been set */
601		return (truncated_error(a));
602
603	/*
604	 * Make a pathname from a dirname and a filename, after converting to Unicode.
605	 * This is because codepages might differ between dirname and filename.
606	*/
607	archive_string_init(&pathname);
608	archive_string_init(&linkname);
609	archive_string_init(&conv_buffer.aes_mbs);
610	archive_string_init(&conv_buffer.aes_mbs_in_locale);
611	archive_string_init(&conv_buffer.aes_utf8);
612	archive_string_init(&conv_buffer.aes_wcs);
613	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) {
614		archive_set_error(&a->archive,
615			ARCHIVE_ERRNO_FILE_FORMAT,
616			"Pathname cannot be converted "
617			"from %s to Unicode.",
618			archive_string_conversion_charset_name(lha->sconv_dir));
619		err = ARCHIVE_FATAL;
620	} else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
621		err = ARCHIVE_FATAL;
622	if (err == ARCHIVE_FATAL) {
623		archive_mstring_clean(&conv_buffer);
624		archive_wstring_free(&pathname);
625		archive_wstring_free(&linkname);
626		return (err);
627	}
628	archive_wstring_copy(&pathname, &conv_buffer.aes_wcs);
629
630	archive_string_empty(&conv_buffer.aes_mbs);
631	archive_string_empty(&conv_buffer.aes_mbs_in_locale);
632	archive_string_empty(&conv_buffer.aes_utf8);
633	archive_wstring_empty(&conv_buffer.aes_wcs);
634	if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) {
635		archive_set_error(&a->archive,
636			ARCHIVE_ERRNO_FILE_FORMAT,
637			"Pathname cannot be converted "
638			"from %s to Unicode.",
639			archive_string_conversion_charset_name(lha->sconv_fname));
640		err = ARCHIVE_FATAL;
641	}
642	else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p))
643		err = ARCHIVE_FATAL;
644	if (err == ARCHIVE_FATAL) {
645		archive_mstring_clean(&conv_buffer);
646		archive_wstring_free(&pathname);
647		archive_wstring_free(&linkname);
648		return (err);
649	}
650	archive_wstring_concat(&pathname, &conv_buffer.aes_wcs);
651	archive_mstring_clean(&conv_buffer);
652
653	if ((lha->mode & AE_IFMT) == AE_IFLNK) {
654		/*
655	 	 * Extract the symlink-name if it's included in the pathname.
656	 	 */
657		if (!lha_parse_linkname(&linkname, &pathname)) {
658			/* We couldn't get the symlink-name. */
659			archive_set_error(&a->archive,
660		    	    ARCHIVE_ERRNO_FILE_FORMAT,
661			    "Unknown symlink-name");
662			archive_wstring_free(&pathname);
663			archive_wstring_free(&linkname);
664			return (ARCHIVE_FAILED);
665		}
666	} else {
667		/*
668		 * Make sure a file-type is set.
669		 * The mode has been overridden if it is in the extended data.
670		 */
671		lha->mode = (lha->mode & ~AE_IFMT) |
672		    ((lha->directory)? AE_IFDIR: AE_IFREG);
673	}
674	if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
675	    (lha->dos_attr & 1) != 0)
676		lha->mode &= ~(0222);/* read only. */
677
678	/*
679	 * Set basic file parameters.
680	 */
681	archive_entry_copy_pathname_w(entry, pathname.s);
682	archive_wstring_free(&pathname);
683	if (archive_strlen(&linkname) > 0) {
684		archive_entry_copy_symlink_w(entry, linkname.s);
685	} else
686		archive_entry_set_symlink(entry, NULL);
687	archive_wstring_free(&linkname);
688	/*
689	 * When a header level is 0, there is a possibility that
690	 * a pathname and a symlink has '\' character, a directory
691	 * separator in DOS/Windows. So we should convert it to '/'.
692	 */
693	if (p[H_LEVEL_OFFSET] == 0)
694		lha_replace_path_separator(lha, entry);
695
696	archive_entry_set_mode(entry, lha->mode);
697	archive_entry_set_uid(entry, lha->uid);
698	archive_entry_set_gid(entry, lha->gid);
699	if (archive_strlen(&lha->uname) > 0)
700		archive_entry_set_uname(entry, lha->uname.s);
701	if (archive_strlen(&lha->gname) > 0)
702		archive_entry_set_gname(entry, lha->gname.s);
703	if (lha->setflag & BIRTHTIME_IS_SET) {
704		archive_entry_set_birthtime(entry, lha->birthtime,
705		    lha->birthtime_tv_nsec);
706		archive_entry_set_ctime(entry, lha->birthtime,
707		    lha->birthtime_tv_nsec);
708	} else {
709		archive_entry_unset_birthtime(entry);
710		archive_entry_unset_ctime(entry);
711	}
712	archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
713	if (lha->setflag & ATIME_IS_SET)
714		archive_entry_set_atime(entry, lha->atime,
715		    lha->atime_tv_nsec);
716	else
717		archive_entry_unset_atime(entry);
718	if (lha->directory || archive_entry_symlink(entry) != NULL)
719		archive_entry_unset_size(entry);
720	else
721		archive_entry_set_size(entry, lha->origsize);
722
723	/*
724	 * Prepare variables used to read a file content.
725	 */
726	lha->entry_bytes_remaining = lha->compsize;
727	if (lha->entry_bytes_remaining < 0) {
728		archive_set_error(&a->archive,
729		    ARCHIVE_ERRNO_FILE_FORMAT,
730		    "Invalid LHa entry size");
731		return (ARCHIVE_FATAL);
732	}
733	lha->entry_offset = 0;
734	lha->entry_crc_calculated = 0;
735
736	/*
737	 * This file does not have a content.
738	 */
739	if (lha->directory || lha->compsize == 0)
740		lha->end_of_entry = 1;
741
742	sprintf(lha->format_name, "lha -%c%c%c-",
743	    lha->method[0], lha->method[1], lha->method[2]);
744	a->archive.archive_format_name = lha->format_name;
745
746	return (err);
747}
748
749/*
750 * Replace a DOS path separator '\' by a character '/'.
751 * Some multi-byte character set have  a character '\' in its second byte.
752 */
753static void
754lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
755{
756	const wchar_t *wp;
757	size_t i;
758
759	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
760		archive_wstrcpy(&(lha->ws), wp);
761		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
762			if (lha->ws.s[i] == L'\\')
763				lha->ws.s[i] = L'/';
764		}
765		archive_entry_copy_pathname_w(entry, lha->ws.s);
766	}
767
768	if ((wp = archive_entry_symlink_w(entry)) != NULL) {
769		archive_wstrcpy(&(lha->ws), wp);
770		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
771			if (lha->ws.s[i] == L'\\')
772				lha->ws.s[i] = L'/';
773		}
774		archive_entry_copy_symlink_w(entry, lha->ws.s);
775	}
776}
777
778/*
779 * Header 0 format
780 *
781 * +0              +1         +2               +7                  +11
782 * +---------------+----------+----------------+-------------------+
783 * |header size(*1)|header sum|compression type|compressed size(*2)|
784 * +---------------+----------+----------------+-------------------+
785 *                             <---------------------(*1)----------*
786 *
787 * +11               +15       +17       +19            +20              +21
788 * +-----------------+---------+---------+--------------+----------------+
789 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
790 * +-----------------+---------+---------+--------------+----------------+
791 * *--------------------------------(*1)---------------------------------*
792 *
793 * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
794 * +---------------+---------+----------+----------------+------------------+
795 * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
796 * +---------------+---------+----------+----------------+------------------+
797 *                  <--(*3)->                             <------(*2)------>
798 * *----------------------(*1)-------------------------->
799 *
800 */
801#define H0_HEADER_SIZE_OFFSET	0
802#define H0_HEADER_SUM_OFFSET	1
803#define H0_COMP_SIZE_OFFSET	7
804#define H0_ORIG_SIZE_OFFSET	11
805#define H0_DOS_TIME_OFFSET	15
806#define H0_NAME_LEN_OFFSET	21
807#define H0_FILE_NAME_OFFSET	22
808#define H0_FIXED_SIZE		24
809static int
810lha_read_file_header_0(struct archive_read *a, struct lha *lha)
811{
812	const unsigned char *p;
813	int extdsize, namelen;
814	unsigned char headersum, sum_calculated;
815
816	if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
817		return (truncated_error(a));
818	lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
819	headersum = p[H0_HEADER_SUM_OFFSET];
820	lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
821	lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
822	lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
823	namelen = p[H0_NAME_LEN_OFFSET];
824	extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
825	if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
826		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
827		    "Invalid LHa header");
828		return (ARCHIVE_FATAL);
829	}
830	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
831		return (truncated_error(a));
832
833	archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
834	/* When extdsize == -2, A CRC16 value is not present in the header. */
835	if (extdsize >= 0) {
836		lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
837		lha->setflag |= CRC_IS_SET;
838	}
839	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
840
841	/* Read an extended header */
842	if (extdsize > 0) {
843		/* This extended data is set by 'LHa for UNIX' only.
844		 * Maybe fixed size.
845		 */
846		p += H0_FILE_NAME_OFFSET + namelen + 2;
847		if (p[0] == 'U' && extdsize == 12) {
848			/* p[1] is a minor version. */
849			lha->mtime = archive_le32dec(&p[2]);
850			lha->mode = archive_le16dec(&p[6]);
851			lha->uid = archive_le16dec(&p[8]);
852			lha->gid = archive_le16dec(&p[10]);
853			lha->setflag |= UNIX_MODE_IS_SET;
854		}
855	}
856	__archive_read_consume(a, lha->header_size);
857
858	if (sum_calculated != headersum) {
859		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
860		    "LHa header sum error");
861		return (ARCHIVE_FATAL);
862	}
863
864	return (ARCHIVE_OK);
865}
866
867/*
868 * Header 1 format
869 *
870 * +0              +1         +2               +7            +11
871 * +---------------+----------+----------------+-------------+
872 * |header size(*1)|header sum|compression type|skip size(*2)|
873 * +---------------+----------+----------------+-------------+
874 *                             <---------------(*1)----------*
875 *
876 * +11               +15       +17       +19            +20              +21
877 * +-----------------+---------+---------+--------------+----------------+
878 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
879 * +-----------------+---------+---------+--------------+----------------+
880 * *-------------------------------(*1)----------------------------------*
881 *
882 * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
883 * +---------------+---------+----------+-----------+-----------+
884 * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
885 * +---------------+---------+----------+-----------+-----------+
886 *                  <--(*3)->
887 * *----------------------------(*1)----------------------------*
888 *
889 * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
890 * +----------------+---------------------+------------------------+
891 * |next header size| extended header(*5) |     compressed data    |
892 * +----------------+---------------------+------------------------+
893 * *------(*1)-----> <--------------------(*2)-------------------->
894 */
895#define H1_HEADER_SIZE_OFFSET	0
896#define H1_HEADER_SUM_OFFSET	1
897#define H1_COMP_SIZE_OFFSET	7
898#define H1_ORIG_SIZE_OFFSET	11
899#define H1_DOS_TIME_OFFSET	15
900#define H1_NAME_LEN_OFFSET	21
901#define H1_FILE_NAME_OFFSET	22
902#define H1_FIXED_SIZE		27
903static int
904lha_read_file_header_1(struct archive_read *a, struct lha *lha)
905{
906	const unsigned char *p;
907	size_t extdsize;
908	int i, err, err2;
909	int namelen, padding;
910	unsigned char headersum, sum_calculated;
911
912	err = ARCHIVE_OK;
913
914	if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
915		return (truncated_error(a));
916
917	lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
918	headersum = p[H1_HEADER_SUM_OFFSET];
919	/* Note: An extended header size is included in a compsize. */
920	lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
921	lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
922	lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
923	namelen = p[H1_NAME_LEN_OFFSET];
924	/* Calculate a padding size. The result will be normally 0 only(?) */
925	padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
926
927	if (namelen > 230 || padding < 0)
928		goto invalid;
929
930	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
931		return (truncated_error(a));
932
933	for (i = 0; i < namelen; i++) {
934		if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
935			goto invalid;/* Invalid filename. */
936	}
937	archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
938	lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
939	lha->setflag |= CRC_IS_SET;
940
941	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
942	/* Consume used bytes but not include `next header size' data
943	 * since it will be consumed in lha_read_file_extended_header(). */
944	__archive_read_consume(a, lha->header_size - 2);
945
946	/* Read extended headers */
947	err2 = lha_read_file_extended_header(a, lha, NULL, 2,
948	    (size_t)(lha->compsize + 2), &extdsize);
949	if (err2 < ARCHIVE_WARN)
950		return (err2);
951	if (err2 < err)
952		err = err2;
953	/* Get a real compressed file size. */
954	lha->compsize -= extdsize - 2;
955
956	if (lha->compsize < 0)
957		goto invalid;	/* Invalid compressed file size */
958
959	if (sum_calculated != headersum) {
960		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
961		    "LHa header sum error");
962		return (ARCHIVE_FATAL);
963	}
964	return (err);
965invalid:
966	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
967	    "Invalid LHa header");
968	return (ARCHIVE_FATAL);
969}
970
/*
 * Header 2 format
 *
 * +0              +2               +7                  +11               +15
 * +---------------+----------------+-------------------+-----------------+
 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
 * +---------------+----------------+-------------------+-----------------+
 *  <--------------------------------(*1)---------------------------------*
 *
 * +15               +19          +20              +21        +23         +24
 * +-----------------+------------+----------------+----------+-----------+
 * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
 * +-----------------+------------+----------------+----------+-----------+
 * *---------------------------------(*1)---------------------------------*
 *
 * +24              +26                 +26+(*3)      +26+(*3)+(*4)
 * +----------------+-------------------+-------------+-------------------+
 * |next header size|extended header(*3)| padding(*4) |  compressed data  |
 * +----------------+-------------------+-------------+-------------------+
 * *--------------------------(*1)-------------------> <------(*2)------->
 *
 */
993#define H2_HEADER_SIZE_OFFSET	0
994#define H2_COMP_SIZE_OFFSET	7
995#define H2_ORIG_SIZE_OFFSET	11
996#define H2_TIME_OFFSET		15
997#define H2_CRC_OFFSET		21
998#define H2_FIXED_SIZE		24
999static int
1000lha_read_file_header_2(struct archive_read *a, struct lha *lha)
1001{
1002	const unsigned char *p;
1003	size_t extdsize;
1004	int err, padding;
1005	uint16_t header_crc;
1006
1007	if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
1008		return (truncated_error(a));
1009
1010	lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
1011	lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
1012	lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
1013	lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
1014	lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
1015	lha->setflag |= CRC_IS_SET;
1016
1017	if (lha->header_size < H2_FIXED_SIZE) {
1018		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1019		    "Invalid LHa header size");
1020		return (ARCHIVE_FATAL);
1021	}
1022
1023	header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1024	__archive_read_consume(a, H2_FIXED_SIZE);
1025
1026	/* Read extended headers */
1027	err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1028		  lha->header_size - H2_FIXED_SIZE, &extdsize);
1029	if (err < ARCHIVE_WARN)
1030		return (err);
1031
1032	/* Calculate a padding size. The result will be normally 0 or 1. */
1033	padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1034	if (padding > 0) {
1035		if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1036			return (truncated_error(a));
1037		header_crc = lha_crc16(header_crc, p, padding);
1038		__archive_read_consume(a, padding);
1039	}
1040
1041	if (header_crc != lha->header_crc) {
1042		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1043		    "LHa header CRC error");
1044		return (ARCHIVE_FATAL);
1045	}
1046	return (err);
1047}
1048
/*
 * Header 3 format
 *
 * +0           +2               +7                  +11               +15
 * +------------+----------------+-------------------+-----------------+
 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
 * +------------+----------------+-------------------+-----------------+
 *  <-------------------------------(*1)-------------------------------*
 *
 * +15               +19          +20              +21        +23         +24
 * +-----------------+------------+----------------+----------+-----------+
 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
 * +-----------------+------------+----------------+----------+-----------+
 * *--------------------------------(*1)----------------------------------*
 *
 * +24             +28              +32                 +32+(*3)
 * +---------------+----------------+-------------------+-----------------+
 * |header size(*1)|next header size|extended header(*3)| compressed data |
 * +---------------+----------------+-------------------+-----------------+
 * *------------------------(*1)-----------------------> <------(*2)----->
 *
 */
1071#define H3_FIELD_LEN_OFFSET	0
1072#define H3_COMP_SIZE_OFFSET	7
1073#define H3_ORIG_SIZE_OFFSET	11
1074#define H3_TIME_OFFSET		15
1075#define H3_CRC_OFFSET		21
1076#define H3_HEADER_SIZE_OFFSET	24
1077#define H3_FIXED_SIZE		28
1078static int
1079lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1080{
1081	const unsigned char *p;
1082	size_t extdsize;
1083	int err;
1084	uint16_t header_crc;
1085
1086	if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1087		return (truncated_error(a));
1088
1089	if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1090		goto invalid;
1091	lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1092	lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1093	lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1094	lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1095	lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1096	lha->setflag |= CRC_IS_SET;
1097
1098	if (lha->header_size < H3_FIXED_SIZE + 4)
1099		goto invalid;
1100	header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1101	__archive_read_consume(a, H3_FIXED_SIZE);
1102
1103	/* Read extended headers */
1104	err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1105		  lha->header_size - H3_FIXED_SIZE, &extdsize);
1106	if (err < ARCHIVE_WARN)
1107		return (err);
1108
1109	if (header_crc != lha->header_crc) {
1110		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1111		    "LHa header CRC error");
1112		return (ARCHIVE_FATAL);
1113	}
1114	return (err);
1115invalid:
1116	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1117	    "Invalid LHa header");
1118	return (ARCHIVE_FATAL);
1119}
1120
/*
 * Extended header format
 *
 * +0             +2        +3  -- used in header 1 and 2
 * +0             +4        +5  -- used in header 3
 * +--------------+---------+-------------------+--------------+--
 * |ex-header size|header id|        data       |ex-header size| .......
 * +--------------+---------+-------------------+--------------+--
 *  <-------------( ex-header size)------------> <-- next extended header --*
 *
 * If the ex-header size is zero, it is the mark of the end of extended
 * headers.
 *
 */
1135static int
1136lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1137    uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1138{
1139	const void *h;
1140	const unsigned char *extdheader;
1141	size_t	extdsize;
1142	size_t	datasize;
1143	unsigned int i;
1144	unsigned char extdtype;
1145
1146#define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
1147#define EXT_FILENAME		0x01		/* Filename 		    */
1148#define EXT_DIRECTORY		0x02		/* Directory name	    */
1149#define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
1150#define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
1151#define EXT_FILESIZE		0x42		/* Large file size	    */
1152#define EXT_TIMEZONE		0x43		/* Time zone		    */
1153#define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
1154#define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
1155#define EXT_CODEPAGE		0x46		/* Codepage		    */
1156#define EXT_UNIX_MODE		0x50		/* File permission	    */
1157#define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
1158#define EXT_UNIX_GNAME		0x52		/* Group name		    */
1159#define EXT_UNIX_UNAME		0x53		/* User name		    */
1160#define EXT_UNIX_MTIME		0x54		/* Modified time	    */
1161#define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
1162#define EXT_NEW_ATTR		0xff		/* new attribute	    */
1163
1164	*total_size = sizefield_length;
1165
1166	for (;;) {
1167		/* Read an extended header size. */
1168		if ((h =
1169		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1170			return (truncated_error(a));
1171		/* Check if the size is the zero indicates the end of the
1172		 * extended header. */
1173		if (sizefield_length == sizeof(uint16_t))
1174			extdsize = archive_le16dec(h);
1175		else
1176			extdsize = archive_le32dec(h);
1177		if (extdsize == 0) {
1178			/* End of extended header */
1179			if (crc != NULL)
1180				*crc = lha_crc16(*crc, h, sizefield_length);
1181			__archive_read_consume(a, sizefield_length);
1182			return (ARCHIVE_OK);
1183		}
1184
1185		/* Sanity check to the extended header size. */
1186		if (((uint64_t)*total_size + extdsize) >
1187				    (uint64_t)limitsize ||
1188		    extdsize <= (size_t)sizefield_length)
1189			goto invalid;
1190
1191		/* Read the extended header. */
1192		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1193			return (truncated_error(a));
1194		*total_size += extdsize;
1195
1196		extdheader = (const unsigned char *)h;
1197		/* Get the extended header type. */
1198		extdtype = extdheader[sizefield_length];
1199		/* Calculate an extended data size. */
1200		datasize = extdsize - (1 + sizefield_length);
1201		/* Skip an extended header size field and type field. */
1202		extdheader += sizefield_length + 1;
1203
1204		if (crc != NULL && extdtype != EXT_HEADER_CRC)
1205			*crc = lha_crc16(*crc, h, extdsize);
1206		switch (extdtype) {
1207		case EXT_HEADER_CRC:
1208			/* We only use a header CRC. Following data will not
1209			 * be used. */
1210			if (datasize >= 2) {
1211				lha->header_crc = archive_le16dec(extdheader);
1212				if (crc != NULL) {
1213					static const char zeros[2] = {0, 0};
1214					*crc = lha_crc16(*crc, h,
1215					    extdsize - datasize);
1216					/* CRC value itself as zero */
1217					*crc = lha_crc16(*crc, zeros, 2);
1218					*crc = lha_crc16(*crc,
1219					    extdheader+2, datasize - 2);
1220				}
1221			}
1222			break;
1223		case EXT_FILENAME:
1224			if (datasize == 0) {
1225				/* maybe directory header */
1226				archive_string_empty(&lha->filename);
1227				break;
1228			}
1229			if (extdheader[0] == '\0')
1230				goto invalid;
1231			archive_strncpy(&lha->filename,
1232			    (const char *)extdheader, datasize);
1233			break;
1234		case EXT_UTF16_FILENAME:
1235			if (datasize == 0) {
1236				/* maybe directory header */
1237				archive_string_empty(&lha->filename);
1238				break;
1239			} else if (datasize & 1) {
1240				/* UTF-16 characters take always 2 or 4 bytes */
1241				goto invalid;
1242			}
1243			if (extdheader[0] == '\0')
1244				goto invalid;
1245			archive_string_empty(&lha->filename);
1246			archive_array_append(&lha->filename,
1247				(const char *)extdheader, datasize);
1248			/* Setup a string conversion for a filename. */
1249			lha->sconv_fname =
1250			    archive_string_conversion_from_charset(&a->archive,
1251			        "UTF-16LE", 1);
1252			if (lha->sconv_fname == NULL)
1253				return (ARCHIVE_FATAL);
1254			break;
1255		case EXT_DIRECTORY:
1256			if (datasize == 0 || extdheader[0] == '\0')
1257				/* no directory name data. exit this case. */
1258				goto invalid;
1259
1260			archive_strncpy(&lha->dirname,
1261		  	    (const char *)extdheader, datasize);
1262			/*
1263			 * Convert directory delimiter from 0xFF
1264			 * to '/' for local system.
1265	 		 */
1266			for (i = 0; i < lha->dirname.length; i++) {
1267				if ((unsigned char)lha->dirname.s[i] == 0xFF)
1268					lha->dirname.s[i] = '/';
1269			}
1270			/* Is last character directory separator? */
1271			if (lha->dirname.s[lha->dirname.length-1] != '/')
1272				/* invalid directory data */
1273				goto invalid;
1274			break;
1275		case EXT_UTF16_DIRECTORY:
1276			/* UTF-16 characters take always 2 or 4 bytes */
1277			if (datasize == 0 || (datasize & 1) ||
1278			    extdheader[0] == '\0') {
1279				/* no directory name data. exit this case. */
1280				goto invalid;
1281			}
1282
1283			archive_string_empty(&lha->dirname);
1284			archive_array_append(&lha->dirname,
1285				(const char *)extdheader, datasize);
1286			lha->sconv_dir =
1287			    archive_string_conversion_from_charset(&a->archive,
1288			        "UTF-16LE", 1);
1289			if (lha->sconv_dir == NULL)
1290				return (ARCHIVE_FATAL);
1291			else {
1292				/*
1293				 * Convert directory delimiter from 0xFFFF
1294				 * to '/' for local system.
1295				 */
1296				uint16_t dirSep;
1297				uint16_t d = 1;
1298				if (archive_be16dec(&d) == 1)
1299					dirSep = 0x2F00;
1300				else
1301					dirSep = 0x002F;
1302
1303				/* UTF-16LE character */
1304				uint16_t *utf16name =
1305				    (uint16_t *)lha->dirname.s;
1306				for (i = 0; i < lha->dirname.length / 2; i++) {
1307					if (utf16name[i] == 0xFFFF) {
1308						utf16name[i] = dirSep;
1309					}
1310				}
1311				/* Is last character directory separator? */
1312				if (utf16name[lha->dirname.length / 2 - 1] !=
1313				    dirSep) {
1314					/* invalid directory data */
1315					goto invalid;
1316				}
1317			}
1318			break;
1319		case EXT_DOS_ATTR:
1320			if (datasize == 2)
1321				lha->dos_attr = (unsigned char)
1322				    (archive_le16dec(extdheader) & 0xff);
1323			break;
1324		case EXT_TIMESTAMP:
1325			if (datasize == (sizeof(uint64_t) * 3)) {
1326				lha->birthtime = lha_win_time(
1327				    archive_le64dec(extdheader),
1328				    &lha->birthtime_tv_nsec);
1329				extdheader += sizeof(uint64_t);
1330				lha->mtime = lha_win_time(
1331				    archive_le64dec(extdheader),
1332				    &lha->mtime_tv_nsec);
1333				extdheader += sizeof(uint64_t);
1334				lha->atime = lha_win_time(
1335				    archive_le64dec(extdheader),
1336				    &lha->atime_tv_nsec);
1337				lha->setflag |= BIRTHTIME_IS_SET |
1338				    ATIME_IS_SET;
1339			}
1340			break;
1341		case EXT_FILESIZE:
1342			if (datasize == sizeof(uint64_t) * 2) {
1343				lha->compsize = archive_le64dec(extdheader);
1344				extdheader += sizeof(uint64_t);
1345				lha->origsize = archive_le64dec(extdheader);
1346			}
1347			break;
1348		case EXT_CODEPAGE:
1349			/* Get an archived filename charset from codepage.
1350			 * This overwrites the charset specified by
1351			 * hdrcharset option. */
1352			if (datasize == sizeof(uint32_t)) {
1353				struct archive_string cp;
1354				const char *charset;
1355
1356				archive_string_init(&cp);
1357				switch (archive_le32dec(extdheader)) {
1358				case 65001: /* UTF-8 */
1359					charset = "UTF-8";
1360					break;
1361				default:
1362					archive_string_sprintf(&cp, "CP%d",
1363					    (int)archive_le32dec(extdheader));
1364					charset = cp.s;
1365					break;
1366				}
1367				lha->sconv_dir =
1368				    archive_string_conversion_from_charset(
1369					&(a->archive), charset, 1);
1370				lha->sconv_fname =
1371				    archive_string_conversion_from_charset(
1372					&(a->archive), charset, 1);
1373				archive_string_free(&cp);
1374				if (lha->sconv_dir == NULL)
1375					return (ARCHIVE_FATAL);
1376				if (lha->sconv_fname == NULL)
1377					return (ARCHIVE_FATAL);
1378			}
1379			break;
1380		case EXT_UNIX_MODE:
1381			if (datasize == sizeof(uint16_t)) {
1382				lha->mode = archive_le16dec(extdheader);
1383				lha->setflag |= UNIX_MODE_IS_SET;
1384			}
1385			break;
1386		case EXT_UNIX_GID_UID:
1387			if (datasize == (sizeof(uint16_t) * 2)) {
1388				lha->gid = archive_le16dec(extdheader);
1389				lha->uid = archive_le16dec(extdheader+2);
1390			}
1391			break;
1392		case EXT_UNIX_GNAME:
1393			if (datasize > 0)
1394				archive_strncpy(&lha->gname,
1395				    (const char *)extdheader, datasize);
1396			break;
1397		case EXT_UNIX_UNAME:
1398			if (datasize > 0)
1399				archive_strncpy(&lha->uname,
1400				    (const char *)extdheader, datasize);
1401			break;
1402		case EXT_UNIX_MTIME:
1403			if (datasize == sizeof(uint32_t))
1404				lha->mtime = archive_le32dec(extdheader);
1405			break;
1406		case EXT_OS2_NEW_ATTR:
1407			/* This extended header is OS/2 depend. */
1408			if (datasize == 16) {
1409				lha->dos_attr = (unsigned char)
1410				    (archive_le16dec(extdheader) & 0xff);
1411				lha->mode = archive_le16dec(extdheader+2);
1412				lha->gid = archive_le16dec(extdheader+4);
1413				lha->uid = archive_le16dec(extdheader+6);
1414				lha->birthtime = archive_le32dec(extdheader+8);
1415				lha->atime = archive_le32dec(extdheader+12);
1416				lha->setflag |= UNIX_MODE_IS_SET
1417				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1418			}
1419			break;
1420		case EXT_NEW_ATTR:
1421			if (datasize == 20) {
1422				lha->mode = (mode_t)archive_le32dec(extdheader);
1423				lha->gid = archive_le32dec(extdheader+4);
1424				lha->uid = archive_le32dec(extdheader+8);
1425				lha->birthtime = archive_le32dec(extdheader+12);
1426				lha->atime = archive_le32dec(extdheader+16);
1427				lha->setflag |= UNIX_MODE_IS_SET
1428				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1429			}
1430			break;
1431		case EXT_TIMEZONE:		/* Not supported */
1432			break;
1433		default:
1434			break;
1435		}
1436
1437		__archive_read_consume(a, extdsize);
1438	}
1439invalid:
1440	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1441	    "Invalid extended LHa header");
1442	return (ARCHIVE_FATAL);
1443}
1444
1445static int
1446lha_end_of_entry(struct archive_read *a)
1447{
1448	struct lha *lha = (struct lha *)(a->format->data);
1449	int r = ARCHIVE_EOF;
1450
1451	if (!lha->end_of_entry_cleanup) {
1452		if ((lha->setflag & CRC_IS_SET) &&
1453		    lha->crc != lha->entry_crc_calculated) {
1454			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1455			    "LHa data CRC error");
1456			r = ARCHIVE_WARN;
1457		}
1458
1459		/* End-of-entry cleanup done. */
1460		lha->end_of_entry_cleanup = 1;
1461	}
1462	return (r);
1463}
1464
1465static int
1466archive_read_format_lha_read_data(struct archive_read *a,
1467    const void **buff, size_t *size, int64_t *offset)
1468{
1469	struct lha *lha = (struct lha *)(a->format->data);
1470	int r;
1471
1472	if (lha->entry_unconsumed) {
1473		/* Consume as much as the decompressor actually used. */
1474		__archive_read_consume(a, lha->entry_unconsumed);
1475		lha->entry_unconsumed = 0;
1476	}
1477	if (lha->end_of_entry) {
1478		*offset = lha->entry_offset;
1479		*size = 0;
1480		*buff = NULL;
1481		return (lha_end_of_entry(a));
1482	}
1483
1484	if (lha->entry_is_compressed)
1485		r =  lha_read_data_lzh(a, buff, size, offset);
1486	else
1487		/* No compression. */
1488		r =  lha_read_data_none(a, buff, size, offset);
1489	return (r);
1490}
1491
1492/*
1493 * Read a file content in no compression.
1494 *
1495 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1496 * lha->end_of_entry if it consumes all of the data.
1497 */
1498static int
1499lha_read_data_none(struct archive_read *a, const void **buff,
1500    size_t *size, int64_t *offset)
1501{
1502	struct lha *lha = (struct lha *)(a->format->data);
1503	ssize_t bytes_avail;
1504
1505	if (lha->entry_bytes_remaining == 0) {
1506		*buff = NULL;
1507		*size = 0;
1508		*offset = lha->entry_offset;
1509		lha->end_of_entry = 1;
1510		return (ARCHIVE_OK);
1511	}
1512	/*
1513	 * Note: '1' here is a performance optimization.
1514	 * Recall that the decompression layer returns a count of
1515	 * available bytes; asking for more than that forces the
1516	 * decompressor to combine reads by copying data.
1517	 */
1518	*buff = __archive_read_ahead(a, 1, &bytes_avail);
1519	if (bytes_avail <= 0) {
1520		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1521		    "Truncated LHa file data");
1522		return (ARCHIVE_FATAL);
1523	}
1524	if (bytes_avail > lha->entry_bytes_remaining)
1525		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1526	lha->entry_crc_calculated =
1527	    lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1528	*size = bytes_avail;
1529	*offset = lha->entry_offset;
1530	lha->entry_offset += bytes_avail;
1531	lha->entry_bytes_remaining -= bytes_avail;
1532	if (lha->entry_bytes_remaining == 0)
1533		lha->end_of_entry = 1;
1534	lha->entry_unconsumed = bytes_avail;
1535	return (ARCHIVE_OK);
1536}
1537
1538/*
1539 * Read a file content in LZHUFF encoding.
1540 *
1541 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1542 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1543 * all of the data.
1544 */
1545static int
1546lha_read_data_lzh(struct archive_read *a, const void **buff,
1547    size_t *size, int64_t *offset)
1548{
1549	struct lha *lha = (struct lha *)(a->format->data);
1550	ssize_t bytes_avail;
1551	int r;
1552
1553	/* If we haven't yet read any data, initialize the decompressor. */
1554	if (!lha->decompress_init) {
1555		r = lzh_decode_init(&(lha->strm), lha->method);
1556		switch (r) {
1557		case ARCHIVE_OK:
1558			break;
1559		case ARCHIVE_FAILED:
1560        		/* Unsupported compression. */
1561			*buff = NULL;
1562			*size = 0;
1563			*offset = 0;
1564			archive_set_error(&a->archive,
1565			    ARCHIVE_ERRNO_FILE_FORMAT,
1566			    "Unsupported lzh compression method -%c%c%c-",
1567			    lha->method[0], lha->method[1], lha->method[2]);
1568			/* We know compressed size; just skip it. */
1569			archive_read_format_lha_read_data_skip(a);
1570			return (ARCHIVE_WARN);
1571		default:
1572			archive_set_error(&a->archive, ENOMEM,
1573			    "Couldn't allocate memory "
1574			    "for lzh decompression");
1575			return (ARCHIVE_FATAL);
1576		}
1577		/* We've initialized decompression for this stream. */
1578		lha->decompress_init = 1;
1579		lha->strm.avail_out = 0;
1580		lha->strm.total_out = 0;
1581	}
1582
1583	/*
1584	 * Note: '1' here is a performance optimization.
1585	 * Recall that the decompression layer returns a count of
1586	 * available bytes; asking for more than that forces the
1587	 * decompressor to combine reads by copying data.
1588	 */
1589	lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1590	if (bytes_avail <= 0) {
1591		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1592		    "Truncated LHa file body");
1593		return (ARCHIVE_FATAL);
1594	}
1595	if (bytes_avail > lha->entry_bytes_remaining)
1596		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1597
1598	lha->strm.avail_in = (int)bytes_avail;
1599	lha->strm.total_in = 0;
1600	lha->strm.avail_out = 0;
1601
1602	r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1603	switch (r) {
1604	case ARCHIVE_OK:
1605		break;
1606	case ARCHIVE_EOF:
1607		lha->end_of_entry = 1;
1608		break;
1609	default:
1610		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1611		    "Bad lzh data");
1612		return (ARCHIVE_FAILED);
1613	}
1614	lha->entry_unconsumed = lha->strm.total_in;
1615	lha->entry_bytes_remaining -= lha->strm.total_in;
1616
1617	if (lha->strm.avail_out) {
1618		*offset = lha->entry_offset;
1619		*size = lha->strm.avail_out;
1620		*buff = lha->strm.ref_ptr;
1621		lha->entry_crc_calculated =
1622		    lha_crc16(lha->entry_crc_calculated, *buff, *size);
1623		lha->entry_offset += *size;
1624	} else {
1625		*offset = lha->entry_offset;
1626		*size = 0;
1627		*buff = NULL;
1628		if (lha->end_of_entry)
1629			return (lha_end_of_entry(a));
1630	}
1631	return (ARCHIVE_OK);
1632}
1633
1634/*
1635 * Skip a file content.
1636 */
1637static int
1638archive_read_format_lha_read_data_skip(struct archive_read *a)
1639{
1640	struct lha *lha;
1641	int64_t bytes_skipped;
1642
1643	lha = (struct lha *)(a->format->data);
1644
1645	if (lha->entry_unconsumed) {
1646		/* Consume as much as the decompressor actually used. */
1647		__archive_read_consume(a, lha->entry_unconsumed);
1648		lha->entry_unconsumed = 0;
1649	}
1650
1651	/* if we've already read to end of data, we're done. */
1652	if (lha->end_of_entry_cleanup)
1653		return (ARCHIVE_OK);
1654
1655	/*
1656	 * If the length is at the beginning, we can skip the
1657	 * compressed data much more quickly.
1658	 */
1659	bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1660	if (bytes_skipped < 0)
1661		return (ARCHIVE_FATAL);
1662
1663	/* This entry is finished and done. */
1664	lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1665	return (ARCHIVE_OK);
1666}
1667
1668static int
1669archive_read_format_lha_cleanup(struct archive_read *a)
1670{
1671	struct lha *lha = (struct lha *)(a->format->data);
1672
1673	lzh_decode_free(&(lha->strm));
1674	archive_string_free(&(lha->dirname));
1675	archive_string_free(&(lha->filename));
1676	archive_string_free(&(lha->uname));
1677	archive_string_free(&(lha->gname));
1678	archive_wstring_free(&(lha->ws));
1679	free(lha);
1680	(a->format->data) = NULL;
1681	return (ARCHIVE_OK);
1682}
1683
1684/*
1685 * 'LHa for UNIX' utility has archived a symbolic-link name after
1686 * a pathname with '|' character.
1687 * This function extracts the symbolic-link name from the pathname.
1688 *
1689 * example.
1690 *   1. a symbolic-name is 'aaa/bb/cc'
1691 *   2. a filename is 'xxx/bbb'
1692 *  then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1693 */
1694static int
1695lha_parse_linkname(struct archive_wstring *linkname,
1696    struct archive_wstring *pathname)
1697{
1698	wchar_t *	linkptr;
1699	size_t 	symlen;
1700
1701	linkptr = wcschr(pathname->s, L'|');
1702	if (linkptr != NULL) {
1703		symlen = wcslen(linkptr + 1);
1704		archive_wstrncpy(linkname, linkptr+1, symlen);
1705
1706		*linkptr = 0;
1707		pathname->length = wcslen(pathname->s);
1708
1709		return (1);
1710	}
1711	return (0);
1712}
1713
/*
 * Convert an MSDOS-style date/time into Unix-style time.
 *
 * `p' points at four bytes: a little-endian 16-bit time word followed
 * by a little-endian 16-bit date word.  The fields are unpacked into a
 * struct tm and converted with mktime(), i.e. interpreted as local
 * time.
 */
static time_t
lha_dos_time(const unsigned char *p)
{
	const int dos_time = archive_le16dec(p);
	const int dos_date = archive_le16dec(p + 2);
	struct tm tm;

	memset(&tm, 0, sizeof(tm));
	/* Time word: hhhhh mmmmmm sssss, seconds stored halved. */
	tm.tm_sec = (dos_time << 1) & 0x3e;
	tm.tm_min = (dos_time >> 5) & 0x3f;
	tm.tm_hour = (dos_time >> 11) & 0x1f;
	/* Date word: yyyyyyy mmmm ddddd, year counted from 1980. */
	tm.tm_mday = dos_date & 0x1f;			/* Day of month.     */
	tm.tm_mon = ((dos_date >> 5) & 0x0f) - 1;	/* Month number.     */
	tm.tm_year = ((dos_date >> 9) & 0x7f) + 80;	/* Years since 1900. */
	/* Let mktime() work out whether DST applies. */
	tm.tm_isdst = -1;
	return (mktime(&tm));
}
1734
/*
 * Convert an MS-Windows-style date/time into Unix-style time.
 *
 * `wintime' counts 100-nanosecond ticks; EPOC_TIME is the tick count
 * that corresponds to 1970-01-01 00:00:00 (UTC).  The whole seconds are
 * returned and, when `ns' is not NULL, the sub-second remainder is
 * stored there in nanoseconds.  Values earlier than EPOC_TIME yield 0
 * seconds and 0 nanoseconds.
 */
static time_t
lha_win_time(uint64_t wintime, long *ns)
{
#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
	uint64_t ticks;

	if (wintime < EPOC_TIME) {
		if (ns != NULL)
			*ns = 0;
		return (0);
	}

	ticks = wintime - EPOC_TIME;	/* ticks since the Unix epoch */
	if (ns != NULL)
		*ns = (long)(ticks % 10000000) * 100;
	return (ticks / 10000000);
}
1752
/*
 * Add `size' bytes, starting `offset' bytes into the buffer `pp', to
 * the 8-bit checksum `sum' (arithmetic is modulo 256) and return the
 * result.
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	const unsigned char *bytes = (const unsigned char *)pp + offset;
	size_t k;

	for (k = 0; k < size; k++)
		sum = (unsigned char)(sum + bytes[k]);
	return (sum);
}
1763
/*
 * Lookup tables for the CRC16 used by LHa (reflected polynomial 0xA001).
 *   crc16tbl[0] advances the CRC by one input byte.
 *   crc16tbl[1] combined with crc16tbl[0] advances it by two bytes.
 */
static uint16_t crc16tbl[2][256];

/*
 * Build crc16tbl once.  Later calls return immediately.
 */
static void
lha_crc16_init(void)
{
	static int crc16init = 0;
	unsigned int i;

	if (crc16init)
		return;

	for (i = 0; i < 256; i++) {
		unsigned int j;
		uint16_t crc = (uint16_t)i;
		for (j = 8; j; j--)
			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
		crc16tbl[0][i] = crc;
	}

	for (i = 0; i < 256; i++) {
		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
	}

	/*
	 * Mark the tables ready only after they are complete, so the flag
	 * is never set while entries are still missing.
	 * NOTE(review): this is not a memory-ordering guarantee; callers
	 * that may race should still serialize the first call.
	 */
	crc16init = 1;
}

/*
 * Update the CRC16 `crc' with `len' bytes at `pp' and return the new
 * value.  lha_crc16_init() must have filled crc16tbl beforehand.
 *
 * Input is fetched one byte at a time and assembled little-endian, so
 * the result does not depend on the alignment of `pp' or on the host
 * byte order, and the buffer is never accessed through a uint16_t
 * lvalue.
 */
static uint16_t
lha_crc16(uint16_t crc, const void *pp, size_t len)
{
	const unsigned char *p = (const unsigned char *)pp;

	/*
	 * Main loop: eight bytes per iteration, two bytes per table step.
	 * Unrolled by hand because compilers do not reliably unroll it on
	 * their own.
	 */
	for (;len >= 8; len -= 8) {
#define CRC16W	do { 							\
		crc ^= (uint16_t)(p[0] | (p[1] << 8));			\
		p += 2;							\
		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];	\
} while (0)
		CRC16W;
		CRC16W;
		CRC16W;
		CRC16W;
#undef CRC16W
	}

	/* Tail: fewer than eight bytes left, one byte per step. */
	for (;len; len--) {
		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
	}
	return crc;
}
1849
1850/*
1851 * Initialize LZHUF decoder.
1852 *
1853 * Returns ARCHIVE_OK if initialization was successful.
1854 * Returns ARCHIVE_FAILED if method is unsupported.
1855 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1856 * error occurred.
1857 */
1858static int
1859lzh_decode_init(struct lzh_stream *strm, const char *method)
1860{
1861	struct lzh_dec *ds;
1862	int w_bits, w_size;
1863
1864	if (strm->ds == NULL) {
1865		strm->ds = calloc(1, sizeof(*strm->ds));
1866		if (strm->ds == NULL)
1867			return (ARCHIVE_FATAL);
1868	}
1869	ds = strm->ds;
1870	ds->error = ARCHIVE_FAILED;
1871	if (method == NULL || method[0] != 'l' || method[1] != 'h')
1872		return (ARCHIVE_FAILED);
1873	switch (method[2]) {
1874	case '5':
1875		w_bits = 13;/* 8KiB for window */
1876		break;
1877	case '6':
1878		w_bits = 15;/* 32KiB for window */
1879		break;
1880	case '7':
1881		w_bits = 16;/* 64KiB for window */
1882		break;
1883	default:
1884		return (ARCHIVE_FAILED);/* Not supported. */
1885	}
1886	ds->error = ARCHIVE_FATAL;
1887	/* Expand a window size up to 128 KiB for decompressing process
1888	 * performance whatever its original window size is. */
1889	ds->w_size = 1U << 17;
1890	ds->w_mask = ds->w_size -1;
1891	if (ds->w_buff == NULL) {
1892		ds->w_buff = malloc(ds->w_size);
1893		if (ds->w_buff == NULL)
1894			return (ARCHIVE_FATAL);
1895	}
1896	w_size = 1U << w_bits;
1897	memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1898	ds->w_pos = 0;
1899	ds->state = 0;
1900	ds->pos_pt_len_size = w_bits + 1;
1901	ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1902	ds->literal_pt_len_size = PT_BITLEN_SIZE;
1903	ds->literal_pt_len_bits = 5;
1904	ds->br.cache_buffer = 0;
1905	ds->br.cache_avail = 0;
1906
1907	if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1908	    != ARCHIVE_OK)
1909		return (ARCHIVE_FATAL);
1910	ds->lt.len_bits = 9;
1911	if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1912	    != ARCHIVE_OK)
1913		return (ARCHIVE_FATAL);
1914	ds->error = 0;
1915
1916	return (ARCHIVE_OK);
1917}
1918
1919/*
1920 * Release LZHUF decoder.
1921 */
1922static void
1923lzh_decode_free(struct lzh_stream *strm)
1924{
1925
1926	if (strm->ds == NULL)
1927		return;
1928	free(strm->ds->w_buff);
1929	lzh_huffman_free(&(strm->ds->lt));
1930	lzh_huffman_free(&(strm->ds->pt));
1931	free(strm->ds);
1932	strm->ds = NULL;
1933}
1934
/*
 * Bit stream reader.
 *
 * All macros take a pointer to the bit reader (`br') whose cache_buffer
 * holds the pending bits and whose cache_avail counts how many of them
 * are still unread.
 */
/* Check that the cache buffer has at least `n' unread bits.
 * The argument is parenthesized so that any expression may be passed. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Peek at the next `n' unread bits without consuming them. */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[n])
/* Same as lzh_br_bits() for the case where fewer than `n' bits remain:
 * the remaining bits are shifted up and the missing low bits read as 0. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[n])
/* Read ahead to make sure the cache buffer has enough compressed data we
 * will use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : strm->next_in ran empty while filling up; more input bytes
 *          are required. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
/*  True  : the cache buffer holds at least as many bits as we need.
 *  False : there are not enough bits in the cache buffer even after
 *          trying to fill it up; more input bytes are required. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Mark `n' bits as consumed, or give `n' bits back. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))
1963
/*
 * cache_masks[n] has the low n bits set for n = 0..16; the entries for
 * n = 17..19 repeat the full 16-bit mask.
 */
static const uint16_t cache_masks[] = {
	0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f,
	0x00ff, 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff,
	0xffff,
	0xffff, 0xffff, 0xffff
};
1971
1972/*
1973 * Shift away used bits in the cache data and fill it up with following bits.
1974 * Call this when cache buffer does not have enough bits you need.
1975 *
1976 * Returns 1 if the cache buffer is full.
1977 * Returns 0 if the cache buffer is not full; input buffer is empty.
1978 */
1979static int
1980lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1981{
1982	int n = CACHE_BITS - br->cache_avail;
1983
1984	for (;;) {
1985		const int x = n >> 3;
1986		if (strm->avail_in >= x) {
1987			switch (x) {
1988			case 8:
1989				br->cache_buffer =
1990				    ((uint64_t)strm->next_in[0]) << 56 |
1991				    ((uint64_t)strm->next_in[1]) << 48 |
1992				    ((uint64_t)strm->next_in[2]) << 40 |
1993				    ((uint64_t)strm->next_in[3]) << 32 |
1994				    ((uint32_t)strm->next_in[4]) << 24 |
1995				    ((uint32_t)strm->next_in[5]) << 16 |
1996				    ((uint32_t)strm->next_in[6]) << 8 |
1997				     (uint32_t)strm->next_in[7];
1998				strm->next_in += 8;
1999				strm->avail_in -= 8;
2000				br->cache_avail += 8 * 8;
2001				return (1);
2002			case 7:
2003				br->cache_buffer =
2004		 		   (br->cache_buffer << 56) |
2005				    ((uint64_t)strm->next_in[0]) << 48 |
2006				    ((uint64_t)strm->next_in[1]) << 40 |
2007				    ((uint64_t)strm->next_in[2]) << 32 |
2008				    ((uint32_t)strm->next_in[3]) << 24 |
2009				    ((uint32_t)strm->next_in[4]) << 16 |
2010				    ((uint32_t)strm->next_in[5]) << 8 |
2011				     (uint32_t)strm->next_in[6];
2012				strm->next_in += 7;
2013				strm->avail_in -= 7;
2014				br->cache_avail += 7 * 8;
2015				return (1);
2016			case 6:
2017				br->cache_buffer =
2018		 		   (br->cache_buffer << 48) |
2019				    ((uint64_t)strm->next_in[0]) << 40 |
2020				    ((uint64_t)strm->next_in[1]) << 32 |
2021				    ((uint32_t)strm->next_in[2]) << 24 |
2022				    ((uint32_t)strm->next_in[3]) << 16 |
2023				    ((uint32_t)strm->next_in[4]) << 8 |
2024				     (uint32_t)strm->next_in[5];
2025				strm->next_in += 6;
2026				strm->avail_in -= 6;
2027				br->cache_avail += 6 * 8;
2028				return (1);
2029			case 0:
2030				/* We have enough compressed data in
2031				 * the cache buffer.*/
2032				return (1);
2033			default:
2034				break;
2035			}
2036		}
2037		if (strm->avail_in == 0) {
2038			/* There is not enough compressed data to fill up the
2039			 * cache buffer. */
2040			return (0);
2041		}
2042		br->cache_buffer =
2043		   (br->cache_buffer << 8) | *strm->next_in++;
2044		strm->avail_in--;
2045		br->cache_avail += 8;
2046		n -= 8;
2047	}
2048}
2049
2050/*
2051 * Decode LZHUF.
2052 *
2053 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
2054 *    Please set available buffer and call this function again.
2055 * 2. Returns ARCHIVE_EOF if decompression has been completed.
2056 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
2057 *    is broken or you do not set 'last' flag properly.
2058 * 4. 'last' flag is very important, you must set 1 to the flag if there
2059 *    is no input data. The lha compressed data format does not provide how
2060 *    to know the compressed data is really finished.
2061 *    Note: lha command utility check if the total size of output bytes is
2062 *    reached the uncompressed size recorded in its header. it does not mind
2063 *    that the decoding process is properly finished.
2064 *    GNU ZIP can decompress another compressed file made by SCO LZH compress.
2065 *    it handles EOF as null to fill read buffer with zero until the decoding
2066 *    process meet 2 bytes of zeros at reading a size of a next chunk, so the
2067 *    zeros are treated as the mark of the end of the data although the zeros
2068 *    is dummy, not the file data.
2069 */
2070static int	lzh_read_blocks(struct lzh_stream *, int);
2071static int	lzh_decode_blocks(struct lzh_stream *, int);
2072#define ST_RD_BLOCK		0
2073#define ST_RD_PT_1		1
2074#define ST_RD_PT_2		2
2075#define ST_RD_PT_3		3
2076#define ST_RD_PT_4		4
2077#define ST_RD_LITERAL_1		5
2078#define ST_RD_LITERAL_2		6
2079#define ST_RD_LITERAL_3		7
2080#define ST_RD_POS_DATA_1	8
2081#define ST_GET_LITERAL		9
2082#define ST_GET_POS_1		10
2083#define ST_GET_POS_2		11
2084#define ST_COPY_DATA		12
2085
2086static int
2087lzh_decode(struct lzh_stream *strm, int last)
2088{
2089	struct lzh_dec *ds = strm->ds;
2090	int avail_in;
2091	int r;
2092
2093	if (ds->error)
2094		return (ds->error);
2095
2096	avail_in = strm->avail_in;
2097	do {
2098		if (ds->state < ST_GET_LITERAL)
2099			r = lzh_read_blocks(strm, last);
2100		else
2101			r = lzh_decode_blocks(strm, last);
2102	} while (r == 100);
2103	strm->total_in += avail_in - strm->avail_in;
2104	return (r);
2105}
2106
2107static void
2108lzh_emit_window(struct lzh_stream *strm, size_t s)
2109{
2110	strm->ref_ptr = strm->ds->w_buff;
2111	strm->avail_out = (int)s;
2112	strm->total_out += s;
2113}
2114
/*
 * Read the header of the next run of blocks (states ST_RD_BLOCK through
 * ST_RD_POS_DATA_1).
 *
 * The header consists of a 16-bit block count followed by three bit-length
 * tables: ds->pt is first built as the table that decodes the literal bit
 * lengths, then ds->lt is built with it, and finally ds->pt is rebuilt as
 * the position table.  ds->reading_position tells the shared ST_RD_PT_*
 * states which of the two pt tables they are working on.
 *
 * Returns:
 *  ARCHIVE_OK     - more input is required (ds->state records where to
 *                   resume), or `last' is set and the bytes still pending
 *                   in the window were handed out.
 *  ARCHIVE_EOF    - `last' is set, fewer than 8 bits remain and nothing
 *                   is left to hand out.
 *  100            - ds->state is ST_GET_LITERAL; lzh_decode() then calls
 *                   lzh_decode_blocks().
 *  ARCHIVE_FAILED - invalid or truncated data; also stored in ds->error.
 */
2115static int
2116lzh_read_blocks(struct lzh_stream *strm, int last)
2117{
2118	struct lzh_dec *ds = strm->ds;
2119	struct lzh_br *br = &(ds->br);
2120	int c = 0, i;
2121	unsigned rbits;
2122
2123	for (;;) {
2124		switch (ds->state) {
2125		case ST_RD_BLOCK:
2126			/*
2127			 * Read a block number indicates how many blocks
2128			 * we will handle. The block is composed of a
2129			 * literal and a match, sometimes a literal only
2130			 * in particular, there are no reference data at
2131			 * the beginning of the decompression.
2132			 */
2133			if (!lzh_br_read_ahead_0(strm, br, 16)) {
2134				if (!last)
2135					/* We need following data. */
2136					return (ARCHIVE_OK);
2137				if (lzh_br_has(br, 8)) {
2138					/*
2139					 * It seems there are extra bits.
2140					 *  1. Compressed data is broken.
2141					 *  2. `last' flag does not properly
2142					 *     set.
2143					 */
2144					goto failed;
2145				}
				/* Hand out what is still pending in the
				 * window first; with w_pos reset to 0 the
				 * next call reaches the EOF return below. */
2146				if (ds->w_pos > 0) {
2147					lzh_emit_window(strm, ds->w_pos);
2148					ds->w_pos = 0;
2149					return (ARCHIVE_OK);
2150				}
2151				/* End of compressed data; we have completely
2152				 * handled all compressed data. */
2153				return (ARCHIVE_EOF);
2154			}
2155			ds->blocks_avail = lzh_br_bits(br, 16);
			/* A block count of zero is rejected. */
2156			if (ds->blocks_avail == 0)
2157				goto failed;
2158			lzh_br_consume(br, 16);
2159			/*
2160			 * Read a literal table compressed in huffman
2161			 * coding.
2162			 */
2163			ds->pt.len_size = ds->literal_pt_len_size;
2164			ds->pt.len_bits = ds->literal_pt_len_bits;
2165			ds->reading_position = 0;
2166			/* FALL THROUGH */
2167		case ST_RD_PT_1:
2168			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2169			 * used in reading both a literal table and a
2170			 * position table. */
2171			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2172				if (last)
2173					goto failed;/* Truncated data. */
2174				ds->state = ST_RD_PT_1;
2175				return (ARCHIVE_OK);
2176			}
2177			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2178			lzh_br_consume(br, ds->pt.len_bits);
2179			/* FALL THROUGH */
2180		case ST_RD_PT_2:
2181			if (ds->pt.len_avail == 0) {
2182				/* There is no bitlen. */
				/* The next len_bits bits name the single
				 * code of a table built by
				 * lzh_make_fake_table(). */
2183				if (!lzh_br_read_ahead(strm, br,
2184				    ds->pt.len_bits)) {
2185					if (last)
2186						goto failed;/* Truncated data.*/
2187					ds->state = ST_RD_PT_2;
2188					return (ARCHIVE_OK);
2189				}
2190				if (!lzh_make_fake_table(&(ds->pt),
2191				    lzh_br_bits(br, ds->pt.len_bits)))
2192					goto failed;/* Invalid data. */
2193				lzh_br_consume(br, ds->pt.len_bits);
2194				if (ds->reading_position)
2195					ds->state = ST_GET_LITERAL;
2196				else
2197					ds->state = ST_RD_LITERAL_1;
2198				break;
2199			} else if (ds->pt.len_avail > ds->pt.len_size)
2200				goto failed;/* Invalid data. */
2201			ds->loop = 0;
2202			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
			/* ST_RD_PT_3 is skipped when fewer than three bit
			 * lengths follow or when the position table is
			 * being read. */
2203			if (ds->pt.len_avail < 3 ||
2204			    ds->pt.len_size == ds->pos_pt_len_size) {
2205				ds->state = ST_RD_PT_4;
2206				break;
2207			}
2208			/* FALL THROUGH */
2209		case ST_RD_PT_3:
			/* Read the first three bit lengths, then a 2-bit
			 * count of zero entries to insert after them. */
2210			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2211			if (ds->loop < 3) {
2212				if (ds->loop < 0 || last)
2213					goto failed;/* Invalid data. */
2214				/* Not completed, get following data. */
2215				ds->state = ST_RD_PT_3;
2216				return (ARCHIVE_OK);
2217			}
2218			/* There are some null in bitlen of the literal. */
2219			if (!lzh_br_read_ahead(strm, br, 2)) {
2220				if (last)
2221					goto failed;/* Truncated data. */
2222				ds->state = ST_RD_PT_3;
2223				return (ARCHIVE_OK);
2224			}
2225			c = lzh_br_bits(br, 2);
2226			lzh_br_consume(br, 2);
2227			if (c > ds->pt.len_avail - 3)
2228				goto failed;/* Invalid data. */
2229			for (i = 3; c-- > 0 ;)
2230				ds->pt.bitlen[i++] = 0;
2231			ds->loop = i;
2232			/* FALL THROUGH */
2233		case ST_RD_PT_4:
			/* Read the remaining bit lengths and build the
			 * pt table from them. */
2234			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2235			    ds->pt.len_avail);
2236			if (ds->loop < ds->pt.len_avail) {
2237				if (ds->loop < 0 || last)
2238					goto failed;/* Invalid data. */
2239				/* Not completed, get following data. */
2240				ds->state = ST_RD_PT_4;
2241				return (ARCHIVE_OK);
2242			}
2243			if (!lzh_make_huffman_table(&(ds->pt)))
2244				goto failed;/* Invalid data */
2245			if (ds->reading_position) {
2246				ds->state = ST_GET_LITERAL;
2247				break;
2248			}
2249			/* FALL THROUGH */
2250		case ST_RD_LITERAL_1:
2251			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2252				if (last)
2253					goto failed;/* Truncated data. */
2254				ds->state = ST_RD_LITERAL_1;
2255				return (ARCHIVE_OK);
2256			}
2257			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2258			lzh_br_consume(br, ds->lt.len_bits);
2259			/* FALL THROUGH */
2260		case ST_RD_LITERAL_2:
2261			if (ds->lt.len_avail == 0) {
2262				/* There is no bitlen. */
2263				if (!lzh_br_read_ahead(strm, br,
2264				    ds->lt.len_bits)) {
2265					if (last)
2266						goto failed;/* Truncated data.*/
2267					ds->state = ST_RD_LITERAL_2;
2268					return (ARCHIVE_OK);
2269				}
2270				if (!lzh_make_fake_table(&(ds->lt),
2271				    lzh_br_bits(br, ds->lt.len_bits)))
2272					goto failed;/* Invalid data */
2273				lzh_br_consume(br, ds->lt.len_bits);
2274				ds->state = ST_RD_POS_DATA_1;
2275				break;
2276			} else if (ds->lt.len_avail > ds->lt.len_size)
2277				goto failed;/* Invalid data */
2278			ds->loop = 0;
2279			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2280			/* FALL THROUGH */
2281		case ST_RD_LITERAL_3:
			/* Decode the literal bit lengths with the pt table.
			 * ds->loop keeps the index reached so far so that
			 * the loop can resume after more input arrives. */
2282			i = ds->loop;
2283			while (i < ds->lt.len_avail) {
2284				if (!lzh_br_read_ahead(strm, br,
2285				    ds->pt.max_bits)) {
2286					if (last)
2287						goto failed;/* Truncated data.*/
2288					ds->loop = i;
2289					ds->state = ST_RD_LITERAL_3;
2290					return (ARCHIVE_OK);
2291				}
2292				rbits = lzh_br_bits(br, ds->pt.max_bits);
2293				c = lzh_decode_huffman(&(ds->pt), rbits);
2294				if (c > 2) {
2295					/* Note: 'c' will never be more than
2296					 * eighteen since it's limited by
2297					 * PT_BITLEN_SIZE, which is being set
2298					 * to ds->pt.len_size through
2299					 * ds->literal_pt_len_size. */
2300					lzh_br_consume(br, ds->pt.bitlen[c]);
2301					c -= 2;
2302					ds->lt.freq[c]++;
2303					ds->lt.bitlen[i++] = c;
2304				} else if (c == 0) {
2305					lzh_br_consume(br, ds->pt.bitlen[c]);
2306					ds->lt.bitlen[i++] = 0;
2307				} else {
2308					/* c == 1 or c == 2 */
					/* A run of zero bit lengths: code 1
					 * is followed by a 4-bit count
					 * (run = count + 3), code 2 by a
					 * 9-bit count (run = count + 20). */
2309					int n = (c == 1)?4:9;
2310					if (!lzh_br_read_ahead(strm, br,
2311					     ds->pt.bitlen[c] + n)) {
2312						if (last) /* Truncated data. */
2313							goto failed;
2314						ds->loop = i;
2315						ds->state = ST_RD_LITERAL_3;
2316						return (ARCHIVE_OK);
2317					}
2318					lzh_br_consume(br, ds->pt.bitlen[c]);
2319					c = lzh_br_bits(br, n);
2320					lzh_br_consume(br, n);
2321					c += (n == 4)?3:20;
2322					if (i + c > ds->lt.len_avail)
2323						goto failed;/* Invalid data */
2324					memset(&(ds->lt.bitlen[i]), 0, c);
2325					i += c;
2326				}
2327			}
2328			if (i > ds->lt.len_avail ||
2329			    !lzh_make_huffman_table(&(ds->lt)))
2330				goto failed;/* Invalid data */
2331			/* FALL THROUGH */
2332		case ST_RD_POS_DATA_1:
2333			/*
2334			 * Read a position table compressed in huffman
2335			 * coding.
2336			 */
			/* Re-enter the shared ST_RD_PT_* states, this time
			 * with the position table parameters. */
2337			ds->pt.len_size = ds->pos_pt_len_size;
2338			ds->pt.len_bits = ds->pos_pt_len_bits;
2339			ds->reading_position = 1;
2340			ds->state = ST_RD_PT_1;
2341			break;
2342		case ST_GET_LITERAL:
			/* Header complete: 100 makes lzh_decode() continue
			 * with lzh_decode_blocks(). */
2343			return (100);
2344		}
2345	}
2346failed:
2347	return (ds->error = ARCHIVE_FAILED);
2348}
2349
/*
 * Decode literals and matches into the window (states ST_GET_LITERAL
 * through ST_COPY_DATA).
 *
 * The function works on local copies of the bit reader, the block counter,
 * the window position and the state.  They are written back to *ds at
 * `next_data' and when the current run of blocks is used up; copy_len and
 * copy_pos are saved only on the paths that need them for resuming.
 *
 * Returns:
 *  ARCHIVE_OK     - more input is required, or the window became full and
 *                   was handed out with lzh_emit_window().
 *  100            - all blocks of this run are decoded; ds->state is
 *                   ST_RD_BLOCK so lzh_decode() calls lzh_read_blocks().
 *  ARCHIVE_FAILED - invalid or truncated data; also stored in ds->error.
 */
2350static int
2351lzh_decode_blocks(struct lzh_stream *strm, int last)
2352{
2353	struct lzh_dec *ds = strm->ds;
2354	struct lzh_br bre = ds->br;
2355	struct huffman *lt = &(ds->lt);
2356	struct huffman *pt = &(ds->pt);
2357	unsigned char *w_buff = ds->w_buff;
2358	unsigned char *lt_bitlen = lt->bitlen;
2359	unsigned char *pt_bitlen = pt->bitlen;
2360	int blocks_avail = ds->blocks_avail, c = 0;
2361	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2362	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2363	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2364	int state = ds->state;
2365
2366	for (;;) {
2367		switch (state) {
2368		case ST_GET_LITERAL:
2369			for (;;) {
2370				if (blocks_avail == 0) {
2371					/* We have decoded all blocks.
2372					 * Let's handle next blocks. */
2373					ds->state = ST_RD_BLOCK;
2374					ds->br = bre;
2375					ds->blocks_avail = 0;
2376					ds->w_pos = w_pos;
2377					ds->copy_pos = 0;
2378					return (100);
2379				}
2380
2381				/* lzh_br_read_ahead() always try to fill the
2382				 * cache buffer up. In specific situation we
2383				 * are close to the end of the data, the cache
2384				 * buffer will not be full and thus we have to
2385				 * determine if the cache buffer has some bits
2386				 * as much as we need after lzh_br_read_ahead()
2387				 * failed. */
2388				if (!lzh_br_read_ahead(strm, &bre,
2389				    lt_max_bits)) {
2390					if (!last)
2391						goto next_data;
2392					/* Remaining bits are less than
2393					 * maximum bits(lt.max_bits) but maybe
2394					 * it still remains as much as we need,
2395					 * so we should try to use it with
2396					 * dummy bits. */
2397					c = lzh_decode_huffman(lt,
2398					      lzh_br_bits_forced(&bre,
2399					        lt_max_bits));
2400					lzh_br_consume(&bre, lt_bitlen[c]);
					/* cache_avail dropped below zero:
					 * the code was longer than the
					 * bits that really remained. */
2401					if (!lzh_br_has(&bre, 0))
2402						goto failed;/* Over read. */
2403				} else {
2404					c = lzh_decode_huffman(lt,
2405					      lzh_br_bits(&bre, lt_max_bits));
2406					lzh_br_consume(&bre, lt_bitlen[c]);
2407				}
2408				blocks_avail--;
2409				if (c > UCHAR_MAX)
2410					/* Current block is a match data. */
2411					break;
2412				/*
2413				 * 'c' is exactly a literal code.
2414				 */
2415				/* Save a decoded code to reference it
2416				 * afterward. */
2417				w_buff[w_pos] = c;
				/* The window is full: hand it out and
				 * restart writing at its beginning. */
2418				if (++w_pos >= w_size) {
2419					w_pos = 0;
2420					lzh_emit_window(strm, w_size);
2421					goto next_data;
2422				}
2423			}
2424			/* 'c' is the length of a match pattern we have
2425			 * already extracted, which has be stored in
2426			 * window(ds->w_buff). */
2427			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2428			/* FALL THROUGH */
2429		case ST_GET_POS_1:
2430			/*
2431			 * Get a reference position.
2432			 */
2433			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2434				if (!last) {
2435					state = ST_GET_POS_1;
2436					ds->copy_len = copy_len;
2437					goto next_data;
2438				}
				/* Same end-of-data handling as for the
				 * literal code above. */
2439				copy_pos = lzh_decode_huffman(pt,
2440				    lzh_br_bits_forced(&bre, pt_max_bits));
2441				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2442				if (!lzh_br_has(&bre, 0))
2443					goto failed;/* Over read. */
2444			} else {
2445				copy_pos = lzh_decode_huffman(pt,
2446				    lzh_br_bits(&bre, pt_max_bits));
2447				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2448			}
2449			/* FALL THROUGH */
2450		case ST_GET_POS_2:
			/* Codes 0 and 1 are the distance itself; a larger
			 * code p + 1 is followed by p extra bits and stands
			 * for (1 << p) + extra. */
2451			if (copy_pos > 1) {
2452				/* We need an additional adjustment number to
2453				 * the position. */
2454				int p = copy_pos - 1;
2455				if (!lzh_br_read_ahead(strm, &bre, p)) {
2456					if (last)
2457						goto failed;/* Truncated data.*/
2458					state = ST_GET_POS_2;
2459					ds->copy_len = copy_len;
2460					ds->copy_pos = copy_pos;
2461					goto next_data;
2462				}
2463				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2464				lzh_br_consume(&bre, p);
2465			}
2466			/* The position is actually a distance from the last
2467			 * code we had extracted and thus we have to convert
2468			 * it to a position of the window. */
2469			copy_pos = (w_pos - copy_pos - 1) & w_mask;
2470			/* FALL THROUGH */
2471		case ST_COPY_DATA:
2472			/*
2473			 * Copy `copy_len' bytes as extracted data from
2474			 * the window into the output buffer.
2475			 */
2476			for (;;) {
2477				int l;
2478
				/* Limit this pass so that neither the source
				 * nor the destination runs past the end of
				 * the window. */
2479				l = copy_len;
2480				if (copy_pos > w_pos) {
2481					if (l > w_size - copy_pos)
2482						l = w_size - copy_pos;
2483				} else {
2484					if (l > w_size - w_pos)
2485						l = w_size - w_pos;
2486				}
2487				if ((copy_pos + l < w_pos)
2488				    || (w_pos + l < copy_pos)) {
2489					/* No overlap. */
2490					memcpy(w_buff + w_pos,
2491					    w_buff + copy_pos, l);
2492				} else {
					/* The ranges may overlap: copy
					 * forward byte by byte (two per
					 * iteration) instead of memcpy(). */
2493					const unsigned char *s;
2494					unsigned char *d;
2495					int li;
2496
2497					d = w_buff + w_pos;
2498					s = w_buff + copy_pos;
2499					for (li = 0; li < l-1;) {
2500						d[li] = s[li];li++;
2501						d[li] = s[li];li++;
2502					}
2503					if (li < l)
2504						d[li] = s[li];
2505				}
2506				w_pos += l;
2507				if (w_pos == w_size) {
					/* Window full: hand it out.  An
					 * unfinished match is resumed in
					 * ST_COPY_DATA on the next call. */
2508					w_pos = 0;
2509					lzh_emit_window(strm, w_size);
2510					if (copy_len <= l)
2511						state = ST_GET_LITERAL;
2512					else {
2513						state = ST_COPY_DATA;
2514						ds->copy_len = copy_len - l;
2515						ds->copy_pos =
2516						    (copy_pos + l) & w_mask;
2517					}
2518					goto next_data;
2519				}
2520				if (copy_len <= l)
2521					/* A copy of current pattern ended. */
2522					break;
2523				copy_len -= l;
2524				copy_pos = (copy_pos + l) & w_mask;
2525			}
2526			state = ST_GET_LITERAL;
2527			break;
2528		}
2529	}
2530failed:
2531	return (ds->error = ARCHIVE_FAILED);
2532next_data:
	/* Write the working copies back so the next call can resume. */
2533	ds->br = bre;
2534	ds->blocks_avail = blocks_avail;
2535	ds->state = state;
2536	ds->w_pos = w_pos;
2537	return (ARCHIVE_OK);
2538}
2539
2540static int
2541lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2542{
2543	int bits;
2544
2545	if (hf->bitlen == NULL) {
2546		hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2547		if (hf->bitlen == NULL)
2548			return (ARCHIVE_FATAL);
2549	}
2550	if (hf->tbl == NULL) {
2551		if (tbl_bits < HTBL_BITS)
2552			bits = tbl_bits;
2553		else
2554			bits = HTBL_BITS;
2555		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2556		if (hf->tbl == NULL)
2557			return (ARCHIVE_FATAL);
2558	}
2559	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2560		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2561		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2562		if (hf->tree == NULL)
2563			return (ARCHIVE_FATAL);
2564	}
2565	hf->len_size = (int)len_size;
2566	hf->tbl_bits = tbl_bits;
2567	return (ARCHIVE_OK);
2568}
2569
2570static void
2571lzh_huffman_free(struct huffman *hf)
2572{
2573	free(hf->bitlen);
2574	free(hf->tbl);
2575	free(hf->tree);
2576}
2577
/*
 * Lookup table for the long bit-length codes read by lzh_read_pt_bitlen().
 *
 * It is indexed by the low 10 bits of a 13-bit read whose first three bits
 * are all ones.  Each entry is the bit length (7 to 16) that the pattern
 * encodes; the caller consumes `entry - 3' bits for it.  The single 0 entry
 * at the end (index 0x3FF, all ones) marks an invalid pattern.
 *
 * Layout: 512 entries of 7, 256 of 8, 128 of 9, 64 of 10, 32 of 11,
 * 16 of 12, 8 of 13, 4 of 14, 2 of 15, one 16 and one 0.
 */
2578static const char bitlen_tbl[0x400] = {
2579	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2580	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2581	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2582	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2583	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2584	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2585	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2586	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2587	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2588	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2589	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2590	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2591	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2592	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2593	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2594	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2595	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2596	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2597	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2598	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2599	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2600	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2601	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2602	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2603	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2604	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2605	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2606	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2607	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2608	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2609	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2610	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2611	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2612	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2613	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2614	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2615	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2616	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2617	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2618	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2619	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2620	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2621	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2622	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2623	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2624	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2625	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2626	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2627	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2628	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2629	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2630	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2631	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2632	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2633	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2634	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2635	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2636	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2637	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2638	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2639	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2640	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2641	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2642	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
2643};
2644static int
2645lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2646{
2647	struct lzh_dec *ds = strm->ds;
2648	struct lzh_br *br = &(ds->br);
2649	int c, i;
2650
2651	for (i = start; i < end; ) {
2652		/*
2653		 *  bit pattern     the number we need
2654		 *     000           ->  0
2655		 *     001           ->  1
2656		 *     010           ->  2
2657		 *     ...
2658		 *     110           ->  6
2659		 *     1110          ->  7
2660		 *     11110         ->  8
2661		 *     ...
2662		 *     1111111111110 ->  16
2663		 */
2664		if (!lzh_br_read_ahead(strm, br, 3))
2665			return (i);
2666		if ((c = lzh_br_bits(br, 3)) == 7) {
2667			if (!lzh_br_read_ahead(strm, br, 13))
2668				return (i);
2669			c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2670			if (c)
2671				lzh_br_consume(br, c - 3);
2672			else
2673				return (-1);/* Invalid data. */
2674		} else
2675			lzh_br_consume(br, 3);
2676		ds->pt.bitlen[i++] = c;
2677		ds->pt.freq[c]++;
2678	}
2679	return (i);
2680}
2681
2682static int
2683lzh_make_fake_table(struct huffman *hf, uint16_t c)
2684{
2685	if (c >= hf->len_size)
2686		return (0);
2687	hf->tbl[0] = c;
2688	hf->max_bits = 0;
2689	hf->shift_bits = 0;
2690	hf->bitlen[hf->tbl[0]] = 0;
2691	return (1);
2692}
2693
2694/*
2695 * Make a huffman coding table.
 *
 * Input : hf->bitlen[0 .. hf->len_avail-1] (code length of each symbol),
 *         hf->freq[1 .. 16] (number of symbols per code length) and
 *         hf->tbl_bits (longest code length accepted).
 * Output: hf->tbl, plus hf->tree for codes longer than HTBL_BITS, and
 *         hf->max_bits, hf->shift_bits and hf->tree_used.
 *
 * In hf->tbl and in the tree nodes a value below len_avail is a symbol;
 * a value of len_avail or more refers to tree node `value - len_avail'.
 *
 * Returns 1 on success and 0 when the lengths do not describe a usable
 * code or the tree would outgrow hf->tree_avail nodes.
2696 */
2697static int
2698lzh_make_huffman_table(struct huffman *hf)
2699{
2700	uint16_t *tbl;
2701	const unsigned char *bitlen;
2702	int bitptn[17], weight[17];
2703	int i, maxbits = 0, ptn, tbl_size, w;
2704	int diffbits, len_avail;
2705
2706	/*
2707	 * Initialize bit patterns.
2708	 */
	/* bitptn[i] is the first 16-bit pattern used by codes of length i
	 * and weight[i] the number of 16-bit patterns one such code
	 * covers. */
2709	ptn = 0;
2710	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2711		bitptn[i] = ptn;
2712		weight[i] = w;
2713		if (hf->freq[i]) {
2714			ptn += hf->freq[i] * w;
2715			maxbits = i;
2716		}
2717	}
	/* The codes must cover the 16-bit pattern space exactly and none
	 * may be longer than tbl_bits. */
2718	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2719		return (0);/* Invalid */
2720
2721	hf->max_bits = maxbits;
2722
2723	/*
2724	 * Cut out extra bits which we won't house in the table.
2725	 * This preparation reduces the same calculation in the for-loop
2726	 * making the table.
2727	 */
2728	if (maxbits < 16) {
2729		int ebits = 16 - maxbits;
2730		for (i = 1; i <= maxbits; i++) {
2731			bitptn[i] >>= ebits;
2732			weight[i] >>= ebits;
2733		}
2734	}
2735	if (maxbits > HTBL_BITS) {
2736		unsigned htbl_max;
2737		uint16_t *p;
2738
		/* Scale the patterns of the short codes down to HTBL_BITS
		 * bits, then zero every direct-table slot above the ones
		 * they occupy; a zero slot means that no tree node has
		 * been attached to it yet. */
2739		diffbits = maxbits - HTBL_BITS;
2740		for (i = 1; i <= HTBL_BITS; i++) {
2741			bitptn[i] >>= diffbits;
2742			weight[i] >>= diffbits;
2743		}
2744		htbl_max = bitptn[HTBL_BITS] +
2745		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2746		p = &(hf->tbl[htbl_max]);
2747		while (p < &hf->tbl[1U<<HTBL_BITS])
2748			*p++ = 0;
2749	} else
2750		diffbits = 0;
2751	hf->shift_bits = diffbits;
2752
2753	/*
2754	 * Make the table.
2755	 */
2756	tbl_size = 1 << HTBL_BITS;
2757	tbl = hf->tbl;
2758	bitlen = hf->bitlen;
2759	len_avail = hf->len_avail;
2760	hf->tree_used = 0;
2761	for (i = 0; i < len_avail; i++) {
2762		uint16_t *p;
2763		int len, cnt;
2764		uint16_t bit;
2765		int extlen;
2766		struct htree_t *ht;
2767
2768		if (bitlen[i] == 0)
2769			continue;
2770		/* Get a bit pattern */
2771		len = bitlen[i];
2772		ptn = bitptn[len];
2773		cnt = weight[len];
2774		if (len <= HTBL_BITS) {
2775			/* Calculate next bit pattern */
2776			if ((bitptn[len] = ptn + cnt) > tbl_size)
2777				return (0);/* Invalid */
2778			/* Update the table */
			/* Store symbol i in the cnt slots starting at
			 * tbl[ptn].  Large runs are filled from the top
			 * down in groups of 8 and 16 entries. */
2779			p = &(tbl[ptn]);
2780			if (cnt > 7) {
2781				uint16_t *pc;
2782
2783				cnt -= 8;
2784				pc = &p[cnt];
2785				pc[0] = (uint16_t)i;
2786				pc[1] = (uint16_t)i;
2787				pc[2] = (uint16_t)i;
2788				pc[3] = (uint16_t)i;
2789				pc[4] = (uint16_t)i;
2790				pc[5] = (uint16_t)i;
2791				pc[6] = (uint16_t)i;
2792				pc[7] = (uint16_t)i;
2793				if (cnt > 7) {
2794					cnt -= 8;
2795					memcpy(&p[cnt], pc,
2796						8 * sizeof(uint16_t));
2797					pc = &p[cnt];
2798					while (cnt > 15) {
2799						cnt -= 16;
2800						memcpy(&p[cnt], pc,
2801							16 * sizeof(uint16_t));
2802					}
2803				}
2804				if (cnt)
2805					memcpy(p, pc, cnt * sizeof(uint16_t));
2806			} else {
2807				while (cnt > 1) {
2808					p[--cnt] = (uint16_t)i;
2809					p[--cnt] = (uint16_t)i;
2810				}
2811				if (cnt)
2812					p[--cnt] = (uint16_t)i;
2813			}
2814			continue;
2815		}
2816
2817		/*
2818		 * A bit length is too big to be housed to a direct table,
2819		 * so we use a tree model for its extra bits.
2820		 */
2821		bitptn[len] = ptn + cnt;
2822		bit = 1U << (diffbits -1);
2823		extlen = len - HTBL_BITS;
2824
		/* The leading HTBL_BITS bits select the direct-table slot
		 * that holds, or now receives, the root node for this
		 * prefix. */
2825		p = &(tbl[ptn >> diffbits]);
2826		if (*p == 0) {
2827			*p = len_avail + hf->tree_used;
2828			ht = &(hf->tree[hf->tree_used++]);
2829			if (hf->tree_used > hf->tree_avail)
2830				return (0);/* Invalid */
2831			ht->left = 0;
2832			ht->right = 0;
2833		} else {
2834			if (*p < len_avail ||
2835			    *p >= (len_avail + hf->tree_used))
2836				return (0);/* Invalid */
2837			ht = &(hf->tree[*p - len_avail]);
2838		}
		/* Walk down, or create, one node per extra bit except the
		 * last: a set bit follows `left', a clear bit `right'. */
2839		while (--extlen > 0) {
2840			if (ptn & bit) {
2841				if (ht->left < len_avail) {
2842					ht->left = len_avail + hf->tree_used;
2843					ht = &(hf->tree[hf->tree_used++]);
2844					if (hf->tree_used > hf->tree_avail)
2845						return (0);/* Invalid */
2846					ht->left = 0;
2847					ht->right = 0;
2848				} else {
2849					ht = &(hf->tree[ht->left - len_avail]);
2850				}
2851			} else {
2852				if (ht->right < len_avail) {
2853					ht->right = len_avail + hf->tree_used;
2854					ht = &(hf->tree[hf->tree_used++]);
2855					if (hf->tree_used > hf->tree_avail)
2856						return (0);/* Invalid */
2857					ht->left = 0;
2858					ht->right = 0;
2859				} else {
2860					ht = &(hf->tree[ht->right - len_avail]);
2861				}
2862			}
2863			bit >>= 1;
2864		}
		/* The last bit selects the leaf, which must still be free. */
2865		if (ptn & bit) {
2866			if (ht->left != 0)
2867				return (0);/* Invalid */
2868			ht->left = (uint16_t)i;
2869		} else {
2870			if (ht->right != 0)
2871				return (0);/* Invalid */
2872			ht->right = (uint16_t)i;
2873		}
2874	}
2875	return (1);
2876}
2877
2878static int
2879lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2880{
2881	struct htree_t *ht;
2882	int extlen;
2883
2884	ht = hf->tree;
2885	extlen = hf->shift_bits;
2886	while (c >= hf->len_avail) {
2887		c -= hf->len_avail;
2888		if (extlen-- <= 0 || c >= hf->tree_used)
2889			return (0);
2890		if (rbits & (1U << extlen))
2891			c = ht[c].left;
2892		else
2893			c = ht[c].right;
2894	}
2895	return (c);
2896}
2897
2898static inline int
2899lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2900{
2901	int c;
2902	/*
2903	 * At first search an index table for a bit pattern.
2904	 * If it fails, search a huffman tree for.
2905	 */
2906	c = hf->tbl[rbits >> hf->shift_bits];
2907	if (c < hf->len_avail || hf->len_avail == 0)
2908		return (c);
2909	/* This bit pattern needs to be found out at a huffman tree. */
2910	return (lzh_decode_huffman_tree(hf, rbits, c));
2911}
2912
2913