archive_read_support_format_lha.c revision 337352
1/*-
2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27
28#ifdef HAVE_ERRNO_H
29#include <errno.h>
30#endif
31#ifdef HAVE_LIMITS_H
32#include <limits.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_entry.h"
43#include "archive_entry_locale.h"
44#include "archive_private.h"
45#include "archive_read_private.h"
46#include "archive_endian.h"
47
48
49#define MAXMATCH		256	/* Maximum match length. */
50#define MINMATCH		3	/* Minimum match length. */
51/*
52 * Literal table format:
53 * +0              +256                      +510
54 * +---------------+-------------------------+
55 * | literal code  |       match length      |
56 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
57 * +---------------+-------------------------+
58 *  <---          LT_BITLEN_SIZE         --->
59 */
60/* Literal table size. */
61#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62/* Position table size.
63 * Note: this used for both position table and pre literal table.*/
64#define PT_BITLEN_SIZE		(3 + 16)
65
66struct lzh_dec {
67	/* Decoding status. */
68	int     		 state;
69
70	/*
71	 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
72	 * data.
73	 */
74	int			 w_size;
75	int			 w_mask;
76	/* Window buffer, which is a loop buffer. */
77	unsigned char		*w_buff;
78	/* The insert position to the window. */
79	int			 w_pos;
80	/* The position where we can copy decoded code from the window. */
81	int     		 copy_pos;
82	/* The length how many bytes we can copy decoded code from
83	 * the window. */
84	int     		 copy_len;
85
86	/*
87	 * Bit stream reader.
88	 */
89	struct lzh_br {
90#define CACHE_TYPE		uint64_t
91#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
92	 	/* Cache buffer. */
93		CACHE_TYPE	 cache_buffer;
94		/* Indicates how many bits avail in cache_buffer. */
95		int		 cache_avail;
96	} br;
97
98	/*
99	 * Huffman coding.
100	 */
101	struct huffman {
102		int		 len_size;
103		int		 len_avail;
104		int		 len_bits;
105		int		 freq[17];
106		unsigned char	*bitlen;
107
108		/*
109		 * Use a index table. It's faster than searching a huffman
110		 * coding tree, which is a binary tree. But a use of a large
111		 * index table causes L1 cache read miss many times.
112		 */
113#define HTBL_BITS	10
114		int		 max_bits;
115		int		 shift_bits;
116		int		 tbl_bits;
117		int		 tree_used;
118		int		 tree_avail;
119		/* Direct access table. */
120		uint16_t	*tbl;
121		/* Binary tree table for extra bits over the direct access. */
122		struct htree_t {
123			uint16_t left;
124			uint16_t right;
125		}		*tree;
126	}			 lt, pt;
127
128	int			 blocks_avail;
129	int			 pos_pt_len_size;
130	int			 pos_pt_len_bits;
131	int			 literal_pt_len_size;
132	int			 literal_pt_len_bits;
133	int			 reading_position;
134	int			 loop;
135	int			 error;
136};
137
138struct lzh_stream {
139	const unsigned char	*next_in;
140	int			 avail_in;
141	int64_t			 total_in;
142	const unsigned char	*ref_ptr;
143	int			 avail_out;
144	int64_t			 total_out;
145	struct lzh_dec		*ds;
146};
147
148struct lha {
149	/* entry_bytes_remaining is the number of bytes we expect.	    */
150	int64_t                  entry_offset;
151	int64_t                  entry_bytes_remaining;
152	int64_t			 entry_unconsumed;
153	uint16_t		 entry_crc_calculated;
154
155	size_t			 header_size;	/* header size		    */
156	unsigned char		 level;		/* header level		    */
157	char			 method[3];	/* compress type	    */
158	int64_t			 compsize;	/* compressed data size	    */
159	int64_t			 origsize;	/* original file size	    */
160	int			 setflag;
161#define BIRTHTIME_IS_SET	1
162#define ATIME_IS_SET		2
163#define UNIX_MODE_IS_SET	4
164#define CRC_IS_SET		8
165	time_t			 birthtime;
166	long			 birthtime_tv_nsec;
167	time_t			 mtime;
168	long			 mtime_tv_nsec;
169	time_t			 atime;
170	long			 atime_tv_nsec;
171	mode_t			 mode;
172	int64_t			 uid;
173	int64_t			 gid;
174	struct archive_string 	 uname;
175	struct archive_string 	 gname;
176	uint16_t		 header_crc;
177	uint16_t		 crc;
178	struct archive_string_conv *sconv;
179	struct archive_string_conv *opt_sconv;
180
181	struct archive_string 	 dirname;
182	struct archive_string 	 filename;
183	struct archive_wstring	 ws;
184
185	unsigned char		 dos_attr;
186
187	/* Flag to mark progress that an archive was read their first header.*/
188	char			 found_first_header;
189	/* Flag to mark that indicates an empty directory. */
190	char			 directory;
191
192	/* Flags to mark progress of decompression. */
193	char			 decompress_init;
194	char			 end_of_entry;
195	char			 end_of_entry_cleanup;
196	char			 entry_is_compressed;
197
198	char			 format_name[64];
199
200	struct lzh_stream	 strm;
201};
202
203/*
204 * LHA header common member offset.
205 */
206#define H_METHOD_OFFSET	2	/* Compress type. */
207#define H_ATTR_OFFSET	19	/* DOS attribute. */
208#define H_LEVEL_OFFSET	20	/* Header Level.  */
209#define H_SIZE		22	/* Minimum header size. */
210
211static int      archive_read_format_lha_bid(struct archive_read *, int);
212static int      archive_read_format_lha_options(struct archive_read *,
213		    const char *, const char *);
214static int	archive_read_format_lha_read_header(struct archive_read *,
215		    struct archive_entry *);
216static int	archive_read_format_lha_read_data(struct archive_read *,
217		    const void **, size_t *, int64_t *);
218static int	archive_read_format_lha_read_data_skip(struct archive_read *);
219static int	archive_read_format_lha_cleanup(struct archive_read *);
220
221static void	lha_replace_path_separator(struct lha *,
222		    struct archive_entry *);
223static int	lha_read_file_header_0(struct archive_read *, struct lha *);
224static int	lha_read_file_header_1(struct archive_read *, struct lha *);
225static int	lha_read_file_header_2(struct archive_read *, struct lha *);
226static int	lha_read_file_header_3(struct archive_read *, struct lha *);
227static int	lha_read_file_extended_header(struct archive_read *,
228		    struct lha *, uint16_t *, int, size_t, size_t *);
229static size_t	lha_check_header_format(const void *);
230static int	lha_skip_sfx(struct archive_read *);
231static time_t	lha_dos_time(const unsigned char *);
232static time_t	lha_win_time(uint64_t, long *);
233static unsigned char	lha_calcsum(unsigned char, const void *,
234		    int, size_t);
235static int	lha_parse_linkname(struct archive_string *,
236		    struct archive_string *);
237static int	lha_read_data_none(struct archive_read *, const void **,
238		    size_t *, int64_t *);
239static int	lha_read_data_lzh(struct archive_read *, const void **,
240		    size_t *, int64_t *);
241static void	lha_crc16_init(void);
242static uint16_t lha_crc16(uint16_t, const void *, size_t);
243static int	lzh_decode_init(struct lzh_stream *, const char *);
244static void	lzh_decode_free(struct lzh_stream *);
245static int	lzh_decode(struct lzh_stream *, int);
246static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
247static int	lzh_huffman_init(struct huffman *, size_t, int);
248static void	lzh_huffman_free(struct huffman *);
249static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
250static int	lzh_make_fake_table(struct huffman *, uint16_t);
251static int	lzh_make_huffman_table(struct huffman *);
252static inline int lzh_decode_huffman(struct huffman *, unsigned);
253static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
254
255
256int
257archive_read_support_format_lha(struct archive *_a)
258{
259	struct archive_read *a = (struct archive_read *)_a;
260	struct lha *lha;
261	int r;
262
263	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
264	    ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
265
266	lha = (struct lha *)calloc(1, sizeof(*lha));
267	if (lha == NULL) {
268		archive_set_error(&a->archive, ENOMEM,
269		    "Can't allocate lha data");
270		return (ARCHIVE_FATAL);
271	}
272	archive_string_init(&lha->ws);
273
274	r = __archive_read_register_format(a,
275	    lha,
276	    "lha",
277	    archive_read_format_lha_bid,
278	    archive_read_format_lha_options,
279	    archive_read_format_lha_read_header,
280	    archive_read_format_lha_read_data,
281	    archive_read_format_lha_read_data_skip,
282	    NULL,
283	    archive_read_format_lha_cleanup,
284	    NULL,
285	    NULL);
286
287	if (r != ARCHIVE_OK)
288		free(lha);
289	return (ARCHIVE_OK);
290}
291
292static size_t
293lha_check_header_format(const void *h)
294{
295	const unsigned char *p = h;
296	size_t next_skip_bytes;
297
298	switch (p[H_METHOD_OFFSET+3]) {
299	/*
300	 * "-lh0-" ... "-lh7-" "-lhd-"
301	 * "-lzs-" "-lz5-"
302	 */
303	case '0': case '1': case '2': case '3':
304	case '4': case '5': case '6': case '7':
305	case 'd':
306	case 's':
307		next_skip_bytes = 4;
308
309		/* b0 == 0 means the end of an LHa archive file.	*/
310		if (p[0] == 0)
311			break;
312		if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
313		    ||  p[H_METHOD_OFFSET+4] != '-')
314			break;
315
316		if (p[H_METHOD_OFFSET+2] == 'h') {
317			/* "-lh?-" */
318			if (p[H_METHOD_OFFSET+3] == 's')
319				break;
320			if (p[H_LEVEL_OFFSET] == 0)
321				return (0);
322			if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
323				return (0);
324		}
325		if (p[H_METHOD_OFFSET+2] == 'z') {
326			/* LArc extensions: -lzs-,-lz4- and -lz5- */
327			if (p[H_LEVEL_OFFSET] != 0)
328				break;
329			if (p[H_METHOD_OFFSET+3] == 's'
330			    || p[H_METHOD_OFFSET+3] == '4'
331			    || p[H_METHOD_OFFSET+3] == '5')
332				return (0);
333		}
334		break;
335	case 'h': next_skip_bytes = 1; break;
336	case 'z': next_skip_bytes = 1; break;
337	case 'l': next_skip_bytes = 2; break;
338	case '-': next_skip_bytes = 3; break;
339	default : next_skip_bytes = 4; break;
340	}
341
342	return (next_skip_bytes);
343}
344
345static int
346archive_read_format_lha_bid(struct archive_read *a, int best_bid)
347{
348	const char *p;
349	const void *buff;
350	ssize_t bytes_avail, offset, window;
351	size_t next;
352
353	/* If there's already a better bid than we can ever
354	   make, don't bother testing. */
355	if (best_bid > 30)
356		return (-1);
357
358	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
359		return (-1);
360
361	if (lha_check_header_format(p) == 0)
362		return (30);
363
364	if (p[0] == 'M' && p[1] == 'Z') {
365		/* PE file */
366		offset = 0;
367		window = 4096;
368		while (offset < (1024 * 20)) {
369			buff = __archive_read_ahead(a, offset + window,
370			    &bytes_avail);
371			if (buff == NULL) {
372				/* Remaining bytes are less than window. */
373				window >>= 1;
374				if (window < (H_SIZE + 3))
375					return (0);
376				continue;
377			}
378			p = (const char *)buff + offset;
379			while (p + H_SIZE < (const char *)buff + bytes_avail) {
380				if ((next = lha_check_header_format(p)) == 0)
381					return (30);
382				p += next;
383			}
384			offset = p - (const char *)buff;
385		}
386	}
387	return (0);
388}
389
390static int
391archive_read_format_lha_options(struct archive_read *a,
392    const char *key, const char *val)
393{
394	struct lha *lha;
395	int ret = ARCHIVE_FAILED;
396
397	lha = (struct lha *)(a->format->data);
398	if (strcmp(key, "hdrcharset")  == 0) {
399		if (val == NULL || val[0] == 0)
400			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
401			    "lha: hdrcharset option needs a character-set name");
402		else {
403			lha->opt_sconv =
404			    archive_string_conversion_from_charset(
405				&a->archive, val, 0);
406			if (lha->opt_sconv != NULL)
407				ret = ARCHIVE_OK;
408			else
409				ret = ARCHIVE_FATAL;
410		}
411		return (ret);
412	}
413
414	/* Note: The "warn" return is just to inform the options
415	 * supervisor that we didn't handle it.  It will generate
416	 * a suitable error if no one used this option. */
417	return (ARCHIVE_WARN);
418}
419
420static int
421lha_skip_sfx(struct archive_read *a)
422{
423	const void *h;
424	const char *p, *q;
425	size_t next, skip;
426	ssize_t bytes, window;
427
428	window = 4096;
429	for (;;) {
430		h = __archive_read_ahead(a, window, &bytes);
431		if (h == NULL) {
432			/* Remaining bytes are less than window. */
433			window >>= 1;
434			if (window < (H_SIZE + 3))
435				goto fatal;
436			continue;
437		}
438		if (bytes < H_SIZE)
439			goto fatal;
440		p = h;
441		q = p + bytes;
442
443		/*
444		 * Scan ahead until we find something that looks
445		 * like the lha header.
446		 */
447		while (p + H_SIZE < q) {
448			if ((next = lha_check_header_format(p)) == 0) {
449				skip = p - (const char *)h;
450				__archive_read_consume(a, skip);
451				return (ARCHIVE_OK);
452			}
453			p += next;
454		}
455		skip = p - (const char *)h;
456		__archive_read_consume(a, skip);
457	}
458fatal:
459	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
460	    "Couldn't find out LHa header");
461	return (ARCHIVE_FATAL);
462}
463
464static int
465truncated_error(struct archive_read *a)
466{
467	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
468	    "Truncated LHa header");
469	return (ARCHIVE_FATAL);
470}
471
472static int
473archive_read_format_lha_read_header(struct archive_read *a,
474    struct archive_entry *entry)
475{
476	struct archive_string linkname;
477	struct archive_string pathname;
478	struct lha *lha;
479	const unsigned char *p;
480	const char *signature;
481	int err;
482
483	lha_crc16_init();
484
485	a->archive.archive_format = ARCHIVE_FORMAT_LHA;
486	if (a->archive.archive_format_name == NULL)
487		a->archive.archive_format_name = "lha";
488
489	lha = (struct lha *)(a->format->data);
490	lha->decompress_init = 0;
491	lha->end_of_entry = 0;
492	lha->end_of_entry_cleanup = 0;
493	lha->entry_unconsumed = 0;
494
495	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
496		/*
497		 * LHa archiver added 0 to the tail of its archive file as
498		 * the mark of the end of the archive.
499		 */
500		signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
501		if (signature == NULL || signature[0] == 0)
502			return (ARCHIVE_EOF);
503		return (truncated_error(a));
504	}
505
506	signature = (const char *)p;
507	if (lha->found_first_header == 0 &&
508	    signature[0] == 'M' && signature[1] == 'Z') {
509                /* This is an executable?  Must be self-extracting... 	*/
510		err = lha_skip_sfx(a);
511		if (err < ARCHIVE_WARN)
512			return (err);
513
514		if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
515			return (truncated_error(a));
516		signature = (const char *)p;
517	}
518	/* signature[0] == 0 means the end of an LHa archive file. */
519	if (signature[0] == 0)
520		return (ARCHIVE_EOF);
521
522	/*
523	 * Check the header format and method type.
524	 */
525	if (lha_check_header_format(p) != 0) {
526		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
527		    "Bad LHa file");
528		return (ARCHIVE_FATAL);
529	}
530
531	/* We've found the first header. */
532	lha->found_first_header = 1;
533	/* Set a default value and common data */
534	lha->header_size = 0;
535	lha->level = p[H_LEVEL_OFFSET];
536	lha->method[0] = p[H_METHOD_OFFSET+1];
537	lha->method[1] = p[H_METHOD_OFFSET+2];
538	lha->method[2] = p[H_METHOD_OFFSET+3];
539	if (memcmp(lha->method, "lhd", 3) == 0)
540		lha->directory = 1;
541	else
542		lha->directory = 0;
543	if (memcmp(lha->method, "lh0", 3) == 0 ||
544	    memcmp(lha->method, "lz4", 3) == 0)
545		lha->entry_is_compressed = 0;
546	else
547		lha->entry_is_compressed = 1;
548
549	lha->compsize = 0;
550	lha->origsize = 0;
551	lha->setflag = 0;
552	lha->birthtime = 0;
553	lha->birthtime_tv_nsec = 0;
554	lha->mtime = 0;
555	lha->mtime_tv_nsec = 0;
556	lha->atime = 0;
557	lha->atime_tv_nsec = 0;
558	lha->mode = (lha->directory)? 0777 : 0666;
559	lha->uid = 0;
560	lha->gid = 0;
561	archive_string_empty(&lha->dirname);
562	archive_string_empty(&lha->filename);
563	lha->dos_attr = 0;
564	if (lha->opt_sconv != NULL)
565		lha->sconv = lha->opt_sconv;
566	else
567		lha->sconv = NULL;
568
569	switch (p[H_LEVEL_OFFSET]) {
570	case 0:
571		err = lha_read_file_header_0(a, lha);
572		break;
573	case 1:
574		err = lha_read_file_header_1(a, lha);
575		break;
576	case 2:
577		err = lha_read_file_header_2(a, lha);
578		break;
579	case 3:
580		err = lha_read_file_header_3(a, lha);
581		break;
582	default:
583		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
584		    "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
585		err = ARCHIVE_FATAL;
586		break;
587	}
588	if (err < ARCHIVE_WARN)
589		return (err);
590
591
592	if (!lha->directory && archive_strlen(&lha->filename) == 0)
593		/* The filename has not been set */
594		return (truncated_error(a));
595
596	/*
597	 * Make a pathname from a dirname and a filename.
598	 */
599	archive_string_concat(&lha->dirname, &lha->filename);
600	archive_string_init(&pathname);
601	archive_string_init(&linkname);
602	archive_string_copy(&pathname, &lha->dirname);
603
604	if ((lha->mode & AE_IFMT) == AE_IFLNK) {
605		/*
606	 	 * Extract the symlink-name if it's included in the pathname.
607	 	 */
608		if (!lha_parse_linkname(&linkname, &pathname)) {
609			/* We couldn't get the symlink-name. */
610			archive_set_error(&a->archive,
611		    	    ARCHIVE_ERRNO_FILE_FORMAT,
612			    "Unknown symlink-name");
613			archive_string_free(&pathname);
614			archive_string_free(&linkname);
615			return (ARCHIVE_FAILED);
616		}
617	} else {
618		/*
619		 * Make sure a file-type is set.
620		 * The mode has been overridden if it is in the extended data.
621		 */
622		lha->mode = (lha->mode & ~AE_IFMT) |
623		    ((lha->directory)? AE_IFDIR: AE_IFREG);
624	}
625	if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
626	    (lha->dos_attr & 1) != 0)
627		lha->mode &= ~(0222);/* read only. */
628
629	/*
630	 * Set basic file parameters.
631	 */
632	if (archive_entry_copy_pathname_l(entry, pathname.s,
633	    pathname.length, lha->sconv) != 0) {
634		if (errno == ENOMEM) {
635			archive_set_error(&a->archive, ENOMEM,
636			    "Can't allocate memory for Pathname");
637			return (ARCHIVE_FATAL);
638		}
639		archive_set_error(&a->archive,
640		    ARCHIVE_ERRNO_FILE_FORMAT,
641		    "Pathname cannot be converted "
642		    "from %s to current locale.",
643		    archive_string_conversion_charset_name(lha->sconv));
644		err = ARCHIVE_WARN;
645	}
646	archive_string_free(&pathname);
647	if (archive_strlen(&linkname) > 0) {
648		if (archive_entry_copy_symlink_l(entry, linkname.s,
649		    linkname.length, lha->sconv) != 0) {
650			if (errno == ENOMEM) {
651				archive_set_error(&a->archive, ENOMEM,
652				    "Can't allocate memory for Linkname");
653				return (ARCHIVE_FATAL);
654			}
655			archive_set_error(&a->archive,
656			    ARCHIVE_ERRNO_FILE_FORMAT,
657			    "Linkname cannot be converted "
658			    "from %s to current locale.",
659			    archive_string_conversion_charset_name(lha->sconv));
660			err = ARCHIVE_WARN;
661		}
662	} else
663		archive_entry_set_symlink(entry, NULL);
664	archive_string_free(&linkname);
665	/*
666	 * When a header level is 0, there is a possibility that
667	 * a pathname and a symlink has '\' character, a directory
668	 * separator in DOS/Windows. So we should convert it to '/'.
669	 */
670	if (p[H_LEVEL_OFFSET] == 0)
671		lha_replace_path_separator(lha, entry);
672
673	archive_entry_set_mode(entry, lha->mode);
674	archive_entry_set_uid(entry, lha->uid);
675	archive_entry_set_gid(entry, lha->gid);
676	if (archive_strlen(&lha->uname) > 0)
677		archive_entry_set_uname(entry, lha->uname.s);
678	if (archive_strlen(&lha->gname) > 0)
679		archive_entry_set_gname(entry, lha->gname.s);
680	if (lha->setflag & BIRTHTIME_IS_SET) {
681		archive_entry_set_birthtime(entry, lha->birthtime,
682		    lha->birthtime_tv_nsec);
683		archive_entry_set_ctime(entry, lha->birthtime,
684		    lha->birthtime_tv_nsec);
685	} else {
686		archive_entry_unset_birthtime(entry);
687		archive_entry_unset_ctime(entry);
688	}
689	archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
690	if (lha->setflag & ATIME_IS_SET)
691		archive_entry_set_atime(entry, lha->atime,
692		    lha->atime_tv_nsec);
693	else
694		archive_entry_unset_atime(entry);
695	if (lha->directory || archive_entry_symlink(entry) != NULL)
696		archive_entry_unset_size(entry);
697	else
698		archive_entry_set_size(entry, lha->origsize);
699
700	/*
701	 * Prepare variables used to read a file content.
702	 */
703	lha->entry_bytes_remaining = lha->compsize;
704	if (lha->entry_bytes_remaining < 0) {
705		archive_set_error(&a->archive,
706		    ARCHIVE_ERRNO_FILE_FORMAT,
707		    "Invalid LHa entry size");
708		return (ARCHIVE_FATAL);
709	}
710	lha->entry_offset = 0;
711	lha->entry_crc_calculated = 0;
712
713	/*
714	 * This file does not have a content.
715	 */
716	if (lha->directory || lha->compsize == 0)
717		lha->end_of_entry = 1;
718
719	sprintf(lha->format_name, "lha -%c%c%c-",
720	    lha->method[0], lha->method[1], lha->method[2]);
721	a->archive.archive_format_name = lha->format_name;
722
723	return (err);
724}
725
726/*
727 * Replace a DOS path separator '\' by a character '/'.
728 * Some multi-byte character set have  a character '\' in its second byte.
729 */
730static void
731lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
732{
733	const wchar_t *wp;
734	size_t i;
735
736	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
737		archive_wstrcpy(&(lha->ws), wp);
738		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
739			if (lha->ws.s[i] == L'\\')
740				lha->ws.s[i] = L'/';
741		}
742		archive_entry_copy_pathname_w(entry, lha->ws.s);
743	}
744
745	if ((wp = archive_entry_symlink_w(entry)) != NULL) {
746		archive_wstrcpy(&(lha->ws), wp);
747		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
748			if (lha->ws.s[i] == L'\\')
749				lha->ws.s[i] = L'/';
750		}
751		archive_entry_copy_symlink_w(entry, lha->ws.s);
752	}
753}
754
755/*
756 * Header 0 format
757 *
758 * +0              +1         +2               +7                  +11
759 * +---------------+----------+----------------+-------------------+
760 * |header size(*1)|header sum|compression type|compressed size(*2)|
761 * +---------------+----------+----------------+-------------------+
762 *                             <---------------------(*1)----------*
763 *
764 * +11               +15       +17       +19            +20              +21
765 * +-----------------+---------+---------+--------------+----------------+
766 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
767 * +-----------------+---------+---------+--------------+----------------+
768 * *--------------------------------(*1)---------------------------------*
769 *
770 * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
771 * +---------------+---------+----------+----------------+------------------+
772 * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
773 * +---------------+---------+----------+----------------+------------------+
774 *                  <--(*3)->                             <------(*2)------>
775 * *----------------------(*1)-------------------------->
776 *
777 */
778#define H0_HEADER_SIZE_OFFSET	0
779#define H0_HEADER_SUM_OFFSET	1
780#define H0_COMP_SIZE_OFFSET	7
781#define H0_ORIG_SIZE_OFFSET	11
782#define H0_DOS_TIME_OFFSET	15
783#define H0_NAME_LEN_OFFSET	21
784#define H0_FILE_NAME_OFFSET	22
785#define H0_FIXED_SIZE		24
786static int
787lha_read_file_header_0(struct archive_read *a, struct lha *lha)
788{
789	const unsigned char *p;
790	int extdsize, namelen;
791	unsigned char headersum, sum_calculated;
792
793	if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
794		return (truncated_error(a));
795	lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
796	headersum = p[H0_HEADER_SUM_OFFSET];
797	lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
798	lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
799	lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
800	namelen = p[H0_NAME_LEN_OFFSET];
801	extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
802	if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
803		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
804		    "Invalid LHa header");
805		return (ARCHIVE_FATAL);
806	}
807	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
808		return (truncated_error(a));
809
810	archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
811	/* When extdsize == -2, A CRC16 value is not present in the header. */
812	if (extdsize >= 0) {
813		lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
814		lha->setflag |= CRC_IS_SET;
815	}
816	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
817
818	/* Read an extended header */
819	if (extdsize > 0) {
820		/* This extended data is set by 'LHa for UNIX' only.
821		 * Maybe fixed size.
822		 */
823		p += H0_FILE_NAME_OFFSET + namelen + 2;
824		if (p[0] == 'U' && extdsize == 12) {
825			/* p[1] is a minor version. */
826			lha->mtime = archive_le32dec(&p[2]);
827			lha->mode = archive_le16dec(&p[6]);
828			lha->uid = archive_le16dec(&p[8]);
829			lha->gid = archive_le16dec(&p[10]);
830			lha->setflag |= UNIX_MODE_IS_SET;
831		}
832	}
833	__archive_read_consume(a, lha->header_size);
834
835	if (sum_calculated != headersum) {
836		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
837		    "LHa header sum error");
838		return (ARCHIVE_FATAL);
839	}
840
841	return (ARCHIVE_OK);
842}
843
844/*
845 * Header 1 format
846 *
847 * +0              +1         +2               +7            +11
848 * +---------------+----------+----------------+-------------+
849 * |header size(*1)|header sum|compression type|skip size(*2)|
850 * +---------------+----------+----------------+-------------+
851 *                             <---------------(*1)----------*
852 *
853 * +11               +15       +17       +19            +20              +21
854 * +-----------------+---------+---------+--------------+----------------+
855 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
856 * +-----------------+---------+---------+--------------+----------------+
857 * *-------------------------------(*1)----------------------------------*
858 *
859 * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
860 * +---------------+---------+----------+-----------+-----------+
861 * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
862 * +---------------+---------+----------+-----------+-----------+
863 *                  <--(*3)->
864 * *----------------------------(*1)----------------------------*
865 *
866 * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
867 * +----------------+---------------------+------------------------+
868 * |next header size| extended header(*5) |     compressed data    |
869 * +----------------+---------------------+------------------------+
870 * *------(*1)-----> <--------------------(*2)-------------------->
871 */
872#define H1_HEADER_SIZE_OFFSET	0
873#define H1_HEADER_SUM_OFFSET	1
874#define H1_COMP_SIZE_OFFSET	7
875#define H1_ORIG_SIZE_OFFSET	11
876#define H1_DOS_TIME_OFFSET	15
877#define H1_NAME_LEN_OFFSET	21
878#define H1_FILE_NAME_OFFSET	22
879#define H1_FIXED_SIZE		27
880static int
881lha_read_file_header_1(struct archive_read *a, struct lha *lha)
882{
883	const unsigned char *p;
884	size_t extdsize;
885	int i, err, err2;
886	int namelen, padding;
887	unsigned char headersum, sum_calculated;
888
889	err = ARCHIVE_OK;
890
891	if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
892		return (truncated_error(a));
893
894	lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
895	headersum = p[H1_HEADER_SUM_OFFSET];
896	/* Note: An extended header size is included in a compsize. */
897	lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
898	lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
899	lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
900	namelen = p[H1_NAME_LEN_OFFSET];
901	/* Calculate a padding size. The result will be normally 0 only(?) */
902	padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
903
904	if (namelen > 230 || padding < 0)
905		goto invalid;
906
907	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
908		return (truncated_error(a));
909
910	for (i = 0; i < namelen; i++) {
911		if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
912			goto invalid;/* Invalid filename. */
913	}
914	archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
915	lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
916	lha->setflag |= CRC_IS_SET;
917
918	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
919	/* Consume used bytes but not include `next header size' data
920	 * since it will be consumed in lha_read_file_extended_header(). */
921	__archive_read_consume(a, lha->header_size - 2);
922
923	/* Read extended headers */
924	err2 = lha_read_file_extended_header(a, lha, NULL, 2,
925	    (size_t)(lha->compsize + 2), &extdsize);
926	if (err2 < ARCHIVE_WARN)
927		return (err2);
928	if (err2 < err)
929		err = err2;
930	/* Get a real compressed file size. */
931	lha->compsize -= extdsize - 2;
932
933	if (lha->compsize < 0)
934		goto invalid;	/* Invalid compressed file size */
935
936	if (sum_calculated != headersum) {
937		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
938		    "LHa header sum error");
939		return (ARCHIVE_FATAL);
940	}
941	return (err);
942invalid:
943	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
944	    "Invalid LHa header");
945	return (ARCHIVE_FATAL);
946}
947
948/*
949 * Header 2 format
950 *
951 * +0              +2               +7                  +11               +15
952 * +---------------+----------------+-------------------+-----------------+
953 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
954 * +---------------+----------------+-------------------+-----------------+
955 *  <--------------------------------(*1)---------------------------------*
956 *
957 * +15               +19          +20              +21        +23         +24
958 * +-----------------+------------+----------------+----------+-----------+
959 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
960 * +-----------------+------------+----------------+----------+-----------+
961 * *---------------------------------(*1)---------------------------------*
962 *
963 * +24              +26                 +26+(*3)      +26+(*3)+(*4)
964 * +----------------+-------------------+-------------+-------------------+
965 * |next header size|extended header(*3)| padding(*4) |  compressed data  |
966 * +----------------+-------------------+-------------+-------------------+
967 * *--------------------------(*1)-------------------> <------(*2)------->
968 *
969 */
970#define H2_HEADER_SIZE_OFFSET	0
971#define H2_COMP_SIZE_OFFSET	7
972#define H2_ORIG_SIZE_OFFSET	11
973#define H2_TIME_OFFSET		15
974#define H2_CRC_OFFSET		21
975#define H2_FIXED_SIZE		24
976static int
977lha_read_file_header_2(struct archive_read *a, struct lha *lha)
978{
979	const unsigned char *p;
980	size_t extdsize;
981	int err, padding;
982	uint16_t header_crc;
983
984	if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
985		return (truncated_error(a));
986
987	lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
988	lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
989	lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
990	lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
991	lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
992	lha->setflag |= CRC_IS_SET;
993
994	if (lha->header_size < H2_FIXED_SIZE) {
995		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
996		    "Invalid LHa header size");
997		return (ARCHIVE_FATAL);
998	}
999
1000	header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1001	__archive_read_consume(a, H2_FIXED_SIZE);
1002
1003	/* Read extended headers */
1004	err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1005		  lha->header_size - H2_FIXED_SIZE, &extdsize);
1006	if (err < ARCHIVE_WARN)
1007		return (err);
1008
1009	/* Calculate a padding size. The result will be normally 0 or 1. */
1010	padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1011	if (padding > 0) {
1012		if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1013			return (truncated_error(a));
1014		header_crc = lha_crc16(header_crc, p, padding);
1015		__archive_read_consume(a, padding);
1016	}
1017
1018	if (header_crc != lha->header_crc) {
1019		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1020		    "LHa header CRC error");
1021		return (ARCHIVE_FATAL);
1022	}
1023	return (err);
1024}
1025
1026/*
1027 * Header 3 format
1028 *
1029 * +0           +2               +7                  +11               +15
1030 * +------------+----------------+-------------------+-----------------+
1031 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1032 * +------------+----------------+-------------------+-----------------+
1033 *  <-------------------------------(*1)-------------------------------*
1034 *
1035 * +15               +19          +20              +21        +23         +24
1036 * +-----------------+------------+----------------+----------+-----------+
1037 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1038 * +-----------------+------------+----------------+----------+-----------+
1039 * *--------------------------------(*1)----------------------------------*
1040 *
1041 * +24             +28              +32                 +32+(*3)
1042 * +---------------+----------------+-------------------+-----------------+
1043 * |header size(*1)|next header size|extended header(*3)| compressed data |
1044 * +---------------+----------------+-------------------+-----------------+
1045 * *------------------------(*1)-----------------------> <------(*2)----->
1046 *
1047 */
1048#define H3_FIELD_LEN_OFFSET	0
1049#define H3_COMP_SIZE_OFFSET	7
1050#define H3_ORIG_SIZE_OFFSET	11
1051#define H3_TIME_OFFSET		15
1052#define H3_CRC_OFFSET		21
1053#define H3_HEADER_SIZE_OFFSET	24
1054#define H3_FIXED_SIZE		28
1055static int
1056lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1057{
1058	const unsigned char *p;
1059	size_t extdsize;
1060	int err;
1061	uint16_t header_crc;
1062
1063	if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1064		return (truncated_error(a));
1065
1066	if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1067		goto invalid;
1068	lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1069	lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1070	lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1071	lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1072	lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1073	lha->setflag |= CRC_IS_SET;
1074
1075	if (lha->header_size < H3_FIXED_SIZE + 4)
1076		goto invalid;
1077	header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1078	__archive_read_consume(a, H3_FIXED_SIZE);
1079
1080	/* Read extended headers */
1081	err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1082		  lha->header_size - H3_FIXED_SIZE, &extdsize);
1083	if (err < ARCHIVE_WARN)
1084		return (err);
1085
1086	if (header_crc != lha->header_crc) {
1087		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1088		    "LHa header CRC error");
1089		return (ARCHIVE_FATAL);
1090	}
1091	return (err);
1092invalid:
1093	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1094	    "Invalid LHa header");
1095	return (ARCHIVE_FATAL);
1096}
1097
1098/*
1099 * Extended header format
1100 *
1101 * +0             +2        +3  -- used in header 1 and 2
1102 * +0             +4        +5  -- used in header 3
1103 * +--------------+---------+-------------------+--------------+--
1104 * |ex-header size|header id|        data       |ex-header size| .......
1105 * +--------------+---------+-------------------+--------------+--
1106 *  <-------------( ex-header size)------------> <-- next extended header --*
1107 *
1108 * If the ex-header size is zero, it is the make of the end of extended
1109 * headers.
1110 *
1111 */
1112static int
1113lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1114    uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1115{
1116	const void *h;
1117	const unsigned char *extdheader;
1118	size_t	extdsize;
1119	size_t	datasize;
1120	unsigned int i;
1121	unsigned char extdtype;
1122
1123#define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
1124#define EXT_FILENAME		0x01		/* Filename 		    */
1125#define EXT_DIRECTORY		0x02		/* Directory name	    */
1126#define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
1127#define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
1128#define EXT_FILESIZE		0x42		/* Large file size	    */
1129#define EXT_TIMEZONE		0x43		/* Time zone		    */
1130#define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
1131#define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
1132#define EXT_CODEPAGE		0x46		/* Codepage		    */
1133#define EXT_UNIX_MODE		0x50		/* File permission	    */
1134#define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
1135#define EXT_UNIX_GNAME		0x52		/* Group name		    */
1136#define EXT_UNIX_UNAME		0x53		/* User name		    */
1137#define EXT_UNIX_MTIME		0x54		/* Modified time	    */
1138#define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
1139#define EXT_NEW_ATTR		0xff		/* new attribute	    */
1140
1141	*total_size = sizefield_length;
1142
1143	for (;;) {
1144		/* Read an extended header size. */
1145		if ((h =
1146		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1147			return (truncated_error(a));
1148		/* Check if the size is the zero indicates the end of the
1149		 * extended header. */
1150		if (sizefield_length == sizeof(uint16_t))
1151			extdsize = archive_le16dec(h);
1152		else
1153			extdsize = archive_le32dec(h);
1154		if (extdsize == 0) {
1155			/* End of extended header */
1156			if (crc != NULL)
1157				*crc = lha_crc16(*crc, h, sizefield_length);
1158			__archive_read_consume(a, sizefield_length);
1159			return (ARCHIVE_OK);
1160		}
1161
1162		/* Sanity check to the extended header size. */
1163		if (((uint64_t)*total_size + extdsize) >
1164				    (uint64_t)limitsize ||
1165		    extdsize <= (size_t)sizefield_length)
1166			goto invalid;
1167
1168		/* Read the extended header. */
1169		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1170			return (truncated_error(a));
1171		*total_size += extdsize;
1172
1173		extdheader = (const unsigned char *)h;
1174		/* Get the extended header type. */
1175		extdtype = extdheader[sizefield_length];
1176		/* Calculate an extended data size. */
1177		datasize = extdsize - (1 + sizefield_length);
1178		/* Skip an extended header size field and type field. */
1179		extdheader += sizefield_length + 1;
1180
1181		if (crc != NULL && extdtype != EXT_HEADER_CRC)
1182			*crc = lha_crc16(*crc, h, extdsize);
1183		switch (extdtype) {
1184		case EXT_HEADER_CRC:
1185			/* We only use a header CRC. Following data will not
1186			 * be used. */
1187			if (datasize >= 2) {
1188				lha->header_crc = archive_le16dec(extdheader);
1189				if (crc != NULL) {
1190					static const char zeros[2] = {0, 0};
1191					*crc = lha_crc16(*crc, h,
1192					    extdsize - datasize);
1193					/* CRC value itself as zero */
1194					*crc = lha_crc16(*crc, zeros, 2);
1195					*crc = lha_crc16(*crc,
1196					    extdheader+2, datasize - 2);
1197				}
1198			}
1199			break;
1200		case EXT_FILENAME:
1201			if (datasize == 0) {
1202				/* maybe directory header */
1203				archive_string_empty(&lha->filename);
1204				break;
1205			}
1206			if (extdheader[0] == '\0')
1207				goto invalid;
1208			archive_strncpy(&lha->filename,
1209			    (const char *)extdheader, datasize);
1210			break;
1211		case EXT_DIRECTORY:
1212			if (datasize == 0 || extdheader[0] == '\0')
1213				/* no directory name data. exit this case. */
1214				goto invalid;
1215
1216			archive_strncpy(&lha->dirname,
1217		  	    (const char *)extdheader, datasize);
1218			/*
1219			 * Convert directory delimiter from 0xFF
1220			 * to '/' for local system.
1221	 		 */
1222			for (i = 0; i < lha->dirname.length; i++) {
1223				if ((unsigned char)lha->dirname.s[i] == 0xFF)
1224					lha->dirname.s[i] = '/';
1225			}
1226			/* Is last character directory separator? */
1227			if (lha->dirname.s[lha->dirname.length-1] != '/')
1228				/* invalid directory data */
1229				goto invalid;
1230			break;
1231		case EXT_DOS_ATTR:
1232			if (datasize == 2)
1233				lha->dos_attr = (unsigned char)
1234				    (archive_le16dec(extdheader) & 0xff);
1235			break;
1236		case EXT_TIMESTAMP:
1237			if (datasize == (sizeof(uint64_t) * 3)) {
1238				lha->birthtime = lha_win_time(
1239				    archive_le64dec(extdheader),
1240				    &lha->birthtime_tv_nsec);
1241				extdheader += sizeof(uint64_t);
1242				lha->mtime = lha_win_time(
1243				    archive_le64dec(extdheader),
1244				    &lha->mtime_tv_nsec);
1245				extdheader += sizeof(uint64_t);
1246				lha->atime = lha_win_time(
1247				    archive_le64dec(extdheader),
1248				    &lha->atime_tv_nsec);
1249				lha->setflag |= BIRTHTIME_IS_SET |
1250				    ATIME_IS_SET;
1251			}
1252			break;
1253		case EXT_FILESIZE:
1254			if (datasize == sizeof(uint64_t) * 2) {
1255				lha->compsize = archive_le64dec(extdheader);
1256				extdheader += sizeof(uint64_t);
1257				lha->origsize = archive_le64dec(extdheader);
1258			}
1259			break;
1260		case EXT_CODEPAGE:
1261			/* Get an archived filename charset from codepage.
1262			 * This overwrites the charset specified by
1263			 * hdrcharset option. */
1264			if (datasize == sizeof(uint32_t)) {
1265				struct archive_string cp;
1266				const char *charset;
1267
1268				archive_string_init(&cp);
1269				switch (archive_le32dec(extdheader)) {
1270				case 65001: /* UTF-8 */
1271					charset = "UTF-8";
1272					break;
1273				default:
1274					archive_string_sprintf(&cp, "CP%d",
1275					    (int)archive_le32dec(extdheader));
1276					charset = cp.s;
1277					break;
1278				}
1279				lha->sconv =
1280				    archive_string_conversion_from_charset(
1281					&(a->archive), charset, 1);
1282				archive_string_free(&cp);
1283				if (lha->sconv == NULL)
1284					return (ARCHIVE_FATAL);
1285			}
1286			break;
1287		case EXT_UNIX_MODE:
1288			if (datasize == sizeof(uint16_t)) {
1289				lha->mode = archive_le16dec(extdheader);
1290				lha->setflag |= UNIX_MODE_IS_SET;
1291			}
1292			break;
1293		case EXT_UNIX_GID_UID:
1294			if (datasize == (sizeof(uint16_t) * 2)) {
1295				lha->gid = archive_le16dec(extdheader);
1296				lha->uid = archive_le16dec(extdheader+2);
1297			}
1298			break;
1299		case EXT_UNIX_GNAME:
1300			if (datasize > 0)
1301				archive_strncpy(&lha->gname,
1302				    (const char *)extdheader, datasize);
1303			break;
1304		case EXT_UNIX_UNAME:
1305			if (datasize > 0)
1306				archive_strncpy(&lha->uname,
1307				    (const char *)extdheader, datasize);
1308			break;
1309		case EXT_UNIX_MTIME:
1310			if (datasize == sizeof(uint32_t))
1311				lha->mtime = archive_le32dec(extdheader);
1312			break;
1313		case EXT_OS2_NEW_ATTR:
1314			/* This extended header is OS/2 depend. */
1315			if (datasize == 16) {
1316				lha->dos_attr = (unsigned char)
1317				    (archive_le16dec(extdheader) & 0xff);
1318				lha->mode = archive_le16dec(extdheader+2);
1319				lha->gid = archive_le16dec(extdheader+4);
1320				lha->uid = archive_le16dec(extdheader+6);
1321				lha->birthtime = archive_le32dec(extdheader+8);
1322				lha->atime = archive_le32dec(extdheader+12);
1323				lha->setflag |= UNIX_MODE_IS_SET
1324				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1325			}
1326			break;
1327		case EXT_NEW_ATTR:
1328			if (datasize == 20) {
1329				lha->mode = (mode_t)archive_le32dec(extdheader);
1330				lha->gid = archive_le32dec(extdheader+4);
1331				lha->uid = archive_le32dec(extdheader+8);
1332				lha->birthtime = archive_le32dec(extdheader+12);
1333				lha->atime = archive_le32dec(extdheader+16);
1334				lha->setflag |= UNIX_MODE_IS_SET
1335				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1336			}
1337			break;
1338		case EXT_TIMEZONE:		/* Not supported */
1339		case EXT_UTF16_FILENAME:	/* Not supported */
1340		case EXT_UTF16_DIRECTORY:	/* Not supported */
1341		default:
1342			break;
1343		}
1344
1345		__archive_read_consume(a, extdsize);
1346	}
1347invalid:
1348	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1349	    "Invalid extended LHa header");
1350	return (ARCHIVE_FATAL);
1351}
1352
1353static int
1354lha_end_of_entry(struct archive_read *a)
1355{
1356	struct lha *lha = (struct lha *)(a->format->data);
1357	int r = ARCHIVE_EOF;
1358
1359	if (!lha->end_of_entry_cleanup) {
1360		if ((lha->setflag & CRC_IS_SET) &&
1361		    lha->crc != lha->entry_crc_calculated) {
1362			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1363			    "LHa data CRC error");
1364			r = ARCHIVE_WARN;
1365		}
1366
1367		/* End-of-entry cleanup done. */
1368		lha->end_of_entry_cleanup = 1;
1369	}
1370	return (r);
1371}
1372
1373static int
1374archive_read_format_lha_read_data(struct archive_read *a,
1375    const void **buff, size_t *size, int64_t *offset)
1376{
1377	struct lha *lha = (struct lha *)(a->format->data);
1378	int r;
1379
1380	if (lha->entry_unconsumed) {
1381		/* Consume as much as the decompressor actually used. */
1382		__archive_read_consume(a, lha->entry_unconsumed);
1383		lha->entry_unconsumed = 0;
1384	}
1385	if (lha->end_of_entry) {
1386		*offset = lha->entry_offset;
1387		*size = 0;
1388		*buff = NULL;
1389		return (lha_end_of_entry(a));
1390	}
1391
1392	if (lha->entry_is_compressed)
1393		r =  lha_read_data_lzh(a, buff, size, offset);
1394	else
1395		/* No compression. */
1396		r =  lha_read_data_none(a, buff, size, offset);
1397	return (r);
1398}
1399
1400/*
1401 * Read a file content in no compression.
1402 *
1403 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1404 * lha->end_of_entry if it consumes all of the data.
1405 */
1406static int
1407lha_read_data_none(struct archive_read *a, const void **buff,
1408    size_t *size, int64_t *offset)
1409{
1410	struct lha *lha = (struct lha *)(a->format->data);
1411	ssize_t bytes_avail;
1412
1413	if (lha->entry_bytes_remaining == 0) {
1414		*buff = NULL;
1415		*size = 0;
1416		*offset = lha->entry_offset;
1417		lha->end_of_entry = 1;
1418		return (ARCHIVE_OK);
1419	}
1420	/*
1421	 * Note: '1' here is a performance optimization.
1422	 * Recall that the decompression layer returns a count of
1423	 * available bytes; asking for more than that forces the
1424	 * decompressor to combine reads by copying data.
1425	 */
1426	*buff = __archive_read_ahead(a, 1, &bytes_avail);
1427	if (bytes_avail <= 0) {
1428		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1429		    "Truncated LHa file data");
1430		return (ARCHIVE_FATAL);
1431	}
1432	if (bytes_avail > lha->entry_bytes_remaining)
1433		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1434	lha->entry_crc_calculated =
1435	    lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1436	*size = bytes_avail;
1437	*offset = lha->entry_offset;
1438	lha->entry_offset += bytes_avail;
1439	lha->entry_bytes_remaining -= bytes_avail;
1440	if (lha->entry_bytes_remaining == 0)
1441		lha->end_of_entry = 1;
1442	lha->entry_unconsumed = bytes_avail;
1443	return (ARCHIVE_OK);
1444}
1445
1446/*
1447 * Read a file content in LZHUFF encoding.
1448 *
1449 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1450 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1451 * all of the data.
1452 */
1453static int
1454lha_read_data_lzh(struct archive_read *a, const void **buff,
1455    size_t *size, int64_t *offset)
1456{
1457	struct lha *lha = (struct lha *)(a->format->data);
1458	ssize_t bytes_avail;
1459	int r;
1460
1461	/* If we haven't yet read any data, initialize the decompressor. */
1462	if (!lha->decompress_init) {
1463		r = lzh_decode_init(&(lha->strm), lha->method);
1464		switch (r) {
1465		case ARCHIVE_OK:
1466			break;
1467		case ARCHIVE_FAILED:
1468        		/* Unsupported compression. */
1469			*buff = NULL;
1470			*size = 0;
1471			*offset = 0;
1472			archive_set_error(&a->archive,
1473			    ARCHIVE_ERRNO_FILE_FORMAT,
1474			    "Unsupported lzh compression method -%c%c%c-",
1475			    lha->method[0], lha->method[1], lha->method[2]);
1476			/* We know compressed size; just skip it. */
1477			archive_read_format_lha_read_data_skip(a);
1478			return (ARCHIVE_WARN);
1479		default:
1480			archive_set_error(&a->archive, ENOMEM,
1481			    "Couldn't allocate memory "
1482			    "for lzh decompression");
1483			return (ARCHIVE_FATAL);
1484		}
1485		/* We've initialized decompression for this stream. */
1486		lha->decompress_init = 1;
1487		lha->strm.avail_out = 0;
1488		lha->strm.total_out = 0;
1489	}
1490
1491	/*
1492	 * Note: '1' here is a performance optimization.
1493	 * Recall that the decompression layer returns a count of
1494	 * available bytes; asking for more than that forces the
1495	 * decompressor to combine reads by copying data.
1496	 */
1497	lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1498	if (bytes_avail <= 0) {
1499		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1500		    "Truncated LHa file body");
1501		return (ARCHIVE_FATAL);
1502	}
1503	if (bytes_avail > lha->entry_bytes_remaining)
1504		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1505
1506	lha->strm.avail_in = (int)bytes_avail;
1507	lha->strm.total_in = 0;
1508	lha->strm.avail_out = 0;
1509
1510	r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1511	switch (r) {
1512	case ARCHIVE_OK:
1513		break;
1514	case ARCHIVE_EOF:
1515		lha->end_of_entry = 1;
1516		break;
1517	default:
1518		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1519		    "Bad lzh data");
1520		return (ARCHIVE_FAILED);
1521	}
1522	lha->entry_unconsumed = lha->strm.total_in;
1523	lha->entry_bytes_remaining -= lha->strm.total_in;
1524
1525	if (lha->strm.avail_out) {
1526		*offset = lha->entry_offset;
1527		*size = lha->strm.avail_out;
1528		*buff = lha->strm.ref_ptr;
1529		lha->entry_crc_calculated =
1530		    lha_crc16(lha->entry_crc_calculated, *buff, *size);
1531		lha->entry_offset += *size;
1532	} else {
1533		*offset = lha->entry_offset;
1534		*size = 0;
1535		*buff = NULL;
1536		if (lha->end_of_entry)
1537			return (lha_end_of_entry(a));
1538	}
1539	return (ARCHIVE_OK);
1540}
1541
1542/*
1543 * Skip a file content.
1544 */
1545static int
1546archive_read_format_lha_read_data_skip(struct archive_read *a)
1547{
1548	struct lha *lha;
1549	int64_t bytes_skipped;
1550
1551	lha = (struct lha *)(a->format->data);
1552
1553	if (lha->entry_unconsumed) {
1554		/* Consume as much as the decompressor actually used. */
1555		__archive_read_consume(a, lha->entry_unconsumed);
1556		lha->entry_unconsumed = 0;
1557	}
1558
1559	/* if we've already read to end of data, we're done. */
1560	if (lha->end_of_entry_cleanup)
1561		return (ARCHIVE_OK);
1562
1563	/*
1564	 * If the length is at the beginning, we can skip the
1565	 * compressed data much more quickly.
1566	 */
1567	bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1568	if (bytes_skipped < 0)
1569		return (ARCHIVE_FATAL);
1570
1571	/* This entry is finished and done. */
1572	lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1573	return (ARCHIVE_OK);
1574}
1575
1576static int
1577archive_read_format_lha_cleanup(struct archive_read *a)
1578{
1579	struct lha *lha = (struct lha *)(a->format->data);
1580
1581	lzh_decode_free(&(lha->strm));
1582	archive_string_free(&(lha->dirname));
1583	archive_string_free(&(lha->filename));
1584	archive_string_free(&(lha->uname));
1585	archive_string_free(&(lha->gname));
1586	archive_wstring_free(&(lha->ws));
1587	free(lha);
1588	(a->format->data) = NULL;
1589	return (ARCHIVE_OK);
1590}
1591
1592/*
1593 * 'LHa for UNIX' utility has archived a symbolic-link name after
1594 * a pathname with '|' character.
1595 * This function extracts the symbolic-link name from the pathname.
1596 *
1597 * example.
1598 *   1. a symbolic-name is 'aaa/bb/cc'
1599 *   2. a filename is 'xxx/bbb'
1600 *  then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1601 */
1602static int
1603lha_parse_linkname(struct archive_string *linkname,
1604    struct archive_string *pathname)
1605{
1606	char *	linkptr;
1607	size_t 	symlen;
1608
1609	linkptr = strchr(pathname->s, '|');
1610	if (linkptr != NULL) {
1611		symlen = strlen(linkptr + 1);
1612		archive_strncpy(linkname, linkptr+1, symlen);
1613
1614		*linkptr = 0;
1615		pathname->length = strlen(pathname->s);
1616
1617		return (1);
1618	}
1619	return (0);
1620}
1621
1622/* Convert an MSDOS-style date/time into Unix-style time. */
1623static time_t
1624lha_dos_time(const unsigned char *p)
1625{
1626	int msTime, msDate;
1627	struct tm ts;
1628
1629	msTime = archive_le16dec(p);
1630	msDate = archive_le16dec(p+2);
1631
1632	memset(&ts, 0, sizeof(ts));
1633	ts.tm_year = ((msDate >> 9) & 0x7f) + 80;   /* Years since 1900. */
1634	ts.tm_mon = ((msDate >> 5) & 0x0f) - 1;     /* Month number.     */
1635	ts.tm_mday = msDate & 0x1f;		    /* Day of month.     */
1636	ts.tm_hour = (msTime >> 11) & 0x1f;
1637	ts.tm_min = (msTime >> 5) & 0x3f;
1638	ts.tm_sec = (msTime << 1) & 0x3e;
1639	ts.tm_isdst = -1;
1640	return (mktime(&ts));
1641}
1642
1643/* Convert an MS-Windows-style date/time into Unix-style time. */
1644static time_t
1645lha_win_time(uint64_t wintime, long *ns)
1646{
1647#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
1648
1649	if (wintime >= EPOC_TIME) {
1650		wintime -= EPOC_TIME;	/* 1970-01-01 00:00:00 (UTC) */
1651		if (ns != NULL)
1652			*ns = (long)(wintime % 10000000) * 100;
1653		return (wintime / 10000000);
1654	} else {
1655		if (ns != NULL)
1656			*ns = 0;
1657		return (0);
1658	}
1659}
1660
1661static unsigned char
1662lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
1663{
1664	unsigned char const *p = (unsigned char const *)pp;
1665
1666	p += offset;
1667	for (;size > 0; --size)
1668		sum += *p++;
1669	return (sum);
1670}
1671
1672static uint16_t crc16tbl[2][256];
1673static void
1674lha_crc16_init(void)
1675{
1676	unsigned int i;
1677	static int crc16init = 0;
1678
1679	if (crc16init)
1680		return;
1681	crc16init = 1;
1682
1683	for (i = 0; i < 256; i++) {
1684		unsigned int j;
1685		uint16_t crc = (uint16_t)i;
1686		for (j = 8; j; j--)
1687			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
1688		crc16tbl[0][i] = crc;
1689	}
1690
1691	for (i = 0; i < 256; i++) {
1692		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
1693			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
1694	}
1695}
1696
1697static uint16_t
1698lha_crc16(uint16_t crc, const void *pp, size_t len)
1699{
1700	const unsigned char *p = (const unsigned char *)pp;
1701	const uint16_t *buff;
1702	const union {
1703		uint32_t i;
1704		char c[4];
1705	} u = { 0x01020304 };
1706
1707	if (len == 0)
1708		return crc;
1709
1710	/* Process unaligned address. */
1711	if (((uintptr_t)p) & (uintptr_t)0x1) {
1712		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1713		len--;
1714	}
1715	buff = (const uint16_t *)p;
1716	/*
1717	 * Modern C compiler such as GCC does not unroll automatically yet
1718	 * without unrolling pragma, and Clang is so. So we should
1719	 * unroll this loop for its performance.
1720	 */
1721	for (;len >= 8; len -= 8) {
1722		/* This if statement expects compiler optimization will
1723		 * remove the statement which will not be executed. */
1724#undef bswap16
1725#if defined(_MSC_VER) && _MSC_VER >= 1400  /* Visual Studio */
1726#  define bswap16(x) _byteswap_ushort(x)
1727#elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1728/* GCC 4.8 and later has __builtin_bswap16() */
1729#  define bswap16(x) __builtin_bswap16(x)
1730#elif defined(__clang__)
1731/* All clang versions have __builtin_bswap16() */
1732#  define bswap16(x) __builtin_bswap16(x)
1733#else
1734#  define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1735#endif
1736#define CRC16W	do { 	\
1737		if(u.c[0] == 1) { /* Big endian */		\
1738			crc ^= bswap16(*buff); buff++;		\
1739		} else						\
1740			crc ^= *buff++;				\
1741		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1742} while (0)
1743		CRC16W;
1744		CRC16W;
1745		CRC16W;
1746		CRC16W;
1747#undef CRC16W
1748#undef bswap16
1749	}
1750
1751	p = (const unsigned char *)buff;
1752	for (;len; len--) {
1753		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1754	}
1755	return crc;
1756}
1757
1758/*
1759 * Initialize LZHUF decoder.
1760 *
1761 * Returns ARCHIVE_OK if initialization was successful.
1762 * Returns ARCHIVE_FAILED if method is unsupported.
1763 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1764 * error occurred.
1765 */
1766static int
1767lzh_decode_init(struct lzh_stream *strm, const char *method)
1768{
1769	struct lzh_dec *ds;
1770	int w_bits, w_size;
1771
1772	if (strm->ds == NULL) {
1773		strm->ds = calloc(1, sizeof(*strm->ds));
1774		if (strm->ds == NULL)
1775			return (ARCHIVE_FATAL);
1776	}
1777	ds = strm->ds;
1778	ds->error = ARCHIVE_FAILED;
1779	if (method == NULL || method[0] != 'l' || method[1] != 'h')
1780		return (ARCHIVE_FAILED);
1781	switch (method[2]) {
1782	case '5':
1783		w_bits = 13;/* 8KiB for window */
1784		break;
1785	case '6':
1786		w_bits = 15;/* 32KiB for window */
1787		break;
1788	case '7':
1789		w_bits = 16;/* 64KiB for window */
1790		break;
1791	default:
1792		return (ARCHIVE_FAILED);/* Not supported. */
1793	}
1794	ds->error = ARCHIVE_FATAL;
1795	/* Expand a window size up to 128 KiB for decompressing process
1796	 * performance whatever its original window size is. */
1797	ds->w_size = 1U << 17;
1798	ds->w_mask = ds->w_size -1;
1799	if (ds->w_buff == NULL) {
1800		ds->w_buff = malloc(ds->w_size);
1801		if (ds->w_buff == NULL)
1802			return (ARCHIVE_FATAL);
1803	}
1804	w_size = 1U << w_bits;
1805	memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1806	ds->w_pos = 0;
1807	ds->state = 0;
1808	ds->pos_pt_len_size = w_bits + 1;
1809	ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1810	ds->literal_pt_len_size = PT_BITLEN_SIZE;
1811	ds->literal_pt_len_bits = 5;
1812	ds->br.cache_buffer = 0;
1813	ds->br.cache_avail = 0;
1814
1815	if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1816	    != ARCHIVE_OK)
1817		return (ARCHIVE_FATAL);
1818	ds->lt.len_bits = 9;
1819	if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1820	    != ARCHIVE_OK)
1821		return (ARCHIVE_FATAL);
1822	ds->error = 0;
1823
1824	return (ARCHIVE_OK);
1825}
1826
1827/*
1828 * Release LZHUF decoder.
1829 */
1830static void
1831lzh_decode_free(struct lzh_stream *strm)
1832{
1833
1834	if (strm->ds == NULL)
1835		return;
1836	free(strm->ds->w_buff);
1837	lzh_huffman_free(&(strm->ds->lt));
1838	lzh_huffman_free(&(strm->ds->pt));
1839	free(strm->ds);
1840	strm->ds = NULL;
1841}
1842
1843/*
1844 * Bit stream reader.
1845 */
1846/* Check that the cache buffer has enough bits. */
1847#define lzh_br_has(br, n)	((br)->cache_avail >= n)
1848/* Get compressed data by bit. */
1849#define lzh_br_bits(br, n)				\
1850	(((uint16_t)((br)->cache_buffer >>		\
1851		((br)->cache_avail - (n)))) & cache_masks[n])
1852#define lzh_br_bits_forced(br, n)			\
1853	(((uint16_t)((br)->cache_buffer <<		\
1854		((n) - (br)->cache_avail))) & cache_masks[n])
1855/* Read ahead to make sure the cache buffer has enough compressed data we
1856 * will use.
1857 *  True  : completed, there is enough data in the cache buffer.
1858 *  False : we met that strm->next_in is empty, we have to get following
1859 *          bytes. */
1860#define lzh_br_read_ahead_0(strm, br, n)	\
1861	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
1862/*  True  : the cache buffer has some bits as much as we need.
1863 *  False : there are no enough bits in the cache buffer to be used,
1864 *          we have to get following bytes if we could. */
1865#define lzh_br_read_ahead(strm, br, n)	\
1866	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))
1867
1868/* Notify how many bits we consumed. */
1869#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
1870#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))
1871
1872static const uint16_t cache_masks[] = {
1873	0x0000, 0x0001, 0x0003, 0x0007,
1874	0x000F, 0x001F, 0x003F, 0x007F,
1875	0x00FF, 0x01FF, 0x03FF, 0x07FF,
1876	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
1877	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
1878};
1879
1880/*
1881 * Shift away used bits in the cache data and fill it up with following bits.
1882 * Call this when cache buffer does not have enough bits you need.
1883 *
1884 * Returns 1 if the cache buffer is full.
1885 * Returns 0 if the cache buffer is not full; input buffer is empty.
1886 */
1887static int
1888lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1889{
1890	int n = CACHE_BITS - br->cache_avail;
1891
1892	for (;;) {
1893		const int x = n >> 3;
1894		if (strm->avail_in >= x) {
1895			switch (x) {
1896			case 8:
1897				br->cache_buffer =
1898				    ((uint64_t)strm->next_in[0]) << 56 |
1899				    ((uint64_t)strm->next_in[1]) << 48 |
1900				    ((uint64_t)strm->next_in[2]) << 40 |
1901				    ((uint64_t)strm->next_in[3]) << 32 |
1902				    ((uint32_t)strm->next_in[4]) << 24 |
1903				    ((uint32_t)strm->next_in[5]) << 16 |
1904				    ((uint32_t)strm->next_in[6]) << 8 |
1905				     (uint32_t)strm->next_in[7];
1906				strm->next_in += 8;
1907				strm->avail_in -= 8;
1908				br->cache_avail += 8 * 8;
1909				return (1);
1910			case 7:
1911				br->cache_buffer =
1912		 		   (br->cache_buffer << 56) |
1913				    ((uint64_t)strm->next_in[0]) << 48 |
1914				    ((uint64_t)strm->next_in[1]) << 40 |
1915				    ((uint64_t)strm->next_in[2]) << 32 |
1916				    ((uint32_t)strm->next_in[3]) << 24 |
1917				    ((uint32_t)strm->next_in[4]) << 16 |
1918				    ((uint32_t)strm->next_in[5]) << 8 |
1919				     (uint32_t)strm->next_in[6];
1920				strm->next_in += 7;
1921				strm->avail_in -= 7;
1922				br->cache_avail += 7 * 8;
1923				return (1);
1924			case 6:
1925				br->cache_buffer =
1926		 		   (br->cache_buffer << 48) |
1927				    ((uint64_t)strm->next_in[0]) << 40 |
1928				    ((uint64_t)strm->next_in[1]) << 32 |
1929				    ((uint32_t)strm->next_in[2]) << 24 |
1930				    ((uint32_t)strm->next_in[3]) << 16 |
1931				    ((uint32_t)strm->next_in[4]) << 8 |
1932				     (uint32_t)strm->next_in[5];
1933				strm->next_in += 6;
1934				strm->avail_in -= 6;
1935				br->cache_avail += 6 * 8;
1936				return (1);
1937			case 0:
1938				/* We have enough compressed data in
1939				 * the cache buffer.*/
1940				return (1);
1941			default:
1942				break;
1943			}
1944		}
1945		if (strm->avail_in == 0) {
1946			/* There is not enough compressed data to fill up the
1947			 * cache buffer. */
1948			return (0);
1949		}
1950		br->cache_buffer =
1951		   (br->cache_buffer << 8) | *strm->next_in++;
1952		strm->avail_in--;
1953		br->cache_avail += 8;
1954		n -= 8;
1955	}
1956}
1957
1958/*
1959 * Decode LZHUF.
1960 *
1961 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
1962 *    Please set available buffer and call this function again.
1963 * 2. Returns ARCHIVE_EOF if decompression has been completed.
1964 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
1965 *    is broken or you do not set 'last' flag properly.
1966 * 4. 'last' flag is very important, you must set 1 to the flag if there
1967 *    is no input data. The lha compressed data format does not provide how
1968 *    to know the compressed data is really finished.
1969 *    Note: lha command utility check if the total size of output bytes is
1970 *    reached the uncompressed size recorded in its header. it does not mind
1971 *    that the decoding process is properly finished.
1972 *    GNU ZIP can decompress another compressed file made by SCO LZH compress.
1973 *    it handles EOF as null to fill read buffer with zero until the decoding
1974 *    process meet 2 bytes of zeros at reading a size of a next chunk, so the
1975 *    zeros are treated as the mark of the end of the data although the zeros
1976 *    is dummy, not the file data.
1977 */
1978static int	lzh_read_blocks(struct lzh_stream *, int);
1979static int	lzh_decode_blocks(struct lzh_stream *, int);
1980#define ST_RD_BLOCK		0
1981#define ST_RD_PT_1		1
1982#define ST_RD_PT_2		2
1983#define ST_RD_PT_3		3
1984#define ST_RD_PT_4		4
1985#define ST_RD_LITERAL_1		5
1986#define ST_RD_LITERAL_2		6
1987#define ST_RD_LITERAL_3		7
1988#define ST_RD_POS_DATA_1	8
1989#define ST_GET_LITERAL		9
1990#define ST_GET_POS_1		10
1991#define ST_GET_POS_2		11
1992#define ST_COPY_DATA		12
1993
1994static int
1995lzh_decode(struct lzh_stream *strm, int last)
1996{
1997	struct lzh_dec *ds = strm->ds;
1998	int avail_in;
1999	int r;
2000
2001	if (ds->error)
2002		return (ds->error);
2003
2004	avail_in = strm->avail_in;
2005	do {
2006		if (ds->state < ST_GET_LITERAL)
2007			r = lzh_read_blocks(strm, last);
2008		else
2009			r = lzh_decode_blocks(strm, last);
2010	} while (r == 100);
2011	strm->total_in += avail_in - strm->avail_in;
2012	return (r);
2013}
2014
2015static void
2016lzh_emit_window(struct lzh_stream *strm, size_t s)
2017{
2018	strm->ref_ptr = strm->ds->w_buff;
2019	strm->avail_out = (int)s;
2020	strm->total_out += s;
2021}
2022
2023static int
2024lzh_read_blocks(struct lzh_stream *strm, int last)
2025{
2026	struct lzh_dec *ds = strm->ds;
2027	struct lzh_br *br = &(ds->br);
2028	int c = 0, i;
2029	unsigned rbits;
2030
2031	for (;;) {
2032		switch (ds->state) {
2033		case ST_RD_BLOCK:
2034			/*
2035			 * Read a block number indicates how many blocks
2036			 * we will handle. The block is composed of a
2037			 * literal and a match, sometimes a literal only
2038			 * in particular, there are no reference data at
2039			 * the beginning of the decompression.
2040			 */
2041			if (!lzh_br_read_ahead_0(strm, br, 16)) {
2042				if (!last)
2043					/* We need following data. */
2044					return (ARCHIVE_OK);
2045				if (lzh_br_has(br, 8)) {
2046					/*
2047					 * It seems there are extra bits.
2048					 *  1. Compressed data is broken.
2049					 *  2. `last' flag does not properly
2050					 *     set.
2051					 */
2052					goto failed;
2053				}
2054				if (ds->w_pos > 0) {
2055					lzh_emit_window(strm, ds->w_pos);
2056					ds->w_pos = 0;
2057					return (ARCHIVE_OK);
2058				}
2059				/* End of compressed data; we have completely
2060				 * handled all compressed data. */
2061				return (ARCHIVE_EOF);
2062			}
2063			ds->blocks_avail = lzh_br_bits(br, 16);
2064			if (ds->blocks_avail == 0)
2065				goto failed;
2066			lzh_br_consume(br, 16);
2067			/*
2068			 * Read a literal table compressed in huffman
2069			 * coding.
2070			 */
2071			ds->pt.len_size = ds->literal_pt_len_size;
2072			ds->pt.len_bits = ds->literal_pt_len_bits;
2073			ds->reading_position = 0;
2074			/* FALL THROUGH */
2075		case ST_RD_PT_1:
2076			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2077			 * used in reading both a literal table and a
2078			 * position table. */
2079			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2080				if (last)
2081					goto failed;/* Truncated data. */
2082				ds->state = ST_RD_PT_1;
2083				return (ARCHIVE_OK);
2084			}
2085			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2086			lzh_br_consume(br, ds->pt.len_bits);
2087			/* FALL THROUGH */
2088		case ST_RD_PT_2:
2089			if (ds->pt.len_avail == 0) {
2090				/* There is no bitlen. */
2091				if (!lzh_br_read_ahead(strm, br,
2092				    ds->pt.len_bits)) {
2093					if (last)
2094						goto failed;/* Truncated data.*/
2095					ds->state = ST_RD_PT_2;
2096					return (ARCHIVE_OK);
2097				}
2098				if (!lzh_make_fake_table(&(ds->pt),
2099				    lzh_br_bits(br, ds->pt.len_bits)))
2100					goto failed;/* Invalid data. */
2101				lzh_br_consume(br, ds->pt.len_bits);
2102				if (ds->reading_position)
2103					ds->state = ST_GET_LITERAL;
2104				else
2105					ds->state = ST_RD_LITERAL_1;
2106				break;
2107			} else if (ds->pt.len_avail > ds->pt.len_size)
2108				goto failed;/* Invalid data. */
2109			ds->loop = 0;
2110			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2111			if (ds->pt.len_avail < 3 ||
2112			    ds->pt.len_size == ds->pos_pt_len_size) {
2113				ds->state = ST_RD_PT_4;
2114				break;
2115			}
2116			/* FALL THROUGH */
2117		case ST_RD_PT_3:
2118			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2119			if (ds->loop < 3) {
2120				if (ds->loop < 0 || last)
2121					goto failed;/* Invalid data. */
2122				/* Not completed, get following data. */
2123				ds->state = ST_RD_PT_3;
2124				return (ARCHIVE_OK);
2125			}
2126			/* There are some null in bitlen of the literal. */
2127			if (!lzh_br_read_ahead(strm, br, 2)) {
2128				if (last)
2129					goto failed;/* Truncated data. */
2130				ds->state = ST_RD_PT_3;
2131				return (ARCHIVE_OK);
2132			}
2133			c = lzh_br_bits(br, 2);
2134			lzh_br_consume(br, 2);
2135			if (c > ds->pt.len_avail - 3)
2136				goto failed;/* Invalid data. */
2137			for (i = 3; c-- > 0 ;)
2138				ds->pt.bitlen[i++] = 0;
2139			ds->loop = i;
2140			/* FALL THROUGH */
2141		case ST_RD_PT_4:
2142			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2143			    ds->pt.len_avail);
2144			if (ds->loop < ds->pt.len_avail) {
2145				if (ds->loop < 0 || last)
2146					goto failed;/* Invalid data. */
2147				/* Not completed, get following data. */
2148				ds->state = ST_RD_PT_4;
2149				return (ARCHIVE_OK);
2150			}
2151			if (!lzh_make_huffman_table(&(ds->pt)))
2152				goto failed;/* Invalid data */
2153			if (ds->reading_position) {
2154				ds->state = ST_GET_LITERAL;
2155				break;
2156			}
2157			/* FALL THROUGH */
2158		case ST_RD_LITERAL_1:
2159			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2160				if (last)
2161					goto failed;/* Truncated data. */
2162				ds->state = ST_RD_LITERAL_1;
2163				return (ARCHIVE_OK);
2164			}
2165			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2166			lzh_br_consume(br, ds->lt.len_bits);
2167			/* FALL THROUGH */
2168		case ST_RD_LITERAL_2:
2169			if (ds->lt.len_avail == 0) {
2170				/* There is no bitlen. */
2171				if (!lzh_br_read_ahead(strm, br,
2172				    ds->lt.len_bits)) {
2173					if (last)
2174						goto failed;/* Truncated data.*/
2175					ds->state = ST_RD_LITERAL_2;
2176					return (ARCHIVE_OK);
2177				}
2178				if (!lzh_make_fake_table(&(ds->lt),
2179				    lzh_br_bits(br, ds->lt.len_bits)))
2180					goto failed;/* Invalid data */
2181				lzh_br_consume(br, ds->lt.len_bits);
2182				ds->state = ST_RD_POS_DATA_1;
2183				break;
2184			} else if (ds->lt.len_avail > ds->lt.len_size)
2185				goto failed;/* Invalid data */
2186			ds->loop = 0;
2187			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2188			/* FALL THROUGH */
2189		case ST_RD_LITERAL_3:
2190			i = ds->loop;
2191			while (i < ds->lt.len_avail) {
2192				if (!lzh_br_read_ahead(strm, br,
2193				    ds->pt.max_bits)) {
2194					if (last)
2195						goto failed;/* Truncated data.*/
2196					ds->loop = i;
2197					ds->state = ST_RD_LITERAL_3;
2198					return (ARCHIVE_OK);
2199				}
2200				rbits = lzh_br_bits(br, ds->pt.max_bits);
2201				c = lzh_decode_huffman(&(ds->pt), rbits);
2202				if (c > 2) {
2203					/* Note: 'c' will never be more than
2204					 * eighteen since it's limited by
2205					 * PT_BITLEN_SIZE, which is being set
2206					 * to ds->pt.len_size through
2207					 * ds->literal_pt_len_size. */
2208					lzh_br_consume(br, ds->pt.bitlen[c]);
2209					c -= 2;
2210					ds->lt.freq[c]++;
2211					ds->lt.bitlen[i++] = c;
2212				} else if (c == 0) {
2213					lzh_br_consume(br, ds->pt.bitlen[c]);
2214					ds->lt.bitlen[i++] = 0;
2215				} else {
2216					/* c == 1 or c == 2 */
2217					int n = (c == 1)?4:9;
2218					if (!lzh_br_read_ahead(strm, br,
2219					     ds->pt.bitlen[c] + n)) {
2220						if (last) /* Truncated data. */
2221							goto failed;
2222						ds->loop = i;
2223						ds->state = ST_RD_LITERAL_3;
2224						return (ARCHIVE_OK);
2225					}
2226					lzh_br_consume(br, ds->pt.bitlen[c]);
2227					c = lzh_br_bits(br, n);
2228					lzh_br_consume(br, n);
2229					c += (n == 4)?3:20;
2230					if (i + c > ds->lt.len_avail)
2231						goto failed;/* Invalid data */
2232					memset(&(ds->lt.bitlen[i]), 0, c);
2233					i += c;
2234				}
2235			}
2236			if (i > ds->lt.len_avail ||
2237			    !lzh_make_huffman_table(&(ds->lt)))
2238				goto failed;/* Invalid data */
2239			/* FALL THROUGH */
2240		case ST_RD_POS_DATA_1:
2241			/*
2242			 * Read a position table compressed in huffman
2243			 * coding.
2244			 */
2245			ds->pt.len_size = ds->pos_pt_len_size;
2246			ds->pt.len_bits = ds->pos_pt_len_bits;
2247			ds->reading_position = 1;
2248			ds->state = ST_RD_PT_1;
2249			break;
2250		case ST_GET_LITERAL:
2251			return (100);
2252		}
2253	}
2254failed:
2255	return (ds->error = ARCHIVE_FAILED);
2256}
2257
2258static int
2259lzh_decode_blocks(struct lzh_stream *strm, int last)
2260{
2261	struct lzh_dec *ds = strm->ds;
2262	struct lzh_br bre = ds->br;
2263	struct huffman *lt = &(ds->lt);
2264	struct huffman *pt = &(ds->pt);
2265	unsigned char *w_buff = ds->w_buff;
2266	unsigned char *lt_bitlen = lt->bitlen;
2267	unsigned char *pt_bitlen = pt->bitlen;
2268	int blocks_avail = ds->blocks_avail, c = 0;
2269	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2270	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2271	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2272	int state = ds->state;
2273
2274	for (;;) {
2275		switch (state) {
2276		case ST_GET_LITERAL:
2277			for (;;) {
2278				if (blocks_avail == 0) {
2279					/* We have decoded all blocks.
2280					 * Let's handle next blocks. */
2281					ds->state = ST_RD_BLOCK;
2282					ds->br = bre;
2283					ds->blocks_avail = 0;
2284					ds->w_pos = w_pos;
2285					ds->copy_pos = 0;
2286					return (100);
2287				}
2288
2289				/* lzh_br_read_ahead() always try to fill the
2290				 * cache buffer up. In specific situation we
2291				 * are close to the end of the data, the cache
2292				 * buffer will not be full and thus we have to
2293				 * determine if the cache buffer has some bits
2294				 * as much as we need after lzh_br_read_ahead()
2295				 * failed. */
2296				if (!lzh_br_read_ahead(strm, &bre,
2297				    lt_max_bits)) {
2298					if (!last)
2299						goto next_data;
2300					/* Remaining bits are less than
2301					 * maximum bits(lt.max_bits) but maybe
2302					 * it still remains as much as we need,
2303					 * so we should try to use it with
2304					 * dummy bits. */
2305					c = lzh_decode_huffman(lt,
2306					      lzh_br_bits_forced(&bre,
2307					        lt_max_bits));
2308					lzh_br_consume(&bre, lt_bitlen[c]);
2309					if (!lzh_br_has(&bre, 0))
2310						goto failed;/* Over read. */
2311				} else {
2312					c = lzh_decode_huffman(lt,
2313					      lzh_br_bits(&bre, lt_max_bits));
2314					lzh_br_consume(&bre, lt_bitlen[c]);
2315				}
2316				blocks_avail--;
2317				if (c > UCHAR_MAX)
2318					/* Current block is a match data. */
2319					break;
2320				/*
2321				 * 'c' is exactly a literal code.
2322				 */
2323				/* Save a decoded code to reference it
2324				 * afterward. */
2325				w_buff[w_pos] = c;
2326				if (++w_pos >= w_size) {
2327					w_pos = 0;
2328					lzh_emit_window(strm, w_size);
2329					goto next_data;
2330				}
2331			}
2332			/* 'c' is the length of a match pattern we have
2333			 * already extracted, which has be stored in
2334			 * window(ds->w_buff). */
2335			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2336			/* FALL THROUGH */
2337		case ST_GET_POS_1:
2338			/*
2339			 * Get a reference position.
2340			 */
2341			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2342				if (!last) {
2343					state = ST_GET_POS_1;
2344					ds->copy_len = copy_len;
2345					goto next_data;
2346				}
2347				copy_pos = lzh_decode_huffman(pt,
2348				    lzh_br_bits_forced(&bre, pt_max_bits));
2349				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2350				if (!lzh_br_has(&bre, 0))
2351					goto failed;/* Over read. */
2352			} else {
2353				copy_pos = lzh_decode_huffman(pt,
2354				    lzh_br_bits(&bre, pt_max_bits));
2355				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2356			}
2357			/* FALL THROUGH */
2358		case ST_GET_POS_2:
2359			if (copy_pos > 1) {
2360				/* We need an additional adjustment number to
2361				 * the position. */
2362				int p = copy_pos - 1;
2363				if (!lzh_br_read_ahead(strm, &bre, p)) {
2364					if (last)
2365						goto failed;/* Truncated data.*/
2366					state = ST_GET_POS_2;
2367					ds->copy_len = copy_len;
2368					ds->copy_pos = copy_pos;
2369					goto next_data;
2370				}
2371				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2372				lzh_br_consume(&bre, p);
2373			}
2374			/* The position is actually a distance from the last
2375			 * code we had extracted and thus we have to convert
2376			 * it to a position of the window. */
2377			copy_pos = (w_pos - copy_pos - 1) & w_mask;
2378			/* FALL THROUGH */
2379		case ST_COPY_DATA:
2380			/*
2381			 * Copy `copy_len' bytes as extracted data from
2382			 * the window into the output buffer.
2383			 */
2384			for (;;) {
2385				int l;
2386
2387				l = copy_len;
2388				if (copy_pos > w_pos) {
2389					if (l > w_size - copy_pos)
2390						l = w_size - copy_pos;
2391				} else {
2392					if (l > w_size - w_pos)
2393						l = w_size - w_pos;
2394				}
2395				if ((copy_pos + l < w_pos)
2396				    || (w_pos + l < copy_pos)) {
2397					/* No overlap. */
2398					memcpy(w_buff + w_pos,
2399					    w_buff + copy_pos, l);
2400				} else {
2401					const unsigned char *s;
2402					unsigned char *d;
2403					int li;
2404
2405					d = w_buff + w_pos;
2406					s = w_buff + copy_pos;
2407					for (li = 0; li < l-1;) {
2408						d[li] = s[li];li++;
2409						d[li] = s[li];li++;
2410					}
2411					if (li < l)
2412						d[li] = s[li];
2413				}
2414				w_pos += l;
2415				if (w_pos == w_size) {
2416					w_pos = 0;
2417					lzh_emit_window(strm, w_size);
2418					if (copy_len <= l)
2419						state = ST_GET_LITERAL;
2420					else {
2421						state = ST_COPY_DATA;
2422						ds->copy_len = copy_len - l;
2423						ds->copy_pos =
2424						    (copy_pos + l) & w_mask;
2425					}
2426					goto next_data;
2427				}
2428				if (copy_len <= l)
2429					/* A copy of current pattern ended. */
2430					break;
2431				copy_len -= l;
2432				copy_pos = (copy_pos + l) & w_mask;
2433			}
2434			state = ST_GET_LITERAL;
2435			break;
2436		}
2437	}
2438failed:
2439	return (ds->error = ARCHIVE_FAILED);
2440next_data:
2441	ds->br = bre;
2442	ds->blocks_avail = blocks_avail;
2443	ds->state = state;
2444	ds->w_pos = w_pos;
2445	return (ARCHIVE_OK);
2446}
2447
2448static int
2449lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2450{
2451	int bits;
2452
2453	if (hf->bitlen == NULL) {
2454		hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2455		if (hf->bitlen == NULL)
2456			return (ARCHIVE_FATAL);
2457	}
2458	if (hf->tbl == NULL) {
2459		if (tbl_bits < HTBL_BITS)
2460			bits = tbl_bits;
2461		else
2462			bits = HTBL_BITS;
2463		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2464		if (hf->tbl == NULL)
2465			return (ARCHIVE_FATAL);
2466	}
2467	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2468		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2469		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2470		if (hf->tree == NULL)
2471			return (ARCHIVE_FATAL);
2472	}
2473	hf->len_size = (int)len_size;
2474	hf->tbl_bits = tbl_bits;
2475	return (ARCHIVE_OK);
2476}
2477
2478static void
2479lzh_huffman_free(struct huffman *hf)
2480{
2481	free(hf->bitlen);
2482	free(hf->tbl);
2483	free(hf->tree);
2484}
2485
2486static const char bitlen_tbl[0x400] = {
2487	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2488	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2489	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2490	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2491	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2492	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2493	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2494	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2495	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2496	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2497	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2498	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2499	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2500	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2501	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2502	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2503	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2504	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2505	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2506	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2507	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2508	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2509	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2510	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2511	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2512	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2513	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2514	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2515	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2516	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2517	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2518	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2519	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2520	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2521	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2522	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2523	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2524	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2525	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2526	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2527	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2528	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2529	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2530	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2531	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2532	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2533	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2534	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2535	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2536	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2537	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2538	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2539	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2540	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2541	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2542	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2543	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2544	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2545	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2546	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2547	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2548	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2549	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2550	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
2551};
2552static int
2553lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2554{
2555	struct lzh_dec *ds = strm->ds;
2556	struct lzh_br *br = &(ds->br);
2557	int c, i;
2558
2559	for (i = start; i < end; ) {
2560		/*
2561		 *  bit pattern     the number we need
2562		 *     000           ->  0
2563		 *     001           ->  1
2564		 *     010           ->  2
2565		 *     ...
2566		 *     110           ->  6
2567		 *     1110          ->  7
2568		 *     11110         ->  8
2569		 *     ...
2570		 *     1111111111110 ->  16
2571		 */
2572		if (!lzh_br_read_ahead(strm, br, 3))
2573			return (i);
2574		if ((c = lzh_br_bits(br, 3)) == 7) {
2575			if (!lzh_br_read_ahead(strm, br, 13))
2576				return (i);
2577			c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2578			if (c)
2579				lzh_br_consume(br, c - 3);
2580			else
2581				return (-1);/* Invalid data. */
2582		} else
2583			lzh_br_consume(br, 3);
2584		ds->pt.bitlen[i++] = c;
2585		ds->pt.freq[c]++;
2586	}
2587	return (i);
2588}
2589
2590static int
2591lzh_make_fake_table(struct huffman *hf, uint16_t c)
2592{
2593	if (c >= hf->len_size)
2594		return (0);
2595	hf->tbl[0] = c;
2596	hf->max_bits = 0;
2597	hf->shift_bits = 0;
2598	hf->bitlen[hf->tbl[0]] = 0;
2599	return (1);
2600}
2601
2602/*
2603 * Make a huffman coding table.
2604 */
2605static int
2606lzh_make_huffman_table(struct huffman *hf)
2607{
2608	uint16_t *tbl;
2609	const unsigned char *bitlen;
2610	int bitptn[17], weight[17];
2611	int i, maxbits = 0, ptn, tbl_size, w;
2612	int diffbits, len_avail;
2613
2614	/*
2615	 * Initialize bit patterns.
2616	 */
2617	ptn = 0;
2618	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2619		bitptn[i] = ptn;
2620		weight[i] = w;
2621		if (hf->freq[i]) {
2622			ptn += hf->freq[i] * w;
2623			maxbits = i;
2624		}
2625	}
2626	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2627		return (0);/* Invalid */
2628
2629	hf->max_bits = maxbits;
2630
2631	/*
2632	 * Cut out extra bits which we won't house in the table.
2633	 * This preparation reduces the same calculation in the for-loop
2634	 * making the table.
2635	 */
2636	if (maxbits < 16) {
2637		int ebits = 16 - maxbits;
2638		for (i = 1; i <= maxbits; i++) {
2639			bitptn[i] >>= ebits;
2640			weight[i] >>= ebits;
2641		}
2642	}
2643	if (maxbits > HTBL_BITS) {
2644		unsigned htbl_max;
2645		uint16_t *p;
2646
2647		diffbits = maxbits - HTBL_BITS;
2648		for (i = 1; i <= HTBL_BITS; i++) {
2649			bitptn[i] >>= diffbits;
2650			weight[i] >>= diffbits;
2651		}
2652		htbl_max = bitptn[HTBL_BITS] +
2653		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2654		p = &(hf->tbl[htbl_max]);
2655		while (p < &hf->tbl[1U<<HTBL_BITS])
2656			*p++ = 0;
2657	} else
2658		diffbits = 0;
2659	hf->shift_bits = diffbits;
2660
2661	/*
2662	 * Make the table.
2663	 */
2664	tbl_size = 1 << HTBL_BITS;
2665	tbl = hf->tbl;
2666	bitlen = hf->bitlen;
2667	len_avail = hf->len_avail;
2668	hf->tree_used = 0;
2669	for (i = 0; i < len_avail; i++) {
2670		uint16_t *p;
2671		int len, cnt;
2672		uint16_t bit;
2673		int extlen;
2674		struct htree_t *ht;
2675
2676		if (bitlen[i] == 0)
2677			continue;
2678		/* Get a bit pattern */
2679		len = bitlen[i];
2680		ptn = bitptn[len];
2681		cnt = weight[len];
2682		if (len <= HTBL_BITS) {
2683			/* Calculate next bit pattern */
2684			if ((bitptn[len] = ptn + cnt) > tbl_size)
2685				return (0);/* Invalid */
2686			/* Update the table */
2687			p = &(tbl[ptn]);
2688			if (cnt > 7) {
2689				uint16_t *pc;
2690
2691				cnt -= 8;
2692				pc = &p[cnt];
2693				pc[0] = (uint16_t)i;
2694				pc[1] = (uint16_t)i;
2695				pc[2] = (uint16_t)i;
2696				pc[3] = (uint16_t)i;
2697				pc[4] = (uint16_t)i;
2698				pc[5] = (uint16_t)i;
2699				pc[6] = (uint16_t)i;
2700				pc[7] = (uint16_t)i;
2701				if (cnt > 7) {
2702					cnt -= 8;
2703					memcpy(&p[cnt], pc,
2704						8 * sizeof(uint16_t));
2705					pc = &p[cnt];
2706					while (cnt > 15) {
2707						cnt -= 16;
2708						memcpy(&p[cnt], pc,
2709							16 * sizeof(uint16_t));
2710					}
2711				}
2712				if (cnt)
2713					memcpy(p, pc, cnt * sizeof(uint16_t));
2714			} else {
2715				while (cnt > 1) {
2716					p[--cnt] = (uint16_t)i;
2717					p[--cnt] = (uint16_t)i;
2718				}
2719				if (cnt)
2720					p[--cnt] = (uint16_t)i;
2721			}
2722			continue;
2723		}
2724
2725		/*
2726		 * A bit length is too big to be housed to a direct table,
2727		 * so we use a tree model for its extra bits.
2728		 */
2729		bitptn[len] = ptn + cnt;
2730		bit = 1U << (diffbits -1);
2731		extlen = len - HTBL_BITS;
2732
2733		p = &(tbl[ptn >> diffbits]);
2734		if (*p == 0) {
2735			*p = len_avail + hf->tree_used;
2736			ht = &(hf->tree[hf->tree_used++]);
2737			if (hf->tree_used > hf->tree_avail)
2738				return (0);/* Invalid */
2739			ht->left = 0;
2740			ht->right = 0;
2741		} else {
2742			if (*p < len_avail ||
2743			    *p >= (len_avail + hf->tree_used))
2744				return (0);/* Invalid */
2745			ht = &(hf->tree[*p - len_avail]);
2746		}
2747		while (--extlen > 0) {
2748			if (ptn & bit) {
2749				if (ht->left < len_avail) {
2750					ht->left = len_avail + hf->tree_used;
2751					ht = &(hf->tree[hf->tree_used++]);
2752					if (hf->tree_used > hf->tree_avail)
2753						return (0);/* Invalid */
2754					ht->left = 0;
2755					ht->right = 0;
2756				} else {
2757					ht = &(hf->tree[ht->left - len_avail]);
2758				}
2759			} else {
2760				if (ht->right < len_avail) {
2761					ht->right = len_avail + hf->tree_used;
2762					ht = &(hf->tree[hf->tree_used++]);
2763					if (hf->tree_used > hf->tree_avail)
2764						return (0);/* Invalid */
2765					ht->left = 0;
2766					ht->right = 0;
2767				} else {
2768					ht = &(hf->tree[ht->right - len_avail]);
2769				}
2770			}
2771			bit >>= 1;
2772		}
2773		if (ptn & bit) {
2774			if (ht->left != 0)
2775				return (0);/* Invalid */
2776			ht->left = (uint16_t)i;
2777		} else {
2778			if (ht->right != 0)
2779				return (0);/* Invalid */
2780			ht->right = (uint16_t)i;
2781		}
2782	}
2783	return (1);
2784}
2785
2786static int
2787lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2788{
2789	struct htree_t *ht;
2790	int extlen;
2791
2792	ht = hf->tree;
2793	extlen = hf->shift_bits;
2794	while (c >= hf->len_avail) {
2795		c -= hf->len_avail;
2796		if (extlen-- <= 0 || c >= hf->tree_used)
2797			return (0);
2798		if (rbits & (1U << extlen))
2799			c = ht[c].left;
2800		else
2801			c = ht[c].right;
2802	}
2803	return (c);
2804}
2805
2806static inline int
2807lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2808{
2809	int c;
2810	/*
2811	 * At first search an index table for a bit pattern.
2812	 * If it fails, search a huffman tree for.
2813	 */
2814	c = hf->tbl[rbits >> hf->shift_bits];
2815	if (c < hf->len_avail || hf->len_avail == 0)
2816		return (c);
2817	/* This bit pattern needs to be found out at a huffman tree. */
2818	return (lzh_decode_huffman_tree(hf, rbits, c));
2819}
2820
2821