archive_read_support_format_lha.c revision 302001
1145519Sdarrenr/*-
2145510Sdarrenr * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3170268Sdarrenr * All rights reserved.
4255332Scy *
5255332Scy * Redistribution and use in source and binary forms, with or without
6255332Scy * modification, are permitted provided that the following conditions
7255332Scy * are met:
8255332Scy * 1. Redistributions of source code must retain the above copyright
9255332Scy *    notice, this list of conditions and the following disclaimer.
10170268Sdarrenr * 2. Redistributions in binary form must reproduce the above copyright
11145510Sdarrenr *    notice, this list of conditions and the following disclaimer in the
12145547Sdarrenr *    documentation and/or other materials provided with the distribution.
13145510Sdarrenr *
14145510Sdarrenr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15145510Sdarrenr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16255332Scy * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17255332Scy * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18255332Scy * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19145510Sdarrenr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20145510Sdarrenr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21145510Sdarrenr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22145510Sdarrenr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23145510Sdarrenr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24145510Sdarrenr */
25145510Sdarrenr
26145510Sdarrenr#include "archive_platform.h"
27145510Sdarrenr
28145510Sdarrenr#ifdef HAVE_ERRNO_H
29145510Sdarrenr#include <errno.h>
30145510Sdarrenr#endif
31145510Sdarrenr#ifdef HAVE_LIMITS_H
32145510Sdarrenr#include <limits.h>
33145510Sdarrenr#endif
34255332Scy#ifdef HAVE_STDLIB_H
35255332Scy#include <stdlib.h>
36145510Sdarrenr#endif
37145510Sdarrenr#ifdef HAVE_STRING_H
38145510Sdarrenr#include <string.h>
39145510Sdarrenr#endif
40145510Sdarrenr
41145510Sdarrenr#include "archive.h"
42145510Sdarrenr#include "archive_entry.h"
43145510Sdarrenr#include "archive_entry_locale.h"
44145510Sdarrenr#include "archive_private.h"
45147547Sdarrenr#include "archive_read_private.h"
46145510Sdarrenr#include "archive_endian.h"
47145510Sdarrenr
48145510Sdarrenr
49145510Sdarrenr#define MAXMATCH		256	/* Maximum match length. */
50255332Scy#define MINMATCH		3	/* Minimum match length. */
51255332Scy/*
52145510Sdarrenr * Literal table format:
53145510Sdarrenr * +0              +256                      +510
54145510Sdarrenr * +---------------+-------------------------+
55145510Sdarrenr * | literal code  |       match length      |
56145510Sdarrenr * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
57145510Sdarrenr * +---------------+-------------------------+
58145510Sdarrenr *  <---          LT_BITLEN_SIZE         --->
59147547Sdarrenr */
60145510Sdarrenr/* Literal table size. */
61145510Sdarrenr#define LT_BITLEN_SIZE		(UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62255332Scy/* Position table size.
63145510Sdarrenr * Note: this used for both position table and pre literal table.*/
64145510Sdarrenr#define PT_BITLEN_SIZE		(3 + 16)
65147547Sdarrenr
66147547Sdarrenrstruct lzh_dec {
67145510Sdarrenr	/* Decoding status. */
68147547Sdarrenr	int     		 state;
69145510Sdarrenr
70145510Sdarrenr	/*
71145510Sdarrenr	 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
72145510Sdarrenr	 * data.
73145510Sdarrenr	 */
74145510Sdarrenr	int			 w_size;
75	int			 w_mask;
76	/* Window buffer, which is a loop buffer. */
77	unsigned char		*w_buff;
78	/* The insert position to the window. */
79	int			 w_pos;
80	/* The position where we can copy decoded code from the window. */
81	int     		 copy_pos;
82	/* The length how many bytes we can copy decoded code from
83	 * the window. */
84	int     		 copy_len;
85
86	/*
87	 * Bit stream reader.
88	 */
89	struct lzh_br {
90#define CACHE_TYPE		uint64_t
91#define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
92	 	/* Cache buffer. */
93		CACHE_TYPE	 cache_buffer;
94		/* Indicates how many bits avail in cache_buffer. */
95		int		 cache_avail;
96	} br;
97
98	/*
99	 * Huffman coding.
100	 */
101	struct huffman {
102		int		 len_size;
103		int		 len_avail;
104		int		 len_bits;
105		int		 freq[17];
106		unsigned char	*bitlen;
107
108		/*
109		 * Use a index table. It's faster than searching a huffman
110		 * coding tree, which is a binary tree. But a use of a large
111		 * index table causes L1 cache read miss many times.
112		 */
113#define HTBL_BITS	10
114		int		 max_bits;
115		int		 shift_bits;
116		int		 tbl_bits;
117		int		 tree_used;
118		int		 tree_avail;
119		/* Direct access table. */
120		uint16_t	*tbl;
121		/* Binary tree table for extra bits over the direct access. */
122		struct htree_t {
123			uint16_t left;
124			uint16_t right;
125		}		*tree;
126	}			 lt, pt;
127
128	int			 blocks_avail;
129	int			 pos_pt_len_size;
130	int			 pos_pt_len_bits;
131	int			 literal_pt_len_size;
132	int			 literal_pt_len_bits;
133	int			 reading_position;
134	int			 loop;
135	int			 error;
136};
137
138struct lzh_stream {
139	const unsigned char	*next_in;
140	int			 avail_in;
141	int64_t			 total_in;
142	const unsigned char	*ref_ptr;
143	int			 avail_out;
144	int64_t			 total_out;
145	struct lzh_dec		*ds;
146};
147
148struct lha {
149	/* entry_bytes_remaining is the number of bytes we expect.	    */
150	int64_t                  entry_offset;
151	int64_t                  entry_bytes_remaining;
152	int64_t			 entry_unconsumed;
153	uint16_t		 entry_crc_calculated;
154
155	size_t			 header_size;	/* header size		    */
156	unsigned char		 level;		/* header level		    */
157	char			 method[3];	/* compress type	    */
158	int64_t			 compsize;	/* compressed data size	    */
159	int64_t			 origsize;	/* original file size	    */
160	int			 setflag;
161#define BIRTHTIME_IS_SET	1
162#define ATIME_IS_SET		2
163#define UNIX_MODE_IS_SET	4
164#define CRC_IS_SET		8
165	time_t			 birthtime;
166	long			 birthtime_tv_nsec;
167	time_t			 mtime;
168	long			 mtime_tv_nsec;
169	time_t			 atime;
170	long			 atime_tv_nsec;
171	mode_t			 mode;
172	int64_t			 uid;
173	int64_t			 gid;
174	struct archive_string 	 uname;
175	struct archive_string 	 gname;
176	uint16_t		 header_crc;
177	uint16_t		 crc;
178	struct archive_string_conv *sconv;
179	struct archive_string_conv *opt_sconv;
180
181	struct archive_string 	 dirname;
182	struct archive_string 	 filename;
183	struct archive_wstring	 ws;
184
185	unsigned char		 dos_attr;
186
187	/* Flag to mark progress that an archive was read their first header.*/
188	char			 found_first_header;
189	/* Flag to mark that indicates an empty directory. */
190	char			 directory;
191
192	/* Flags to mark progress of decompression. */
193	char			 decompress_init;
194	char			 end_of_entry;
195	char			 end_of_entry_cleanup;
196	char			 entry_is_compressed;
197
198	char			 format_name[64];
199
200	struct lzh_stream	 strm;
201};
202
203/*
204 * LHA header common member offset.
205 */
206#define H_METHOD_OFFSET	2	/* Compress type. */
207#define H_ATTR_OFFSET	19	/* DOS attribute. */
208#define H_LEVEL_OFFSET	20	/* Header Level.  */
209#define H_SIZE		22	/* Minimum header size. */
210
211static int      archive_read_format_lha_bid(struct archive_read *, int);
212static int      archive_read_format_lha_options(struct archive_read *,
213		    const char *, const char *);
214static int	archive_read_format_lha_read_header(struct archive_read *,
215		    struct archive_entry *);
216static int	archive_read_format_lha_read_data(struct archive_read *,
217		    const void **, size_t *, int64_t *);
218static int	archive_read_format_lha_read_data_skip(struct archive_read *);
219static int	archive_read_format_lha_cleanup(struct archive_read *);
220
221static void	lha_replace_path_separator(struct lha *,
222		    struct archive_entry *);
223static int	lha_read_file_header_0(struct archive_read *, struct lha *);
224static int	lha_read_file_header_1(struct archive_read *, struct lha *);
225static int	lha_read_file_header_2(struct archive_read *, struct lha *);
226static int	lha_read_file_header_3(struct archive_read *, struct lha *);
227static int	lha_read_file_extended_header(struct archive_read *,
228		    struct lha *, uint16_t *, int, size_t, size_t *);
229static size_t	lha_check_header_format(const void *);
230static int	lha_skip_sfx(struct archive_read *);
231static time_t	lha_dos_time(const unsigned char *);
232static time_t	lha_win_time(uint64_t, long *);
233static unsigned char	lha_calcsum(unsigned char, const void *,
234		    int, size_t);
235static int	lha_parse_linkname(struct archive_string *,
236		    struct archive_string *);
237static int	lha_read_data_none(struct archive_read *, const void **,
238		    size_t *, int64_t *);
239static int	lha_read_data_lzh(struct archive_read *, const void **,
240		    size_t *, int64_t *);
241static void	lha_crc16_init(void);
242static uint16_t lha_crc16(uint16_t, const void *, size_t);
243static int	lzh_decode_init(struct lzh_stream *, const char *);
244static void	lzh_decode_free(struct lzh_stream *);
245static int	lzh_decode(struct lzh_stream *, int);
246static int	lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
247static int	lzh_huffman_init(struct huffman *, size_t, int);
248static void	lzh_huffman_free(struct huffman *);
249static int	lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
250static int	lzh_make_fake_table(struct huffman *, uint16_t);
251static int	lzh_make_huffman_table(struct huffman *);
252static inline int lzh_decode_huffman(struct huffman *, unsigned);
253static int	lzh_decode_huffman_tree(struct huffman *, unsigned, int);
254
255
256int
257archive_read_support_format_lha(struct archive *_a)
258{
259	struct archive_read *a = (struct archive_read *)_a;
260	struct lha *lha;
261	int r;
262
263	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
264	    ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
265
266	lha = (struct lha *)calloc(1, sizeof(*lha));
267	if (lha == NULL) {
268		archive_set_error(&a->archive, ENOMEM,
269		    "Can't allocate lha data");
270		return (ARCHIVE_FATAL);
271	}
272	archive_string_init(&lha->ws);
273
274	r = __archive_read_register_format(a,
275	    lha,
276	    "lha",
277	    archive_read_format_lha_bid,
278	    archive_read_format_lha_options,
279	    archive_read_format_lha_read_header,
280	    archive_read_format_lha_read_data,
281	    archive_read_format_lha_read_data_skip,
282	    NULL,
283	    archive_read_format_lha_cleanup,
284	    NULL,
285	    NULL);
286
287	if (r != ARCHIVE_OK)
288		free(lha);
289	return (ARCHIVE_OK);
290}
291
292static size_t
293lha_check_header_format(const void *h)
294{
295	const unsigned char *p = h;
296	size_t next_skip_bytes;
297
298	switch (p[H_METHOD_OFFSET+3]) {
299	/*
300	 * "-lh0-" ... "-lh7-" "-lhd-"
301	 * "-lzs-" "-lz5-"
302	 */
303	case '0': case '1': case '2': case '3':
304	case '4': case '5': case '6': case '7':
305	case 'd':
306	case 's':
307		next_skip_bytes = 4;
308
309		/* b0 == 0 means the end of an LHa archive file.	*/
310		if (p[0] == 0)
311			break;
312		if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
313		    ||  p[H_METHOD_OFFSET+4] != '-')
314			break;
315
316		if (p[H_METHOD_OFFSET+2] == 'h') {
317			/* "-lh?-" */
318			if (p[H_METHOD_OFFSET+3] == 's')
319				break;
320			if (p[H_LEVEL_OFFSET] == 0)
321				return (0);
322			if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
323				return (0);
324		}
325		if (p[H_METHOD_OFFSET+2] == 'z') {
326			/* LArc extensions: -lzs-,-lz4- and -lz5- */
327			if (p[H_LEVEL_OFFSET] != 0)
328				break;
329			if (p[H_METHOD_OFFSET+3] == 's'
330			    || p[H_METHOD_OFFSET+3] == '4'
331			    || p[H_METHOD_OFFSET+3] == '5')
332				return (0);
333		}
334		break;
335	case 'h': next_skip_bytes = 1; break;
336	case 'z': next_skip_bytes = 1; break;
337	case 'l': next_skip_bytes = 2; break;
338	case '-': next_skip_bytes = 3; break;
339	default : next_skip_bytes = 4; break;
340	}
341
342	return (next_skip_bytes);
343}
344
345static int
346archive_read_format_lha_bid(struct archive_read *a, int best_bid)
347{
348	const char *p;
349	const void *buff;
350	ssize_t bytes_avail, offset, window;
351	size_t next;
352
353	/* If there's already a better bid than we can ever
354	   make, don't bother testing. */
355	if (best_bid > 30)
356		return (-1);
357
358	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
359		return (-1);
360
361	if (lha_check_header_format(p) == 0)
362		return (30);
363
364	if (p[0] == 'M' && p[1] == 'Z') {
365		/* PE file */
366		offset = 0;
367		window = 4096;
368		while (offset < (1024 * 20)) {
369			buff = __archive_read_ahead(a, offset + window,
370			    &bytes_avail);
371			if (buff == NULL) {
372				/* Remaining bytes are less than window. */
373				window >>= 1;
374				if (window < (H_SIZE + 3))
375					return (0);
376				continue;
377			}
378			p = (const char *)buff + offset;
379			while (p + H_SIZE < (const char *)buff + bytes_avail) {
380				if ((next = lha_check_header_format(p)) == 0)
381					return (30);
382				p += next;
383			}
384			offset = p - (const char *)buff;
385		}
386	}
387	return (0);
388}
389
390static int
391archive_read_format_lha_options(struct archive_read *a,
392    const char *key, const char *val)
393{
394	struct lha *lha;
395	int ret = ARCHIVE_FAILED;
396
397	lha = (struct lha *)(a->format->data);
398	if (strcmp(key, "hdrcharset")  == 0) {
399		if (val == NULL || val[0] == 0)
400			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
401			    "lha: hdrcharset option needs a character-set name");
402		else {
403			lha->opt_sconv =
404			    archive_string_conversion_from_charset(
405				&a->archive, val, 0);
406			if (lha->opt_sconv != NULL)
407				ret = ARCHIVE_OK;
408			else
409				ret = ARCHIVE_FATAL;
410		}
411		return (ret);
412	}
413
414	/* Note: The "warn" return is just to inform the options
415	 * supervisor that we didn't handle it.  It will generate
416	 * a suitable error if no one used this option. */
417	return (ARCHIVE_WARN);
418}
419
420static int
421lha_skip_sfx(struct archive_read *a)
422{
423	const void *h;
424	const char *p, *q;
425	size_t next, skip;
426	ssize_t bytes, window;
427
428	window = 4096;
429	for (;;) {
430		h = __archive_read_ahead(a, window, &bytes);
431		if (h == NULL) {
432			/* Remaining bytes are less than window. */
433			window >>= 1;
434			if (window < (H_SIZE + 3))
435				goto fatal;
436			continue;
437		}
438		if (bytes < H_SIZE)
439			goto fatal;
440		p = h;
441		q = p + bytes;
442
443		/*
444		 * Scan ahead until we find something that looks
445		 * like the lha header.
446		 */
447		while (p + H_SIZE < q) {
448			if ((next = lha_check_header_format(p)) == 0) {
449				skip = p - (const char *)h;
450				__archive_read_consume(a, skip);
451				return (ARCHIVE_OK);
452			}
453			p += next;
454		}
455		skip = p - (const char *)h;
456		__archive_read_consume(a, skip);
457	}
458fatal:
459	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
460	    "Couldn't find out LHa header");
461	return (ARCHIVE_FATAL);
462}
463
464static int
465truncated_error(struct archive_read *a)
466{
467	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
468	    "Truncated LHa header");
469	return (ARCHIVE_FATAL);
470}
471
472static int
473archive_read_format_lha_read_header(struct archive_read *a,
474    struct archive_entry *entry)
475{
476	struct archive_string linkname;
477	struct archive_string pathname;
478	struct lha *lha;
479	const unsigned char *p;
480	const char *signature;
481	int err;
482
483	lha_crc16_init();
484
485	a->archive.archive_format = ARCHIVE_FORMAT_LHA;
486	if (a->archive.archive_format_name == NULL)
487		a->archive.archive_format_name = "lha";
488
489	lha = (struct lha *)(a->format->data);
490	lha->decompress_init = 0;
491	lha->end_of_entry = 0;
492	lha->end_of_entry_cleanup = 0;
493	lha->entry_unconsumed = 0;
494
495	if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
496		/*
497		 * LHa archiver added 0 to the tail of its archive file as
498		 * the mark of the end of the archive.
499		 */
500		signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
501		if (signature == NULL || signature[0] == 0)
502			return (ARCHIVE_EOF);
503		return (truncated_error(a));
504	}
505
506	signature = (const char *)p;
507	if (lha->found_first_header == 0 &&
508	    signature[0] == 'M' && signature[1] == 'Z') {
509                /* This is an executable?  Must be self-extracting... 	*/
510		err = lha_skip_sfx(a);
511		if (err < ARCHIVE_WARN)
512			return (err);
513
514		if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
515			return (truncated_error(a));
516		signature = (const char *)p;
517	}
518	/* signature[0] == 0 means the end of an LHa archive file. */
519	if (signature[0] == 0)
520		return (ARCHIVE_EOF);
521
522	/*
523	 * Check the header format and method type.
524	 */
525	if (lha_check_header_format(p) != 0) {
526		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
527		    "Bad LHa file");
528		return (ARCHIVE_FATAL);
529	}
530
531	/* We've found the first header. */
532	lha->found_first_header = 1;
533	/* Set a default value and common data */
534	lha->header_size = 0;
535	lha->level = p[H_LEVEL_OFFSET];
536	lha->method[0] = p[H_METHOD_OFFSET+1];
537	lha->method[1] = p[H_METHOD_OFFSET+2];
538	lha->method[2] = p[H_METHOD_OFFSET+3];
539	if (memcmp(lha->method, "lhd", 3) == 0)
540		lha->directory = 1;
541	else
542		lha->directory = 0;
543	if (memcmp(lha->method, "lh0", 3) == 0 ||
544	    memcmp(lha->method, "lz4", 3) == 0)
545		lha->entry_is_compressed = 0;
546	else
547		lha->entry_is_compressed = 1;
548
549	lha->compsize = 0;
550	lha->origsize = 0;
551	lha->setflag = 0;
552	lha->birthtime = 0;
553	lha->birthtime_tv_nsec = 0;
554	lha->mtime = 0;
555	lha->mtime_tv_nsec = 0;
556	lha->atime = 0;
557	lha->atime_tv_nsec = 0;
558	lha->mode = (lha->directory)? 0777 : 0666;
559	lha->uid = 0;
560	lha->gid = 0;
561	archive_string_empty(&lha->dirname);
562	archive_string_empty(&lha->filename);
563	lha->dos_attr = 0;
564	if (lha->opt_sconv != NULL)
565		lha->sconv = lha->opt_sconv;
566	else
567		lha->sconv = NULL;
568
569	switch (p[H_LEVEL_OFFSET]) {
570	case 0:
571		err = lha_read_file_header_0(a, lha);
572		break;
573	case 1:
574		err = lha_read_file_header_1(a, lha);
575		break;
576	case 2:
577		err = lha_read_file_header_2(a, lha);
578		break;
579	case 3:
580		err = lha_read_file_header_3(a, lha);
581		break;
582	default:
583		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
584		    "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
585		err = ARCHIVE_FATAL;
586		break;
587	}
588	if (err < ARCHIVE_WARN)
589		return (err);
590
591
592	if (!lha->directory && archive_strlen(&lha->filename) == 0)
593		/* The filename has not been set */
594		return (truncated_error(a));
595
596	/*
597	 * Make a pathname from a dirname and a filename.
598	 */
599	archive_string_concat(&lha->dirname, &lha->filename);
600	archive_string_init(&pathname);
601	archive_string_init(&linkname);
602	archive_string_copy(&pathname, &lha->dirname);
603
604	if ((lha->mode & AE_IFMT) == AE_IFLNK) {
605		/*
606	 	 * Extract the symlink-name if it's included in the pathname.
607	 	 */
608		if (!lha_parse_linkname(&linkname, &pathname)) {
609			/* We couldn't get the symlink-name. */
610			archive_set_error(&a->archive,
611		    	    ARCHIVE_ERRNO_FILE_FORMAT,
612			    "Unknown symlink-name");
613			archive_string_free(&pathname);
614			archive_string_free(&linkname);
615			return (ARCHIVE_FAILED);
616		}
617	} else {
618		/*
619		 * Make sure a file-type is set.
620		 * The mode has been overridden if it is in the extended data.
621		 */
622		lha->mode = (lha->mode & ~AE_IFMT) |
623		    ((lha->directory)? AE_IFDIR: AE_IFREG);
624	}
625	if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
626	    (lha->dos_attr & 1) != 0)
627		lha->mode &= ~(0222);/* read only. */
628
629	/*
630	 * Set basic file parameters.
631	 */
632	if (archive_entry_copy_pathname_l(entry, pathname.s,
633	    pathname.length, lha->sconv) != 0) {
634		if (errno == ENOMEM) {
635			archive_set_error(&a->archive, ENOMEM,
636			    "Can't allocate memory for Pathname");
637			return (ARCHIVE_FATAL);
638		}
639		archive_set_error(&a->archive,
640		    ARCHIVE_ERRNO_FILE_FORMAT,
641		    "Pathname cannot be converted "
642		    "from %s to current locale.",
643		    archive_string_conversion_charset_name(lha->sconv));
644		err = ARCHIVE_WARN;
645	}
646	archive_string_free(&pathname);
647	if (archive_strlen(&linkname) > 0) {
648		if (archive_entry_copy_symlink_l(entry, linkname.s,
649		    linkname.length, lha->sconv) != 0) {
650			if (errno == ENOMEM) {
651				archive_set_error(&a->archive, ENOMEM,
652				    "Can't allocate memory for Linkname");
653				return (ARCHIVE_FATAL);
654			}
655			archive_set_error(&a->archive,
656			    ARCHIVE_ERRNO_FILE_FORMAT,
657			    "Linkname cannot be converted "
658			    "from %s to current locale.",
659			    archive_string_conversion_charset_name(lha->sconv));
660			err = ARCHIVE_WARN;
661		}
662	} else
663		archive_entry_set_symlink(entry, NULL);
664	archive_string_free(&linkname);
665	/*
666	 * When a header level is 0, there is a possibility that
667	 * a pathname and a symlink has '\' character, a directory
668	 * separator in DOS/Windows. So we should convert it to '/'.
669	 */
670	if (p[H_LEVEL_OFFSET] == 0)
671		lha_replace_path_separator(lha, entry);
672
673	archive_entry_set_mode(entry, lha->mode);
674	archive_entry_set_uid(entry, lha->uid);
675	archive_entry_set_gid(entry, lha->gid);
676	if (archive_strlen(&lha->uname) > 0)
677		archive_entry_set_uname(entry, lha->uname.s);
678	if (archive_strlen(&lha->gname) > 0)
679		archive_entry_set_gname(entry, lha->gname.s);
680	if (lha->setflag & BIRTHTIME_IS_SET) {
681		archive_entry_set_birthtime(entry, lha->birthtime,
682		    lha->birthtime_tv_nsec);
683		archive_entry_set_ctime(entry, lha->birthtime,
684		    lha->birthtime_tv_nsec);
685	} else {
686		archive_entry_unset_birthtime(entry);
687		archive_entry_unset_ctime(entry);
688	}
689	archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
690	if (lha->setflag & ATIME_IS_SET)
691		archive_entry_set_atime(entry, lha->atime,
692		    lha->atime_tv_nsec);
693	else
694		archive_entry_unset_atime(entry);
695	if (lha->directory || archive_entry_symlink(entry) != NULL)
696		archive_entry_unset_size(entry);
697	else
698		archive_entry_set_size(entry, lha->origsize);
699
700	/*
701	 * Prepare variables used to read a file content.
702	 */
703	lha->entry_bytes_remaining = lha->compsize;
704	lha->entry_offset = 0;
705	lha->entry_crc_calculated = 0;
706
707	/*
708	 * This file does not have a content.
709	 */
710	if (lha->directory || lha->compsize == 0)
711		lha->end_of_entry = 1;
712
713	sprintf(lha->format_name, "lha -%c%c%c-",
714	    lha->method[0], lha->method[1], lha->method[2]);
715	a->archive.archive_format_name = lha->format_name;
716
717	return (err);
718}
719
720/*
721 * Replace a DOS path separator '\' by a character '/'.
722 * Some multi-byte character set have  a character '\' in its second byte.
723 */
724static void
725lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
726{
727	const wchar_t *wp;
728	size_t i;
729
730	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
731		archive_wstrcpy(&(lha->ws), wp);
732		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
733			if (lha->ws.s[i] == L'\\')
734				lha->ws.s[i] = L'/';
735		}
736		archive_entry_copy_pathname_w(entry, lha->ws.s);
737	}
738
739	if ((wp = archive_entry_symlink_w(entry)) != NULL) {
740		archive_wstrcpy(&(lha->ws), wp);
741		for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
742			if (lha->ws.s[i] == L'\\')
743				lha->ws.s[i] = L'/';
744		}
745		archive_entry_copy_symlink_w(entry, lha->ws.s);
746	}
747}
748
749/*
750 * Header 0 format
751 *
752 * +0              +1         +2               +7                  +11
753 * +---------------+----------+----------------+-------------------+
754 * |header size(*1)|header sum|compression type|compressed size(*2)|
755 * +---------------+----------+----------------+-------------------+
756 *                             <---------------------(*1)----------*
757 *
758 * +11               +15       +17       +19            +20              +21
759 * +-----------------+---------+---------+--------------+----------------+
760 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
761 * +-----------------+---------+---------+--------------+----------------+
762 * *--------------------------------(*1)---------------------------------*
763 *
764 * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
765 * +---------------+---------+----------+----------------+------------------+
766 * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
767 * +---------------+---------+----------+----------------+------------------+
768 *                  <--(*3)->                             <------(*2)------>
769 * *----------------------(*1)-------------------------->
770 *
771 */
772#define H0_HEADER_SIZE_OFFSET	0
773#define H0_HEADER_SUM_OFFSET	1
774#define H0_COMP_SIZE_OFFSET	7
775#define H0_ORIG_SIZE_OFFSET	11
776#define H0_DOS_TIME_OFFSET	15
777#define H0_NAME_LEN_OFFSET	21
778#define H0_FILE_NAME_OFFSET	22
779#define H0_FIXED_SIZE		24
780static int
781lha_read_file_header_0(struct archive_read *a, struct lha *lha)
782{
783	const unsigned char *p;
784	int extdsize, namelen;
785	unsigned char headersum, sum_calculated;
786
787	if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
788		return (truncated_error(a));
789	lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
790	headersum = p[H0_HEADER_SUM_OFFSET];
791	lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
792	lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
793	lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
794	namelen = p[H0_NAME_LEN_OFFSET];
795	extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
796	if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
797		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
798		    "Invalid LHa header");
799		return (ARCHIVE_FATAL);
800	}
801	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
802		return (truncated_error(a));
803
804	archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
805	/* When extdsize == -2, A CRC16 value is not present in the header. */
806	if (extdsize >= 0) {
807		lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
808		lha->setflag |= CRC_IS_SET;
809	}
810	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
811
812	/* Read an extended header */
813	if (extdsize > 0) {
814		/* This extended data is set by 'LHa for UNIX' only.
815		 * Maybe fixed size.
816		 */
817		p += H0_FILE_NAME_OFFSET + namelen + 2;
818		if (p[0] == 'U' && extdsize == 12) {
819			/* p[1] is a minor version. */
820			lha->mtime = archive_le32dec(&p[2]);
821			lha->mode = archive_le16dec(&p[6]);
822			lha->uid = archive_le16dec(&p[8]);
823			lha->gid = archive_le16dec(&p[10]);
824			lha->setflag |= UNIX_MODE_IS_SET;
825		}
826	}
827	__archive_read_consume(a, lha->header_size);
828
829	if (sum_calculated != headersum) {
830		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
831		    "LHa header sum error");
832		return (ARCHIVE_FATAL);
833	}
834
835	return (ARCHIVE_OK);
836}
837
838/*
839 * Header 1 format
840 *
841 * +0              +1         +2               +7            +11
842 * +---------------+----------+----------------+-------------+
843 * |header size(*1)|header sum|compression type|skip size(*2)|
844 * +---------------+----------+----------------+-------------+
845 *                             <---------------(*1)----------*
846 *
847 * +11               +15       +17       +19            +20              +21
848 * +-----------------+---------+---------+--------------+----------------+
849 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
850 * +-----------------+---------+---------+--------------+----------------+
851 * *-------------------------------(*1)----------------------------------*
852 *
853 * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
854 * +---------------+---------+----------+-----------+-----------+
855 * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
856 * +---------------+---------+----------+-----------+-----------+
857 *                  <--(*3)->
858 * *----------------------------(*1)----------------------------*
859 *
860 * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
861 * +----------------+---------------------+------------------------+
862 * |next header size| extended header(*5) |     compressed data    |
863 * +----------------+---------------------+------------------------+
864 * *------(*1)-----> <--------------------(*2)-------------------->
865 */
866#define H1_HEADER_SIZE_OFFSET	0
867#define H1_HEADER_SUM_OFFSET	1
868#define H1_COMP_SIZE_OFFSET	7
869#define H1_ORIG_SIZE_OFFSET	11
870#define H1_DOS_TIME_OFFSET	15
871#define H1_NAME_LEN_OFFSET	21
872#define H1_FILE_NAME_OFFSET	22
873#define H1_FIXED_SIZE		27
874static int
875lha_read_file_header_1(struct archive_read *a, struct lha *lha)
876{
877	const unsigned char *p;
878	size_t extdsize;
879	int i, err, err2;
880	int namelen, padding;
881	unsigned char headersum, sum_calculated;
882
883	err = ARCHIVE_OK;
884
885	if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
886		return (truncated_error(a));
887
888	lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
889	headersum = p[H1_HEADER_SUM_OFFSET];
890	/* Note: An extended header size is included in a compsize. */
891	lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
892	lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
893	lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
894	namelen = p[H1_NAME_LEN_OFFSET];
895	/* Calculate a padding size. The result will be normally 0 only(?) */
896	padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
897
898	if (namelen > 230 || padding < 0)
899		goto invalid;
900
901	if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
902		return (truncated_error(a));
903
904	for (i = 0; i < namelen; i++) {
905		if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
906			goto invalid;/* Invalid filename. */
907	}
908	archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
909	lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
910	lha->setflag |= CRC_IS_SET;
911
912	sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
913	/* Consume used bytes but not include `next header size' data
914	 * since it will be consumed in lha_read_file_extended_header(). */
915	__archive_read_consume(a, lha->header_size - 2);
916
917	/* Read extended headers */
918	err2 = lha_read_file_extended_header(a, lha, NULL, 2,
919	    (size_t)(lha->compsize + 2), &extdsize);
920	if (err2 < ARCHIVE_WARN)
921		return (err2);
922	if (err2 < err)
923		err = err2;
924	/* Get a real compressed file size. */
925	lha->compsize -= extdsize - 2;
926
927	if (sum_calculated != headersum) {
928		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
929		    "LHa header sum error");
930		return (ARCHIVE_FATAL);
931	}
932	return (err);
933invalid:
934	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
935	    "Invalid LHa header");
936	return (ARCHIVE_FATAL);
937}
938
939/*
940 * Header 2 format
941 *
942 * +0              +2               +7                  +11               +15
943 * +---------------+----------------+-------------------+-----------------+
944 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
945 * +---------------+----------------+-------------------+-----------------+
946 *  <--------------------------------(*1)---------------------------------*
947 *
948 * +15               +19          +20              +21        +23         +24
949 * +-----------------+------------+----------------+----------+-----------+
950 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
951 * +-----------------+------------+----------------+----------+-----------+
952 * *---------------------------------(*1)---------------------------------*
953 *
954 * +24              +26                 +26+(*3)      +26+(*3)+(*4)
955 * +----------------+-------------------+-------------+-------------------+
956 * |next header size|extended header(*3)| padding(*4) |  compressed data  |
957 * +----------------+-------------------+-------------+-------------------+
958 * *--------------------------(*1)-------------------> <------(*2)------->
959 *
960 */
961#define H2_HEADER_SIZE_OFFSET	0
962#define H2_COMP_SIZE_OFFSET	7
963#define H2_ORIG_SIZE_OFFSET	11
964#define H2_TIME_OFFSET		15
965#define H2_CRC_OFFSET		21
966#define H2_FIXED_SIZE		24
967static int
968lha_read_file_header_2(struct archive_read *a, struct lha *lha)
969{
970	const unsigned char *p;
971	size_t extdsize;
972	int err, padding;
973	uint16_t header_crc;
974
975	if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
976		return (truncated_error(a));
977
978	lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
979	lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
980	lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
981	lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
982	lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
983	lha->setflag |= CRC_IS_SET;
984
985	if (lha->header_size < H2_FIXED_SIZE) {
986		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
987		    "Invalid LHa header size");
988		return (ARCHIVE_FATAL);
989	}
990
991	header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
992	__archive_read_consume(a, H2_FIXED_SIZE);
993
994	/* Read extended headers */
995	err = lha_read_file_extended_header(a, lha, &header_crc, 2,
996		  lha->header_size - H2_FIXED_SIZE, &extdsize);
997	if (err < ARCHIVE_WARN)
998		return (err);
999
1000	/* Calculate a padding size. The result will be normally 0 or 1. */
1001	padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1002	if (padding > 0) {
1003		if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1004			return (truncated_error(a));
1005		header_crc = lha_crc16(header_crc, p, padding);
1006		__archive_read_consume(a, padding);
1007	}
1008
1009	if (header_crc != lha->header_crc) {
1010		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1011		    "LHa header CRC error");
1012		return (ARCHIVE_FATAL);
1013	}
1014	return (err);
1015}
1016
1017/*
1018 * Header 3 format
1019 *
1020 * +0           +2               +7                  +11               +15
1021 * +------------+----------------+-------------------+-----------------+
1022 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1023 * +------------+----------------+-------------------+-----------------+
1024 *  <-------------------------------(*1)-------------------------------*
1025 *
1026 * +15               +19          +20              +21        +23         +24
1027 * +-----------------+------------+----------------+----------+-----------+
1028 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1029 * +-----------------+------------+----------------+----------+-----------+
1030 * *--------------------------------(*1)----------------------------------*
1031 *
1032 * +24             +28              +32                 +32+(*3)
1033 * +---------------+----------------+-------------------+-----------------+
1034 * |header size(*1)|next header size|extended header(*3)| compressed data |
1035 * +---------------+----------------+-------------------+-----------------+
1036 * *------------------------(*1)-----------------------> <------(*2)----->
1037 *
1038 */
1039#define H3_FIELD_LEN_OFFSET	0
1040#define H3_COMP_SIZE_OFFSET	7
1041#define H3_ORIG_SIZE_OFFSET	11
1042#define H3_TIME_OFFSET		15
1043#define H3_CRC_OFFSET		21
1044#define H3_HEADER_SIZE_OFFSET	24
1045#define H3_FIXED_SIZE		28
1046static int
1047lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1048{
1049	const unsigned char *p;
1050	size_t extdsize;
1051	int err;
1052	uint16_t header_crc;
1053
1054	if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1055		return (truncated_error(a));
1056
1057	if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1058		goto invalid;
1059	lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1060	lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1061	lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1062	lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1063	lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1064	lha->setflag |= CRC_IS_SET;
1065
1066	if (lha->header_size < H3_FIXED_SIZE + 4)
1067		goto invalid;
1068	header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1069	__archive_read_consume(a, H3_FIXED_SIZE);
1070
1071	/* Read extended headers */
1072	err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1073		  lha->header_size - H3_FIXED_SIZE, &extdsize);
1074	if (err < ARCHIVE_WARN)
1075		return (err);
1076
1077	if (header_crc != lha->header_crc) {
1078		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1079		    "LHa header CRC error");
1080		return (ARCHIVE_FATAL);
1081	}
1082	return (err);
1083invalid:
1084	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1085	    "Invalid LHa header");
1086	return (ARCHIVE_FATAL);
1087}
1088
1089/*
1090 * Extended header format
1091 *
1092 * +0             +2        +3  -- used in header 1 and 2
1093 * +0             +4        +5  -- used in header 3
1094 * +--------------+---------+-------------------+--------------+--
1095 * |ex-header size|header id|        data       |ex-header size| .......
1096 * +--------------+---------+-------------------+--------------+--
1097 *  <-------------( ex-header size)------------> <-- next extended header --*
1098 *
1099 * If the ex-header size is zero, it is the make of the end of extended
1100 * headers.
1101 *
1102 */
1103static int
1104lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1105    uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1106{
1107	const void *h;
1108	const unsigned char *extdheader;
1109	size_t	extdsize;
1110	size_t	datasize;
1111	unsigned int i;
1112	unsigned char extdtype;
1113
1114#define EXT_HEADER_CRC		0x00		/* Header CRC and information*/
1115#define EXT_FILENAME		0x01		/* Filename 		    */
1116#define EXT_DIRECTORY		0x02		/* Directory name	    */
1117#define EXT_DOS_ATTR		0x40		/* MS-DOS attribute	    */
1118#define EXT_TIMESTAMP		0x41		/* Windows time stamp	    */
1119#define EXT_FILESIZE		0x42		/* Large file size	    */
1120#define EXT_TIMEZONE		0x43		/* Time zone		    */
1121#define EXT_UTF16_FILENAME	0x44		/* UTF-16 filename 	    */
1122#define EXT_UTF16_DIRECTORY	0x45		/* UTF-16 directory name    */
1123#define EXT_CODEPAGE		0x46		/* Codepage		    */
1124#define EXT_UNIX_MODE		0x50		/* File permission	    */
1125#define EXT_UNIX_GID_UID	0x51		/* gid,uid		    */
1126#define EXT_UNIX_GNAME		0x52		/* Group name		    */
1127#define EXT_UNIX_UNAME		0x53		/* User name		    */
1128#define EXT_UNIX_MTIME		0x54		/* Modified time	    */
1129#define EXT_OS2_NEW_ATTR	0x7f		/* new attribute(OS/2 only) */
1130#define EXT_NEW_ATTR		0xff		/* new attribute	    */
1131
1132	*total_size = sizefield_length;
1133
1134	for (;;) {
1135		/* Read an extended header size. */
1136		if ((h =
1137		    __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1138			return (truncated_error(a));
1139		/* Check if the size is the zero indicates the end of the
1140		 * extended header. */
1141		if (sizefield_length == sizeof(uint16_t))
1142			extdsize = archive_le16dec(h);
1143		else
1144			extdsize = archive_le32dec(h);
1145		if (extdsize == 0) {
1146			/* End of extended header */
1147			if (crc != NULL)
1148				*crc = lha_crc16(*crc, h, sizefield_length);
1149			__archive_read_consume(a, sizefield_length);
1150			return (ARCHIVE_OK);
1151		}
1152
1153		/* Sanity check to the extended header size. */
1154		if (((uint64_t)*total_size + extdsize) >
1155				    (uint64_t)limitsize ||
1156		    extdsize <= (size_t)sizefield_length)
1157			goto invalid;
1158
1159		/* Read the extended header. */
1160		if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1161			return (truncated_error(a));
1162		*total_size += extdsize;
1163
1164		extdheader = (const unsigned char *)h;
1165		/* Get the extended header type. */
1166		extdtype = extdheader[sizefield_length];
1167		/* Calculate an extended data size. */
1168		datasize = extdsize - (1 + sizefield_length);
1169		/* Skip an extended header size field and type field. */
1170		extdheader += sizefield_length + 1;
1171
1172		if (crc != NULL && extdtype != EXT_HEADER_CRC)
1173			*crc = lha_crc16(*crc, h, extdsize);
1174		switch (extdtype) {
1175		case EXT_HEADER_CRC:
1176			/* We only use a header CRC. Following data will not
1177			 * be used. */
1178			if (datasize >= 2) {
1179				lha->header_crc = archive_le16dec(extdheader);
1180				if (crc != NULL) {
1181					static const char zeros[2] = {0, 0};
1182					*crc = lha_crc16(*crc, h,
1183					    extdsize - datasize);
1184					/* CRC value itself as zero */
1185					*crc = lha_crc16(*crc, zeros, 2);
1186					*crc = lha_crc16(*crc,
1187					    extdheader+2, datasize - 2);
1188				}
1189			}
1190			break;
1191		case EXT_FILENAME:
1192			if (datasize == 0) {
1193				/* maybe directory header */
1194				archive_string_empty(&lha->filename);
1195				break;
1196			}
1197			if (extdheader[0] == '\0')
1198				goto invalid;
1199			archive_strncpy(&lha->filename,
1200			    (const char *)extdheader, datasize);
1201			break;
1202		case EXT_DIRECTORY:
1203			if (datasize == 0 || extdheader[0] == '\0')
1204				/* no directory name data. exit this case. */
1205				goto invalid;
1206
1207			archive_strncpy(&lha->dirname,
1208		  	    (const char *)extdheader, datasize);
1209			/*
1210			 * Convert directory delimiter from 0xFF
1211			 * to '/' for local system.
1212	 		 */
1213			for (i = 0; i < lha->dirname.length; i++) {
1214				if ((unsigned char)lha->dirname.s[i] == 0xFF)
1215					lha->dirname.s[i] = '/';
1216			}
1217			/* Is last character directory separator? */
1218			if (lha->dirname.s[lha->dirname.length-1] != '/')
1219				/* invalid directory data */
1220				goto invalid;
1221			break;
1222		case EXT_DOS_ATTR:
1223			if (datasize == 2)
1224				lha->dos_attr = (unsigned char)
1225				    (archive_le16dec(extdheader) & 0xff);
1226			break;
1227		case EXT_TIMESTAMP:
1228			if (datasize == (sizeof(uint64_t) * 3)) {
1229				lha->birthtime = lha_win_time(
1230				    archive_le64dec(extdheader),
1231				    &lha->birthtime_tv_nsec);
1232				extdheader += sizeof(uint64_t);
1233				lha->mtime = lha_win_time(
1234				    archive_le64dec(extdheader),
1235				    &lha->mtime_tv_nsec);
1236				extdheader += sizeof(uint64_t);
1237				lha->atime = lha_win_time(
1238				    archive_le64dec(extdheader),
1239				    &lha->atime_tv_nsec);
1240				lha->setflag |= BIRTHTIME_IS_SET |
1241				    ATIME_IS_SET;
1242			}
1243			break;
1244		case EXT_FILESIZE:
1245			if (datasize == sizeof(uint64_t) * 2) {
1246				lha->compsize = archive_le64dec(extdheader);
1247				extdheader += sizeof(uint64_t);
1248				lha->origsize = archive_le64dec(extdheader);
1249			}
1250			break;
1251		case EXT_CODEPAGE:
1252			/* Get an archived filename charset from codepage.
1253			 * This overwrites the charset specified by
1254			 * hdrcharset option. */
1255			if (datasize == sizeof(uint32_t)) {
1256				struct archive_string cp;
1257				const char *charset;
1258
1259				archive_string_init(&cp);
1260				switch (archive_le32dec(extdheader)) {
1261				case 65001: /* UTF-8 */
1262					charset = "UTF-8";
1263					break;
1264				default:
1265					archive_string_sprintf(&cp, "CP%d",
1266					    (int)archive_le32dec(extdheader));
1267					charset = cp.s;
1268					break;
1269				}
1270				lha->sconv =
1271				    archive_string_conversion_from_charset(
1272					&(a->archive), charset, 1);
1273				archive_string_free(&cp);
1274				if (lha->sconv == NULL)
1275					return (ARCHIVE_FATAL);
1276			}
1277			break;
1278		case EXT_UNIX_MODE:
1279			if (datasize == sizeof(uint16_t)) {
1280				lha->mode = archive_le16dec(extdheader);
1281				lha->setflag |= UNIX_MODE_IS_SET;
1282			}
1283			break;
1284		case EXT_UNIX_GID_UID:
1285			if (datasize == (sizeof(uint16_t) * 2)) {
1286				lha->gid = archive_le16dec(extdheader);
1287				lha->uid = archive_le16dec(extdheader+2);
1288			}
1289			break;
1290		case EXT_UNIX_GNAME:
1291			if (datasize > 0)
1292				archive_strncpy(&lha->gname,
1293				    (const char *)extdheader, datasize);
1294			break;
1295		case EXT_UNIX_UNAME:
1296			if (datasize > 0)
1297				archive_strncpy(&lha->uname,
1298				    (const char *)extdheader, datasize);
1299			break;
1300		case EXT_UNIX_MTIME:
1301			if (datasize == sizeof(uint32_t))
1302				lha->mtime = archive_le32dec(extdheader);
1303			break;
1304		case EXT_OS2_NEW_ATTR:
1305			/* This extended header is OS/2 depend. */
1306			if (datasize == 16) {
1307				lha->dos_attr = (unsigned char)
1308				    (archive_le16dec(extdheader) & 0xff);
1309				lha->mode = archive_le16dec(extdheader+2);
1310				lha->gid = archive_le16dec(extdheader+4);
1311				lha->uid = archive_le16dec(extdheader+6);
1312				lha->birthtime = archive_le32dec(extdheader+8);
1313				lha->atime = archive_le32dec(extdheader+12);
1314				lha->setflag |= UNIX_MODE_IS_SET
1315				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1316			}
1317			break;
1318		case EXT_NEW_ATTR:
1319			if (datasize == 20) {
1320				lha->mode = (mode_t)archive_le32dec(extdheader);
1321				lha->gid = archive_le32dec(extdheader+4);
1322				lha->uid = archive_le32dec(extdheader+8);
1323				lha->birthtime = archive_le32dec(extdheader+12);
1324				lha->atime = archive_le32dec(extdheader+16);
1325				lha->setflag |= UNIX_MODE_IS_SET
1326				    | BIRTHTIME_IS_SET | ATIME_IS_SET;
1327			}
1328			break;
1329		case EXT_TIMEZONE:		/* Not supported */
1330		case EXT_UTF16_FILENAME:	/* Not supported */
1331		case EXT_UTF16_DIRECTORY:	/* Not supported */
1332		default:
1333			break;
1334		}
1335
1336		__archive_read_consume(a, extdsize);
1337	}
1338invalid:
1339	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1340	    "Invalid extended LHa header");
1341	return (ARCHIVE_FATAL);
1342}
1343
1344static int
1345lha_end_of_entry(struct archive_read *a)
1346{
1347	struct lha *lha = (struct lha *)(a->format->data);
1348	int r = ARCHIVE_EOF;
1349
1350	if (!lha->end_of_entry_cleanup) {
1351		if ((lha->setflag & CRC_IS_SET) &&
1352		    lha->crc != lha->entry_crc_calculated) {
1353			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1354			    "LHa data CRC error");
1355			r = ARCHIVE_WARN;
1356		}
1357
1358		/* End-of-entry cleanup done. */
1359		lha->end_of_entry_cleanup = 1;
1360	}
1361	return (r);
1362}
1363
1364static int
1365archive_read_format_lha_read_data(struct archive_read *a,
1366    const void **buff, size_t *size, int64_t *offset)
1367{
1368	struct lha *lha = (struct lha *)(a->format->data);
1369	int r;
1370
1371	if (lha->entry_unconsumed) {
1372		/* Consume as much as the decompressor actually used. */
1373		__archive_read_consume(a, lha->entry_unconsumed);
1374		lha->entry_unconsumed = 0;
1375	}
1376	if (lha->end_of_entry) {
1377		*offset = lha->entry_offset;
1378		*size = 0;
1379		*buff = NULL;
1380		return (lha_end_of_entry(a));
1381	}
1382
1383	if (lha->entry_is_compressed)
1384		r =  lha_read_data_lzh(a, buff, size, offset);
1385	else
1386		/* No compression. */
1387		r =  lha_read_data_none(a, buff, size, offset);
1388	return (r);
1389}
1390
1391/*
1392 * Read a file content in no compression.
1393 *
1394 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1395 * lha->end_of_entry if it consumes all of the data.
1396 */
1397static int
1398lha_read_data_none(struct archive_read *a, const void **buff,
1399    size_t *size, int64_t *offset)
1400{
1401	struct lha *lha = (struct lha *)(a->format->data);
1402	ssize_t bytes_avail;
1403
1404	if (lha->entry_bytes_remaining == 0) {
1405		*buff = NULL;
1406		*size = 0;
1407		*offset = lha->entry_offset;
1408		lha->end_of_entry = 1;
1409		return (ARCHIVE_OK);
1410	}
1411	/*
1412	 * Note: '1' here is a performance optimization.
1413	 * Recall that the decompression layer returns a count of
1414	 * available bytes; asking for more than that forces the
1415	 * decompressor to combine reads by copying data.
1416	 */
1417	*buff = __archive_read_ahead(a, 1, &bytes_avail);
1418	if (bytes_avail <= 0) {
1419		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1420		    "Truncated LHa file data");
1421		return (ARCHIVE_FATAL);
1422	}
1423	if (bytes_avail > lha->entry_bytes_remaining)
1424		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1425	lha->entry_crc_calculated =
1426	    lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1427	*size = bytes_avail;
1428	*offset = lha->entry_offset;
1429	lha->entry_offset += bytes_avail;
1430	lha->entry_bytes_remaining -= bytes_avail;
1431	if (lha->entry_bytes_remaining == 0)
1432		lha->end_of_entry = 1;
1433	lha->entry_unconsumed = bytes_avail;
1434	return (ARCHIVE_OK);
1435}
1436
1437/*
1438 * Read a file content in LZHUFF encoding.
1439 *
1440 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1441 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1442 * all of the data.
1443 */
1444static int
1445lha_read_data_lzh(struct archive_read *a, const void **buff,
1446    size_t *size, int64_t *offset)
1447{
1448	struct lha *lha = (struct lha *)(a->format->data);
1449	ssize_t bytes_avail;
1450	int r;
1451
1452	/* If we haven't yet read any data, initialize the decompressor. */
1453	if (!lha->decompress_init) {
1454		r = lzh_decode_init(&(lha->strm), lha->method);
1455		switch (r) {
1456		case ARCHIVE_OK:
1457			break;
1458		case ARCHIVE_FAILED:
1459        		/* Unsupported compression. */
1460			*buff = NULL;
1461			*size = 0;
1462			*offset = 0;
1463			archive_set_error(&a->archive,
1464			    ARCHIVE_ERRNO_FILE_FORMAT,
1465			    "Unsupported lzh compression method -%c%c%c-",
1466			    lha->method[0], lha->method[1], lha->method[2]);
1467			/* We know compressed size; just skip it. */
1468			archive_read_format_lha_read_data_skip(a);
1469			return (ARCHIVE_WARN);
1470		default:
1471			archive_set_error(&a->archive, ENOMEM,
1472			    "Couldn't allocate memory "
1473			    "for lzh decompression");
1474			return (ARCHIVE_FATAL);
1475		}
1476		/* We've initialized decompression for this stream. */
1477		lha->decompress_init = 1;
1478		lha->strm.avail_out = 0;
1479		lha->strm.total_out = 0;
1480	}
1481
1482	/*
1483	 * Note: '1' here is a performance optimization.
1484	 * Recall that the decompression layer returns a count of
1485	 * available bytes; asking for more than that forces the
1486	 * decompressor to combine reads by copying data.
1487	 */
1488	lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1489	if (bytes_avail <= 0) {
1490		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1491		    "Truncated LHa file body");
1492		return (ARCHIVE_FATAL);
1493	}
1494	if (bytes_avail > lha->entry_bytes_remaining)
1495		bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1496
1497	lha->strm.avail_in = (int)bytes_avail;
1498	lha->strm.total_in = 0;
1499	lha->strm.avail_out = 0;
1500
1501	r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1502	switch (r) {
1503	case ARCHIVE_OK:
1504		break;
1505	case ARCHIVE_EOF:
1506		lha->end_of_entry = 1;
1507		break;
1508	default:
1509		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1510		    "Bad lzh data");
1511		return (ARCHIVE_FAILED);
1512	}
1513	lha->entry_unconsumed = lha->strm.total_in;
1514	lha->entry_bytes_remaining -= lha->strm.total_in;
1515
1516	if (lha->strm.avail_out) {
1517		*offset = lha->entry_offset;
1518		*size = lha->strm.avail_out;
1519		*buff = lha->strm.ref_ptr;
1520		lha->entry_crc_calculated =
1521		    lha_crc16(lha->entry_crc_calculated, *buff, *size);
1522		lha->entry_offset += *size;
1523	} else {
1524		*offset = lha->entry_offset;
1525		*size = 0;
1526		*buff = NULL;
1527		if (lha->end_of_entry)
1528			return (lha_end_of_entry(a));
1529	}
1530	return (ARCHIVE_OK);
1531}
1532
1533/*
1534 * Skip a file content.
1535 */
1536static int
1537archive_read_format_lha_read_data_skip(struct archive_read *a)
1538{
1539	struct lha *lha;
1540	int64_t bytes_skipped;
1541
1542	lha = (struct lha *)(a->format->data);
1543
1544	if (lha->entry_unconsumed) {
1545		/* Consume as much as the decompressor actually used. */
1546		__archive_read_consume(a, lha->entry_unconsumed);
1547		lha->entry_unconsumed = 0;
1548	}
1549
1550	/* if we've already read to end of data, we're done. */
1551	if (lha->end_of_entry_cleanup)
1552		return (ARCHIVE_OK);
1553
1554	/*
1555	 * If the length is at the beginning, we can skip the
1556	 * compressed data much more quickly.
1557	 */
1558	bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1559	if (bytes_skipped < 0)
1560		return (ARCHIVE_FATAL);
1561
1562	/* This entry is finished and done. */
1563	lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1564	return (ARCHIVE_OK);
1565}
1566
1567static int
1568archive_read_format_lha_cleanup(struct archive_read *a)
1569{
1570	struct lha *lha = (struct lha *)(a->format->data);
1571
1572	lzh_decode_free(&(lha->strm));
1573	archive_string_free(&(lha->dirname));
1574	archive_string_free(&(lha->filename));
1575	archive_string_free(&(lha->uname));
1576	archive_string_free(&(lha->gname));
1577	archive_wstring_free(&(lha->ws));
1578	free(lha);
1579	(a->format->data) = NULL;
1580	return (ARCHIVE_OK);
1581}
1582
1583/*
1584 * 'LHa for UNIX' utility has archived a symbolic-link name after
1585 * a pathname with '|' character.
1586 * This function extracts the symbolic-link name from the pathname.
1587 *
1588 * example.
1589 *   1. a symbolic-name is 'aaa/bb/cc'
1590 *   2. a filename is 'xxx/bbb'
1591 *  then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1592 */
1593static int
1594lha_parse_linkname(struct archive_string *linkname,
1595    struct archive_string *pathname)
1596{
1597	char *	linkptr;
1598	size_t 	symlen;
1599
1600	linkptr = strchr(pathname->s, '|');
1601	if (linkptr != NULL) {
1602		symlen = strlen(linkptr + 1);
1603		archive_strncpy(linkname, linkptr+1, symlen);
1604
1605		*linkptr = 0;
1606		pathname->length = strlen(pathname->s);
1607
1608		return (1);
1609	}
1610	return (0);
1611}
1612
1613/* Convert an MSDOS-style date/time into Unix-style time. */
1614static time_t
1615lha_dos_time(const unsigned char *p)
1616{
1617	int msTime, msDate;
1618	struct tm ts;
1619
1620	msTime = archive_le16dec(p);
1621	msDate = archive_le16dec(p+2);
1622
1623	memset(&ts, 0, sizeof(ts));
1624	ts.tm_year = ((msDate >> 9) & 0x7f) + 80;   /* Years since 1900. */
1625	ts.tm_mon = ((msDate >> 5) & 0x0f) - 1;     /* Month number.     */
1626	ts.tm_mday = msDate & 0x1f;		    /* Day of month.     */
1627	ts.tm_hour = (msTime >> 11) & 0x1f;
1628	ts.tm_min = (msTime >> 5) & 0x3f;
1629	ts.tm_sec = (msTime << 1) & 0x3e;
1630	ts.tm_isdst = -1;
1631	return (mktime(&ts));
1632}
1633
1634/* Convert an MS-Windows-style date/time into Unix-style time. */
1635static time_t
1636lha_win_time(uint64_t wintime, long *ns)
1637{
1638#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
1639
1640	if (wintime >= EPOC_TIME) {
1641		wintime -= EPOC_TIME;	/* 1970-01-01 00:00:00 (UTC) */
1642		if (ns != NULL)
1643			*ns = (long)(wintime % 10000000) * 100;
1644		return (wintime / 10000000);
1645	} else {
1646		if (ns != NULL)
1647			*ns = 0;
1648		return (0);
1649	}
1650}
1651
1652static unsigned char
1653lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
1654{
1655	unsigned char const *p = (unsigned char const *)pp;
1656
1657	p += offset;
1658	for (;size > 0; --size)
1659		sum += *p++;
1660	return (sum);
1661}
1662
1663static uint16_t crc16tbl[2][256];
1664static void
1665lha_crc16_init(void)
1666{
1667	unsigned int i;
1668	static int crc16init = 0;
1669
1670	if (crc16init)
1671		return;
1672	crc16init = 1;
1673
1674	for (i = 0; i < 256; i++) {
1675		unsigned int j;
1676		uint16_t crc = (uint16_t)i;
1677		for (j = 8; j; j--)
1678			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
1679		crc16tbl[0][i] = crc;
1680	}
1681
1682	for (i = 0; i < 256; i++) {
1683		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
1684			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
1685	}
1686}
1687
1688static uint16_t
1689lha_crc16(uint16_t crc, const void *pp, size_t len)
1690{
1691	const unsigned char *p = (const unsigned char *)pp;
1692	const uint16_t *buff;
1693	const union {
1694		uint32_t i;
1695		char c[4];
1696	} u = { 0x01020304 };
1697
1698	if (len == 0)
1699		return crc;
1700
1701	/* Process unaligned address. */
1702	if (((uintptr_t)p) & (uintptr_t)0x1) {
1703		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1704		len--;
1705	}
1706	buff = (const uint16_t *)p;
1707	/*
1708	 * Modern C compiler such as GCC does not unroll automatically yet
1709	 * without unrolling pragma, and Clang is so. So we should
1710	 * unroll this loop for its performance.
1711	 */
1712	for (;len >= 8; len -= 8) {
1713		/* This if statement expects compiler optimization will
1714		 * remove the stament which will not be executed. */
1715#if defined(_MSC_VER) && _MSC_VER >= 1400  /* Visual Studio */
1716#  define bswap16(x) _byteswap_ushort(x)
1717#elif (defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8) \
1718      || defined(__clang__)
1719#  define bswap16(x) __builtin_bswap16(x)
1720#else
1721#  define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1722#endif
1723#define CRC16W	do { 	\
1724		if(u.c[0] == 1) { /* Big endian */		\
1725			crc ^= bswap16(*buff); buff++;		\
1726		} else						\
1727			crc ^= *buff++;				\
1728		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1729} while (0)
1730		CRC16W;
1731		CRC16W;
1732		CRC16W;
1733		CRC16W;
1734#undef CRC16W
1735#undef bswap16
1736	}
1737
1738	p = (const unsigned char *)buff;
1739	for (;len; len--) {
1740		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1741	}
1742	return crc;
1743}
1744
1745/*
1746 * Initialize LZHUF decoder.
1747 *
1748 * Returns ARCHIVE_OK if initialization was successful.
1749 * Returns ARCHIVE_FAILED if method is unsupported.
1750 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1751 * error occurred.
1752 */
1753static int
1754lzh_decode_init(struct lzh_stream *strm, const char *method)
1755{
1756	struct lzh_dec *ds;
1757	int w_bits, w_size;
1758
1759	if (strm->ds == NULL) {
1760		strm->ds = calloc(1, sizeof(*strm->ds));
1761		if (strm->ds == NULL)
1762			return (ARCHIVE_FATAL);
1763	}
1764	ds = strm->ds;
1765	ds->error = ARCHIVE_FAILED;
1766	if (method == NULL || method[0] != 'l' || method[1] != 'h')
1767		return (ARCHIVE_FAILED);
1768	switch (method[2]) {
1769	case '5':
1770		w_bits = 13;/* 8KiB for window */
1771		break;
1772	case '6':
1773		w_bits = 15;/* 32KiB for window */
1774		break;
1775	case '7':
1776		w_bits = 16;/* 64KiB for window */
1777		break;
1778	default:
1779		return (ARCHIVE_FAILED);/* Not supported. */
1780	}
1781	ds->error = ARCHIVE_FATAL;
1782	/* Expand a window size up to 128 KiB for decompressing process
1783	 * performance whatever its original window size is. */
1784	ds->w_size = 1U << 17;
1785	ds->w_mask = ds->w_size -1;
1786	if (ds->w_buff == NULL) {
1787		ds->w_buff = malloc(ds->w_size);
1788		if (ds->w_buff == NULL)
1789			return (ARCHIVE_FATAL);
1790	}
1791	w_size = 1U << w_bits;
1792	memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1793	ds->w_pos = 0;
1794	ds->state = 0;
1795	ds->pos_pt_len_size = w_bits + 1;
1796	ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1797	ds->literal_pt_len_size = PT_BITLEN_SIZE;
1798	ds->literal_pt_len_bits = 5;
1799	ds->br.cache_buffer = 0;
1800	ds->br.cache_avail = 0;
1801
1802	if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1803	    != ARCHIVE_OK)
1804		return (ARCHIVE_FATAL);
1805	ds->lt.len_bits = 9;
1806	if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1807	    != ARCHIVE_OK)
1808		return (ARCHIVE_FATAL);
1809	ds->error = 0;
1810
1811	return (ARCHIVE_OK);
1812}
1813
1814/*
1815 * Release LZHUF decoder.
1816 */
1817static void
1818lzh_decode_free(struct lzh_stream *strm)
1819{
1820
1821	if (strm->ds == NULL)
1822		return;
1823	free(strm->ds->w_buff);
1824	lzh_huffman_free(&(strm->ds->lt));
1825	lzh_huffman_free(&(strm->ds->pt));
1826	free(strm->ds);
1827	strm->ds = NULL;
1828}
1829
1830/*
1831 * Bit stream reader.
1832 */
1833/* Check that the cache buffer has enough bits. */
1834#define lzh_br_has(br, n)	((br)->cache_avail >= n)
1835/* Get compressed data by bit. */
1836#define lzh_br_bits(br, n)				\
1837	(((uint16_t)((br)->cache_buffer >>		\
1838		((br)->cache_avail - (n)))) & cache_masks[n])
1839#define lzh_br_bits_forced(br, n)			\
1840	(((uint16_t)((br)->cache_buffer <<		\
1841		((n) - (br)->cache_avail))) & cache_masks[n])
1842/* Read ahead to make sure the cache buffer has enough compressed data we
1843 * will use.
1844 *  True  : completed, there is enough data in the cache buffer.
1845 *  False : we met that strm->next_in is empty, we have to get following
1846 *          bytes. */
1847#define lzh_br_read_ahead_0(strm, br, n)	\
1848	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
1849/*  True  : the cache buffer has some bits as much as we need.
1850 *  False : there are no enough bits in the cache buffer to be used,
1851 *          we have to get following bytes if we could. */
1852#define lzh_br_read_ahead(strm, br, n)	\
1853	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))
1854
1855/* Notify how many bits we consumed. */
1856#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
1857#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))
1858
1859static const uint16_t cache_masks[] = {
1860	0x0000, 0x0001, 0x0003, 0x0007,
1861	0x000F, 0x001F, 0x003F, 0x007F,
1862	0x00FF, 0x01FF, 0x03FF, 0x07FF,
1863	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
1864	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
1865};
1866
1867/*
1868 * Shift away used bits in the cache data and fill it up with following bits.
1869 * Call this when cache buffer does not have enough bits you need.
1870 *
1871 * Returns 1 if the cache buffer is full.
1872 * Returns 0 if the cache buffer is not full; input buffer is empty.
1873 */
1874static int
1875lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1876{
1877	int n = CACHE_BITS - br->cache_avail;
1878
1879	for (;;) {
1880		const int x = n >> 3;
1881		if (strm->avail_in >= x) {
1882			switch (x) {
1883			case 8:
1884				br->cache_buffer =
1885				    ((uint64_t)strm->next_in[0]) << 56 |
1886				    ((uint64_t)strm->next_in[1]) << 48 |
1887				    ((uint64_t)strm->next_in[2]) << 40 |
1888				    ((uint64_t)strm->next_in[3]) << 32 |
1889				    ((uint32_t)strm->next_in[4]) << 24 |
1890				    ((uint32_t)strm->next_in[5]) << 16 |
1891				    ((uint32_t)strm->next_in[6]) << 8 |
1892				     (uint32_t)strm->next_in[7];
1893				strm->next_in += 8;
1894				strm->avail_in -= 8;
1895				br->cache_avail += 8 * 8;
1896				return (1);
1897			case 7:
1898				br->cache_buffer =
1899		 		   (br->cache_buffer << 56) |
1900				    ((uint64_t)strm->next_in[0]) << 48 |
1901				    ((uint64_t)strm->next_in[1]) << 40 |
1902				    ((uint64_t)strm->next_in[2]) << 32 |
1903				    ((uint32_t)strm->next_in[3]) << 24 |
1904				    ((uint32_t)strm->next_in[4]) << 16 |
1905				    ((uint32_t)strm->next_in[5]) << 8 |
1906				     (uint32_t)strm->next_in[6];
1907				strm->next_in += 7;
1908				strm->avail_in -= 7;
1909				br->cache_avail += 7 * 8;
1910				return (1);
1911			case 6:
1912				br->cache_buffer =
1913		 		   (br->cache_buffer << 48) |
1914				    ((uint64_t)strm->next_in[0]) << 40 |
1915				    ((uint64_t)strm->next_in[1]) << 32 |
1916				    ((uint32_t)strm->next_in[2]) << 24 |
1917				    ((uint32_t)strm->next_in[3]) << 16 |
1918				    ((uint32_t)strm->next_in[4]) << 8 |
1919				     (uint32_t)strm->next_in[5];
1920				strm->next_in += 6;
1921				strm->avail_in -= 6;
1922				br->cache_avail += 6 * 8;
1923				return (1);
1924			case 0:
1925				/* We have enough compressed data in
1926				 * the cache buffer.*/
1927				return (1);
1928			default:
1929				break;
1930			}
1931		}
1932		if (strm->avail_in == 0) {
1933			/* There is not enough compressed data to fill up the
1934			 * cache buffer. */
1935			return (0);
1936		}
1937		br->cache_buffer =
1938		   (br->cache_buffer << 8) | *strm->next_in++;
1939		strm->avail_in--;
1940		br->cache_avail += 8;
1941		n -= 8;
1942	}
1943}
1944
1945/*
1946 * Decode LZHUF.
1947 *
1948 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
1949 *    Please set available buffer and call this function again.
1950 * 2. Returns ARCHIVE_EOF if decompression has been completed.
1951 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
1952 *    is broken or you do not set 'last' flag properly.
1953 * 4. 'last' flag is very important, you must set 1 to the flag if there
1954 *    is no input data. The lha compressed data format does not provide how
1955 *    to know the compressed data is really finished.
1956 *    Note: lha command utility check if the total size of output bytes is
1957 *    reached the uncompressed size recorded in its header. it does not mind
1958 *    that the decoding process is properly finished.
1959 *    GNU ZIP can decompress another compressed file made by SCO LZH compress.
1960 *    it handles EOF as null to fill read buffer with zero until the decoding
1961 *    process meet 2 bytes of zeros at reading a size of a next chunk, so the
1962 *    zeros are treated as the mark of the end of the data although the zeros
1963 *    is dummy, not the file data.
1964 */
1965static int	lzh_read_blocks(struct lzh_stream *, int);
1966static int	lzh_decode_blocks(struct lzh_stream *, int);
1967#define ST_RD_BLOCK		0
1968#define ST_RD_PT_1		1
1969#define ST_RD_PT_2		2
1970#define ST_RD_PT_3		3
1971#define ST_RD_PT_4		4
1972#define ST_RD_LITERAL_1		5
1973#define ST_RD_LITERAL_2		6
1974#define ST_RD_LITERAL_3		7
1975#define ST_RD_POS_DATA_1	8
1976#define ST_GET_LITERAL		9
1977#define ST_GET_POS_1		10
1978#define ST_GET_POS_2		11
1979#define ST_COPY_DATA		12
1980
1981static int
1982lzh_decode(struct lzh_stream *strm, int last)
1983{
1984	struct lzh_dec *ds = strm->ds;
1985	int avail_in;
1986	int r;
1987
1988	if (ds->error)
1989		return (ds->error);
1990
1991	avail_in = strm->avail_in;
1992	do {
1993		if (ds->state < ST_GET_LITERAL)
1994			r = lzh_read_blocks(strm, last);
1995		else
1996			r = lzh_decode_blocks(strm, last);
1997	} while (r == 100);
1998	strm->total_in += avail_in - strm->avail_in;
1999	return (r);
2000}
2001
2002static void
2003lzh_emit_window(struct lzh_stream *strm, size_t s)
2004{
2005	strm->ref_ptr = strm->ds->w_buff;
2006	strm->avail_out = (int)s;
2007	strm->total_out += s;
2008}
2009
2010static int
2011lzh_read_blocks(struct lzh_stream *strm, int last)
2012{
2013	struct lzh_dec *ds = strm->ds;
2014	struct lzh_br *br = &(ds->br);
2015	int c = 0, i;
2016	unsigned rbits;
2017
2018	for (;;) {
2019		switch (ds->state) {
2020		case ST_RD_BLOCK:
2021			/*
2022			 * Read a block number indicates how many blocks
2023			 * we will handle. The block is composed of a
2024			 * literal and a match, sometimes a literal only
2025			 * in particular, there are no reference data at
2026			 * the beginning of the decompression.
2027			 */
2028			if (!lzh_br_read_ahead_0(strm, br, 16)) {
2029				if (!last)
2030					/* We need following data. */
2031					return (ARCHIVE_OK);
2032				if (lzh_br_has(br, 8)) {
2033					/*
2034					 * It seems there are extra bits.
2035					 *  1. Compressed data is broken.
2036					 *  2. `last' flag does not properly
2037					 *     set.
2038					 */
2039					goto failed;
2040				}
2041				if (ds->w_pos > 0) {
2042					lzh_emit_window(strm, ds->w_pos);
2043					ds->w_pos = 0;
2044					return (ARCHIVE_OK);
2045				}
2046				/* End of compressed data; we have completely
2047				 * handled all compressed data. */
2048				return (ARCHIVE_EOF);
2049			}
2050			ds->blocks_avail = lzh_br_bits(br, 16);
2051			if (ds->blocks_avail == 0)
2052				goto failed;
2053			lzh_br_consume(br, 16);
2054			/*
2055			 * Read a literal table compressed in huffman
2056			 * coding.
2057			 */
2058			ds->pt.len_size = ds->literal_pt_len_size;
2059			ds->pt.len_bits = ds->literal_pt_len_bits;
2060			ds->reading_position = 0;
2061			/* FALL THROUGH */
2062		case ST_RD_PT_1:
2063			/* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2064			 * used in reading both a literal table and a
2065			 * position table. */
2066			if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2067				if (last)
2068					goto failed;/* Truncated data. */
2069				ds->state = ST_RD_PT_1;
2070				return (ARCHIVE_OK);
2071			}
2072			ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2073			lzh_br_consume(br, ds->pt.len_bits);
2074			/* FALL THROUGH */
2075		case ST_RD_PT_2:
2076			if (ds->pt.len_avail == 0) {
2077				/* There is no bitlen. */
2078				if (!lzh_br_read_ahead(strm, br,
2079				    ds->pt.len_bits)) {
2080					if (last)
2081						goto failed;/* Truncated data.*/
2082					ds->state = ST_RD_PT_2;
2083					return (ARCHIVE_OK);
2084				}
2085				if (!lzh_make_fake_table(&(ds->pt),
2086				    lzh_br_bits(br, ds->pt.len_bits)))
2087					goto failed;/* Invalid data. */
2088				lzh_br_consume(br, ds->pt.len_bits);
2089				if (ds->reading_position)
2090					ds->state = ST_GET_LITERAL;
2091				else
2092					ds->state = ST_RD_LITERAL_1;
2093				break;
2094			} else if (ds->pt.len_avail > ds->pt.len_size)
2095				goto failed;/* Invalid data. */
2096			ds->loop = 0;
2097			memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2098			if (ds->pt.len_avail < 3 ||
2099			    ds->pt.len_size == ds->pos_pt_len_size) {
2100				ds->state = ST_RD_PT_4;
2101				break;
2102			}
2103			/* FALL THROUGH */
2104		case ST_RD_PT_3:
2105			ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2106			if (ds->loop < 3) {
2107				if (ds->loop < 0 || last)
2108					goto failed;/* Invalid data. */
2109				/* Not completed, get following data. */
2110				ds->state = ST_RD_PT_3;
2111				return (ARCHIVE_OK);
2112			}
2113			/* There are some null in bitlen of the literal. */
2114			if (!lzh_br_read_ahead(strm, br, 2)) {
2115				if (last)
2116					goto failed;/* Truncated data. */
2117				ds->state = ST_RD_PT_3;
2118				return (ARCHIVE_OK);
2119			}
2120			c = lzh_br_bits(br, 2);
2121			lzh_br_consume(br, 2);
2122			if (c > ds->pt.len_avail - 3)
2123				goto failed;/* Invalid data. */
2124			for (i = 3; c-- > 0 ;)
2125				ds->pt.bitlen[i++] = 0;
2126			ds->loop = i;
2127			/* FALL THROUGH */
2128		case ST_RD_PT_4:
2129			ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2130			    ds->pt.len_avail);
2131			if (ds->loop < ds->pt.len_avail) {
2132				if (ds->loop < 0 || last)
2133					goto failed;/* Invalid data. */
2134				/* Not completed, get following data. */
2135				ds->state = ST_RD_PT_4;
2136				return (ARCHIVE_OK);
2137			}
2138			if (!lzh_make_huffman_table(&(ds->pt)))
2139				goto failed;/* Invalid data */
2140			if (ds->reading_position) {
2141				ds->state = ST_GET_LITERAL;
2142				break;
2143			}
2144			/* FALL THROUGH */
2145		case ST_RD_LITERAL_1:
2146			if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2147				if (last)
2148					goto failed;/* Truncated data. */
2149				ds->state = ST_RD_LITERAL_1;
2150				return (ARCHIVE_OK);
2151			}
2152			ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2153			lzh_br_consume(br, ds->lt.len_bits);
2154			/* FALL THROUGH */
2155		case ST_RD_LITERAL_2:
2156			if (ds->lt.len_avail == 0) {
2157				/* There is no bitlen. */
2158				if (!lzh_br_read_ahead(strm, br,
2159				    ds->lt.len_bits)) {
2160					if (last)
2161						goto failed;/* Truncated data.*/
2162					ds->state = ST_RD_LITERAL_2;
2163					return (ARCHIVE_OK);
2164				}
2165				if (!lzh_make_fake_table(&(ds->lt),
2166				    lzh_br_bits(br, ds->lt.len_bits)))
2167					goto failed;/* Invalid data */
2168				lzh_br_consume(br, ds->lt.len_bits);
2169				ds->state = ST_RD_POS_DATA_1;
2170				break;
2171			} else if (ds->lt.len_avail > ds->lt.len_size)
2172				goto failed;/* Invalid data */
2173			ds->loop = 0;
2174			memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2175			/* FALL THROUGH */
2176		case ST_RD_LITERAL_3:
2177			i = ds->loop;
2178			while (i < ds->lt.len_avail) {
2179				if (!lzh_br_read_ahead(strm, br,
2180				    ds->pt.max_bits)) {
2181					if (last)
2182						goto failed;/* Truncated data.*/
2183					ds->loop = i;
2184					ds->state = ST_RD_LITERAL_3;
2185					return (ARCHIVE_OK);
2186				}
2187				rbits = lzh_br_bits(br, ds->pt.max_bits);
2188				c = lzh_decode_huffman(&(ds->pt), rbits);
2189				if (c > 2) {
2190					/* Note: 'c' will never be more than
2191					 * eighteen since it's limited by
2192					 * PT_BITLEN_SIZE, which is being set
2193					 * to ds->pt.len_size through
2194					 * ds->literal_pt_len_size. */
2195					lzh_br_consume(br, ds->pt.bitlen[c]);
2196					c -= 2;
2197					ds->lt.freq[c]++;
2198					ds->lt.bitlen[i++] = c;
2199				} else if (c == 0) {
2200					lzh_br_consume(br, ds->pt.bitlen[c]);
2201					ds->lt.bitlen[i++] = 0;
2202				} else {
2203					/* c == 1 or c == 2 */
2204					int n = (c == 1)?4:9;
2205					if (!lzh_br_read_ahead(strm, br,
2206					     ds->pt.bitlen[c] + n)) {
2207						if (last) /* Truncated data. */
2208							goto failed;
2209						ds->loop = i;
2210						ds->state = ST_RD_LITERAL_3;
2211						return (ARCHIVE_OK);
2212					}
2213					lzh_br_consume(br, ds->pt.bitlen[c]);
2214					c = lzh_br_bits(br, n);
2215					lzh_br_consume(br, n);
2216					c += (n == 4)?3:20;
2217					if (i + c > ds->lt.len_avail)
2218						goto failed;/* Invalid data */
2219					memset(&(ds->lt.bitlen[i]), 0, c);
2220					i += c;
2221				}
2222			}
2223			if (i > ds->lt.len_avail ||
2224			    !lzh_make_huffman_table(&(ds->lt)))
2225				goto failed;/* Invalid data */
2226			/* FALL THROUGH */
2227		case ST_RD_POS_DATA_1:
2228			/*
2229			 * Read a position table compressed in huffman
2230			 * coding.
2231			 */
2232			ds->pt.len_size = ds->pos_pt_len_size;
2233			ds->pt.len_bits = ds->pos_pt_len_bits;
2234			ds->reading_position = 1;
2235			ds->state = ST_RD_PT_1;
2236			break;
2237		case ST_GET_LITERAL:
2238			return (100);
2239		}
2240	}
2241failed:
2242	return (ds->error = ARCHIVE_FAILED);
2243}
2244
2245static int
2246lzh_decode_blocks(struct lzh_stream *strm, int last)
2247{
2248	struct lzh_dec *ds = strm->ds;
2249	struct lzh_br bre = ds->br;
2250	struct huffman *lt = &(ds->lt);
2251	struct huffman *pt = &(ds->pt);
2252	unsigned char *w_buff = ds->w_buff;
2253	unsigned char *lt_bitlen = lt->bitlen;
2254	unsigned char *pt_bitlen = pt->bitlen;
2255	int blocks_avail = ds->blocks_avail, c = 0;
2256	int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2257	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2258	int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2259	int state = ds->state;
2260
2261	for (;;) {
2262		switch (state) {
2263		case ST_GET_LITERAL:
2264			for (;;) {
2265				if (blocks_avail == 0) {
2266					/* We have decoded all blocks.
2267					 * Let's handle next blocks. */
2268					ds->state = ST_RD_BLOCK;
2269					ds->br = bre;
2270					ds->blocks_avail = 0;
2271					ds->w_pos = w_pos;
2272					ds->copy_pos = 0;
2273					return (100);
2274				}
2275
2276				/* lzh_br_read_ahead() always try to fill the
2277				 * cache buffer up. In specific situation we
2278				 * are close to the end of the data, the cache
2279				 * buffer will not be full and thus we have to
2280				 * determine if the cache buffer has some bits
2281				 * as much as we need after lzh_br_read_ahead()
2282				 * failed. */
2283				if (!lzh_br_read_ahead(strm, &bre,
2284				    lt_max_bits)) {
2285					if (!last)
2286						goto next_data;
2287					/* Remaining bits are less than
2288					 * maximum bits(lt.max_bits) but maybe
2289					 * it still remains as much as we need,
2290					 * so we should try to use it with
2291					 * dummy bits. */
2292					c = lzh_decode_huffman(lt,
2293					      lzh_br_bits_forced(&bre,
2294					        lt_max_bits));
2295					lzh_br_consume(&bre, lt_bitlen[c]);
2296					if (!lzh_br_has(&bre, 0))
2297						goto failed;/* Over read. */
2298				} else {
2299					c = lzh_decode_huffman(lt,
2300					      lzh_br_bits(&bre, lt_max_bits));
2301					lzh_br_consume(&bre, lt_bitlen[c]);
2302				}
2303				blocks_avail--;
2304				if (c > UCHAR_MAX)
2305					/* Current block is a match data. */
2306					break;
2307				/*
2308				 * 'c' is exactly a literal code.
2309				 */
2310				/* Save a decoded code to reference it
2311				 * afterward. */
2312				w_buff[w_pos] = c;
2313				if (++w_pos >= w_size) {
2314					w_pos = 0;
2315					lzh_emit_window(strm, w_size);
2316					goto next_data;
2317				}
2318			}
2319			/* 'c' is the length of a match pattern we have
2320			 * already extracted, which has be stored in
2321			 * window(ds->w_buff). */
2322			copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2323			/* FALL THROUGH */
2324		case ST_GET_POS_1:
2325			/*
2326			 * Get a reference position.
2327			 */
2328			if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2329				if (!last) {
2330					state = ST_GET_POS_1;
2331					ds->copy_len = copy_len;
2332					goto next_data;
2333				}
2334				copy_pos = lzh_decode_huffman(pt,
2335				    lzh_br_bits_forced(&bre, pt_max_bits));
2336				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2337				if (!lzh_br_has(&bre, 0))
2338					goto failed;/* Over read. */
2339			} else {
2340				copy_pos = lzh_decode_huffman(pt,
2341				    lzh_br_bits(&bre, pt_max_bits));
2342				lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2343			}
2344			/* FALL THROUGH */
2345		case ST_GET_POS_2:
2346			if (copy_pos > 1) {
2347				/* We need an additional adjustment number to
2348				 * the position. */
2349				int p = copy_pos - 1;
2350				if (!lzh_br_read_ahead(strm, &bre, p)) {
2351					if (last)
2352						goto failed;/* Truncated data.*/
2353					state = ST_GET_POS_2;
2354					ds->copy_len = copy_len;
2355					ds->copy_pos = copy_pos;
2356					goto next_data;
2357				}
2358				copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2359				lzh_br_consume(&bre, p);
2360			}
2361			/* The position is actually a distance from the last
2362			 * code we had extracted and thus we have to convert
2363			 * it to a position of the window. */
2364			copy_pos = (w_pos - copy_pos - 1) & w_mask;
2365			/* FALL THROUGH */
2366		case ST_COPY_DATA:
2367			/*
2368			 * Copy `copy_len' bytes as extracted data from
2369			 * the window into the output buffer.
2370			 */
2371			for (;;) {
2372				int l;
2373
2374				l = copy_len;
2375				if (copy_pos > w_pos) {
2376					if (l > w_size - copy_pos)
2377						l = w_size - copy_pos;
2378				} else {
2379					if (l > w_size - w_pos)
2380						l = w_size - w_pos;
2381				}
2382				if ((copy_pos + l < w_pos)
2383				    || (w_pos + l < copy_pos)) {
2384					/* No overlap. */
2385					memcpy(w_buff + w_pos,
2386					    w_buff + copy_pos, l);
2387				} else {
2388					const unsigned char *s;
2389					unsigned char *d;
2390					int li;
2391
2392					d = w_buff + w_pos;
2393					s = w_buff + copy_pos;
2394					for (li = 0; li < l-1;) {
2395						d[li] = s[li];li++;
2396						d[li] = s[li];li++;
2397					}
2398					if (li < l)
2399						d[li] = s[li];
2400				}
2401				w_pos += l;
2402				if (w_pos == w_size) {
2403					w_pos = 0;
2404					lzh_emit_window(strm, w_size);
2405					if (copy_len <= l)
2406						state = ST_GET_LITERAL;
2407					else {
2408						state = ST_COPY_DATA;
2409						ds->copy_len = copy_len - l;
2410						ds->copy_pos =
2411						    (copy_pos + l) & w_mask;
2412					}
2413					goto next_data;
2414				}
2415				if (copy_len <= l)
2416					/* A copy of current pattern ended. */
2417					break;
2418				copy_len -= l;
2419				copy_pos = (copy_pos + l) & w_mask;
2420			}
2421			state = ST_GET_LITERAL;
2422			break;
2423		}
2424	}
2425failed:
2426	return (ds->error = ARCHIVE_FAILED);
2427next_data:
2428	ds->br = bre;
2429	ds->blocks_avail = blocks_avail;
2430	ds->state = state;
2431	ds->w_pos = w_pos;
2432	return (ARCHIVE_OK);
2433}
2434
2435static int
2436lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2437{
2438	int bits;
2439
2440	if (hf->bitlen == NULL) {
2441		hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2442		if (hf->bitlen == NULL)
2443			return (ARCHIVE_FATAL);
2444	}
2445	if (hf->tbl == NULL) {
2446		if (tbl_bits < HTBL_BITS)
2447			bits = tbl_bits;
2448		else
2449			bits = HTBL_BITS;
2450		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2451		if (hf->tbl == NULL)
2452			return (ARCHIVE_FATAL);
2453	}
2454	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2455		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2456		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2457		if (hf->tree == NULL)
2458			return (ARCHIVE_FATAL);
2459	}
2460	hf->len_size = (int)len_size;
2461	hf->tbl_bits = tbl_bits;
2462	return (ARCHIVE_OK);
2463}
2464
2465static void
2466lzh_huffman_free(struct huffman *hf)
2467{
2468	free(hf->bitlen);
2469	free(hf->tbl);
2470	free(hf->tree);
2471}
2472
2473static char bitlen_tbl[0x400] = {
2474	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2475	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2476	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2477	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2478	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2479	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2480	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2481	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2482	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2483	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2484	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2485	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2486	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2487	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2488	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2489	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2490	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2491	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2492	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2493	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2494	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2495	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2496	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2497	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2498	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2499	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2500	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2501	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2502	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2503	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2504	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2505	 7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2506	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2507	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2508	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2509	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2510	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2511	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2512	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2513	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2514	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2515	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2516	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2517	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2518	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2519	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2520	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2521	 8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2522	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2523	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2524	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2525	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2526	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2527	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2528	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2529	 9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2530	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2531	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2532	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2533	10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2534	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2535	11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2536	12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2537	13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
2538};
2539static int
2540lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2541{
2542	struct lzh_dec *ds = strm->ds;
2543	struct lzh_br *br = &(ds->br);
2544	int c, i;
2545
2546	for (i = start; i < end; ) {
2547		/*
2548		 *  bit pattern     the number we need
2549		 *     000           ->  0
2550		 *     001           ->  1
2551		 *     010           ->  2
2552		 *     ...
2553		 *     110           ->  6
2554		 *     1110          ->  7
2555		 *     11110         ->  8
2556		 *     ...
2557		 *     1111111111110 ->  16
2558		 */
2559		if (!lzh_br_read_ahead(strm, br, 3))
2560			return (i);
2561		if ((c = lzh_br_bits(br, 3)) == 7) {
2562			if (!lzh_br_read_ahead(strm, br, 13))
2563				return (i);
2564			c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2565			if (c)
2566				lzh_br_consume(br, c - 3);
2567			else
2568				return (-1);/* Invalid data. */
2569		} else
2570			lzh_br_consume(br, 3);
2571		ds->pt.bitlen[i++] = c;
2572		ds->pt.freq[c]++;
2573	}
2574	return (i);
2575}
2576
2577static int
2578lzh_make_fake_table(struct huffman *hf, uint16_t c)
2579{
2580	if (c >= hf->len_size)
2581		return (0);
2582	hf->tbl[0] = c;
2583	hf->max_bits = 0;
2584	hf->shift_bits = 0;
2585	hf->bitlen[hf->tbl[0]] = 0;
2586	return (1);
2587}
2588
2589/*
2590 * Make a huffman coding table.
2591 */
2592static int
2593lzh_make_huffman_table(struct huffman *hf)
2594{
2595	uint16_t *tbl;
2596	const unsigned char *bitlen;
2597	int bitptn[17], weight[17];
2598	int i, maxbits = 0, ptn, tbl_size, w;
2599	int diffbits, len_avail;
2600
2601	/*
2602	 * Initialize bit patterns.
2603	 */
2604	ptn = 0;
2605	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2606		bitptn[i] = ptn;
2607		weight[i] = w;
2608		if (hf->freq[i]) {
2609			ptn += hf->freq[i] * w;
2610			maxbits = i;
2611		}
2612	}
2613	if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2614		return (0);/* Invalid */
2615
2616	hf->max_bits = maxbits;
2617
2618	/*
2619	 * Cut out extra bits which we won't house in the table.
2620	 * This preparation reduces the same calculation in the for-loop
2621	 * making the table.
2622	 */
2623	if (maxbits < 16) {
2624		int ebits = 16 - maxbits;
2625		for (i = 1; i <= maxbits; i++) {
2626			bitptn[i] >>= ebits;
2627			weight[i] >>= ebits;
2628		}
2629	}
2630	if (maxbits > HTBL_BITS) {
2631		unsigned htbl_max;
2632		uint16_t *p;
2633
2634		diffbits = maxbits - HTBL_BITS;
2635		for (i = 1; i <= HTBL_BITS; i++) {
2636			bitptn[i] >>= diffbits;
2637			weight[i] >>= diffbits;
2638		}
2639		htbl_max = bitptn[HTBL_BITS] +
2640		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2641		p = &(hf->tbl[htbl_max]);
2642		while (p < &hf->tbl[1U<<HTBL_BITS])
2643			*p++ = 0;
2644	} else
2645		diffbits = 0;
2646	hf->shift_bits = diffbits;
2647
2648	/*
2649	 * Make the table.
2650	 */
2651	tbl_size = 1 << HTBL_BITS;
2652	tbl = hf->tbl;
2653	bitlen = hf->bitlen;
2654	len_avail = hf->len_avail;
2655	hf->tree_used = 0;
2656	for (i = 0; i < len_avail; i++) {
2657		uint16_t *p;
2658		int len, cnt;
2659		uint16_t bit;
2660		int extlen;
2661		struct htree_t *ht;
2662
2663		if (bitlen[i] == 0)
2664			continue;
2665		/* Get a bit pattern */
2666		len = bitlen[i];
2667		ptn = bitptn[len];
2668		cnt = weight[len];
2669		if (len <= HTBL_BITS) {
2670			/* Calculate next bit pattern */
2671			if ((bitptn[len] = ptn + cnt) > tbl_size)
2672				return (0);/* Invalid */
2673			/* Update the table */
2674			p = &(tbl[ptn]);
2675			if (cnt > 7) {
2676				uint16_t *pc;
2677
2678				cnt -= 8;
2679				pc = &p[cnt];
2680				pc[0] = (uint16_t)i;
2681				pc[1] = (uint16_t)i;
2682				pc[2] = (uint16_t)i;
2683				pc[3] = (uint16_t)i;
2684				pc[4] = (uint16_t)i;
2685				pc[5] = (uint16_t)i;
2686				pc[6] = (uint16_t)i;
2687				pc[7] = (uint16_t)i;
2688				if (cnt > 7) {
2689					cnt -= 8;
2690					memcpy(&p[cnt], pc,
2691						8 * sizeof(uint16_t));
2692					pc = &p[cnt];
2693					while (cnt > 15) {
2694						cnt -= 16;
2695						memcpy(&p[cnt], pc,
2696							16 * sizeof(uint16_t));
2697					}
2698				}
2699				if (cnt)
2700					memcpy(p, pc, cnt * sizeof(uint16_t));
2701			} else {
2702				while (cnt > 1) {
2703					p[--cnt] = (uint16_t)i;
2704					p[--cnt] = (uint16_t)i;
2705				}
2706				if (cnt)
2707					p[--cnt] = (uint16_t)i;
2708			}
2709			continue;
2710		}
2711
2712		/*
2713		 * A bit length is too big to be housed to a direct table,
2714		 * so we use a tree model for its extra bits.
2715		 */
2716		bitptn[len] = ptn + cnt;
2717		bit = 1U << (diffbits -1);
2718		extlen = len - HTBL_BITS;
2719
2720		p = &(tbl[ptn >> diffbits]);
2721		if (*p == 0) {
2722			*p = len_avail + hf->tree_used;
2723			ht = &(hf->tree[hf->tree_used++]);
2724			if (hf->tree_used > hf->tree_avail)
2725				return (0);/* Invalid */
2726			ht->left = 0;
2727			ht->right = 0;
2728		} else {
2729			if (*p < len_avail ||
2730			    *p >= (len_avail + hf->tree_used))
2731				return (0);/* Invalid */
2732			ht = &(hf->tree[*p - len_avail]);
2733		}
2734		while (--extlen > 0) {
2735			if (ptn & bit) {
2736				if (ht->left < len_avail) {
2737					ht->left = len_avail + hf->tree_used;
2738					ht = &(hf->tree[hf->tree_used++]);
2739					if (hf->tree_used > hf->tree_avail)
2740						return (0);/* Invalid */
2741					ht->left = 0;
2742					ht->right = 0;
2743				} else {
2744					ht = &(hf->tree[ht->left - len_avail]);
2745				}
2746			} else {
2747				if (ht->right < len_avail) {
2748					ht->right = len_avail + hf->tree_used;
2749					ht = &(hf->tree[hf->tree_used++]);
2750					if (hf->tree_used > hf->tree_avail)
2751						return (0);/* Invalid */
2752					ht->left = 0;
2753					ht->right = 0;
2754				} else {
2755					ht = &(hf->tree[ht->right - len_avail]);
2756				}
2757			}
2758			bit >>= 1;
2759		}
2760		if (ptn & bit) {
2761			if (ht->left != 0)
2762				return (0);/* Invalid */
2763			ht->left = (uint16_t)i;
2764		} else {
2765			if (ht->right != 0)
2766				return (0);/* Invalid */
2767			ht->right = (uint16_t)i;
2768		}
2769	}
2770	return (1);
2771}
2772
2773static int
2774lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2775{
2776	struct htree_t *ht;
2777	int extlen;
2778
2779	ht = hf->tree;
2780	extlen = hf->shift_bits;
2781	while (c >= hf->len_avail) {
2782		c -= hf->len_avail;
2783		if (extlen-- <= 0 || c >= hf->tree_used)
2784			return (0);
2785		if (rbits & (1U << extlen))
2786			c = ht[c].left;
2787		else
2788			c = ht[c].right;
2789	}
2790	return (c);
2791}
2792
2793static inline int
2794lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2795{
2796	int c;
2797	/*
2798	 * At first search an index table for a bit pattern.
2799	 * If it fails, search a huffman tree for.
2800	 */
2801	c = hf->tbl[rbits >> hf->shift_bits];
2802	if (c < hf->len_avail || hf->len_avail == 0)
2803		return (c);
2804	/* This bit pattern needs to be found out at a huffman tree. */
2805	return (lzh_decode_huffman_tree(hf, rbits, c));
2806}
2807
2808