index_decoder.c revision 312518
1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       index_decoder.c
4/// \brief      Decodes the Index field
5//
6//  Author:     Lasse Collin
7//
8//  This file has been put into the public domain.
9//  You can do whatever you want with this file.
10//
11///////////////////////////////////////////////////////////////////////////////
12
13#include "index.h"
14#include "check.h"
15
16
17typedef struct {
18	enum {
19		SEQ_INDICATOR,
20		SEQ_COUNT,
21		SEQ_MEMUSAGE,
22		SEQ_UNPADDED,
23		SEQ_UNCOMPRESSED,
24		SEQ_PADDING_INIT,
25		SEQ_PADDING,
26		SEQ_CRC32,
27	} sequence;
28
29	/// Memory usage limit
30	uint64_t memlimit;
31
32	/// Target Index
33	lzma_index *index;
34
35	/// Pointer give by the application, which is set after
36	/// successful decoding.
37	lzma_index **index_ptr;
38
39	/// Number of Records left to decode.
40	lzma_vli count;
41
42	/// The most recent Unpadded Size field
43	lzma_vli unpadded_size;
44
45	/// The most recent Uncompressed Size field
46	lzma_vli uncompressed_size;
47
48	/// Position in integers
49	size_t pos;
50
51	/// CRC32 of the List of Records field
52	uint32_t crc32;
53} lzma_index_coder;
54
55
56static lzma_ret
57index_decode(void *coder_ptr, const lzma_allocator *allocator,
58		const uint8_t *restrict in, size_t *restrict in_pos,
59		size_t in_size,
60		uint8_t *restrict out lzma_attribute((__unused__)),
61		size_t *restrict out_pos lzma_attribute((__unused__)),
62		size_t out_size lzma_attribute((__unused__)),
63		lzma_action action lzma_attribute((__unused__)))
64{
65	lzma_index_coder *coder = coder_ptr;
66
67	// Similar optimization as in index_encoder.c
68	const size_t in_start = *in_pos;
69	lzma_ret ret = LZMA_OK;
70
71	while (*in_pos < in_size)
72	switch (coder->sequence) {
73	case SEQ_INDICATOR:
74		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
75		// LZMA_FORMAT_ERROR, because a typical usage case for Index
76		// decoder is when parsing the Stream backwards. If seeking
77		// backward from the Stream Footer gives us something that
78		// doesn't begin with Index Indicator, the file is considered
79		// corrupt, not "programming error" or "unrecognized file
80		// format". One could argue that the application should
81		// verify the Index Indicator before trying to decode the
82		// Index, but well, I suppose it is simpler this way.
83		if (in[(*in_pos)++] != 0x00)
84			return LZMA_DATA_ERROR;
85
86		coder->sequence = SEQ_COUNT;
87		break;
88
89	case SEQ_COUNT:
90		ret = lzma_vli_decode(&coder->count, &coder->pos,
91				in, in_pos, in_size);
92		if (ret != LZMA_STREAM_END)
93			goto out;
94
95		coder->pos = 0;
96		coder->sequence = SEQ_MEMUSAGE;
97
98	// Fall through
99
100	case SEQ_MEMUSAGE:
101		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
102			ret = LZMA_MEMLIMIT_ERROR;
103			goto out;
104		}
105
106		// Tell the Index handling code how many Records this
107		// Index has to allow it to allocate memory more efficiently.
108		lzma_index_prealloc(coder->index, coder->count);
109
110		ret = LZMA_OK;
111		coder->sequence = coder->count == 0
112				? SEQ_PADDING_INIT : SEQ_UNPADDED;
113		break;
114
115	case SEQ_UNPADDED:
116	case SEQ_UNCOMPRESSED: {
117		lzma_vli *size = coder->sequence == SEQ_UNPADDED
118				? &coder->unpadded_size
119				: &coder->uncompressed_size;
120
121		ret = lzma_vli_decode(size, &coder->pos,
122				in, in_pos, in_size);
123		if (ret != LZMA_STREAM_END)
124			goto out;
125
126		ret = LZMA_OK;
127		coder->pos = 0;
128
129		if (coder->sequence == SEQ_UNPADDED) {
130			// Validate that encoded Unpadded Size isn't too small
131			// or too big.
132			if (coder->unpadded_size < UNPADDED_SIZE_MIN
133					|| coder->unpadded_size
134						> UNPADDED_SIZE_MAX)
135				return LZMA_DATA_ERROR;
136
137			coder->sequence = SEQ_UNCOMPRESSED;
138		} else {
139			// Add the decoded Record to the Index.
140			return_if_error(lzma_index_append(
141					coder->index, allocator,
142					coder->unpadded_size,
143					coder->uncompressed_size));
144
145			// Check if this was the last Record.
146			coder->sequence = --coder->count == 0
147					? SEQ_PADDING_INIT
148					: SEQ_UNPADDED;
149		}
150
151		break;
152	}
153
154	case SEQ_PADDING_INIT:
155		coder->pos = lzma_index_padding_size(coder->index);
156		coder->sequence = SEQ_PADDING;
157
158	// Fall through
159
160	case SEQ_PADDING:
161		if (coder->pos > 0) {
162			--coder->pos;
163			if (in[(*in_pos)++] != 0x00)
164				return LZMA_DATA_ERROR;
165
166			break;
167		}
168
169		// Finish the CRC32 calculation.
170		coder->crc32 = lzma_crc32(in + in_start,
171				*in_pos - in_start, coder->crc32);
172
173		coder->sequence = SEQ_CRC32;
174
175	// Fall through
176
177	case SEQ_CRC32:
178		do {
179			if (*in_pos == in_size)
180				return LZMA_OK;
181
182			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
183					!= in[(*in_pos)++])
184				return LZMA_DATA_ERROR;
185
186		} while (++coder->pos < 4);
187
188		// Decoding was successful, now we can let the application
189		// see the decoded Index.
190		*coder->index_ptr = coder->index;
191
192		// Make index NULL so we don't free it unintentionally.
193		coder->index = NULL;
194
195		return LZMA_STREAM_END;
196
197	default:
198		assert(0);
199		return LZMA_PROG_ERROR;
200	}
201
202out:
203	// Update the CRC32,
204	coder->crc32 = lzma_crc32(in + in_start,
205			*in_pos - in_start, coder->crc32);
206
207	return ret;
208}
209
210
211static void
212index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
213{
214	lzma_index_coder *coder = coder_ptr;
215	lzma_index_end(coder->index, allocator);
216	lzma_free(coder, allocator);
217	return;
218}
219
220
221static lzma_ret
222index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
223		uint64_t *old_memlimit, uint64_t new_memlimit)
224{
225	lzma_index_coder *coder = coder_ptr;
226
227	*memusage = lzma_index_memusage(1, coder->count);
228	*old_memlimit = coder->memlimit;
229
230	if (new_memlimit != 0) {
231		if (new_memlimit < *memusage)
232			return LZMA_MEMLIMIT_ERROR;
233
234		coder->memlimit = new_memlimit;
235	}
236
237	return LZMA_OK;
238}
239
240
241static lzma_ret
242index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
243		lzma_index **i, uint64_t memlimit)
244{
245	// Remember the pointer given by the application. We will set it
246	// to point to the decoded Index only if decoding is successful.
247	// Before that, keep it NULL so that applications can always safely
248	// pass it to lzma_index_end() no matter did decoding succeed or not.
249	coder->index_ptr = i;
250	*i = NULL;
251
252	// We always allocate a new lzma_index.
253	coder->index = lzma_index_init(allocator);
254	if (coder->index == NULL)
255		return LZMA_MEM_ERROR;
256
257	// Initialize the rest.
258	coder->sequence = SEQ_INDICATOR;
259	coder->memlimit = memlimit;
260	coder->count = 0; // Needs to be initialized due to _memconfig().
261	coder->pos = 0;
262	coder->crc32 = 0;
263
264	return LZMA_OK;
265}
266
267
268static lzma_ret
269index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
270		lzma_index **i, uint64_t memlimit)
271{
272	lzma_next_coder_init(&index_decoder_init, next, allocator);
273
274	if (i == NULL || memlimit == 0)
275		return LZMA_PROG_ERROR;
276
277	lzma_index_coder *coder = next->coder;
278	if (coder == NULL) {
279		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
280		if (coder == NULL)
281			return LZMA_MEM_ERROR;
282
283		next->coder = coder;
284		next->code = &index_decode;
285		next->end = &index_decoder_end;
286		next->memconfig = &index_decoder_memconfig;
287		coder->index = NULL;
288	} else {
289		lzma_index_end(coder->index, allocator);
290	}
291
292	return index_decoder_reset(coder, allocator, i, memlimit);
293}
294
295
296extern LZMA_API(lzma_ret)
297lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
298{
299	lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
300
301	strm->internal->supported_actions[LZMA_RUN] = true;
302	strm->internal->supported_actions[LZMA_FINISH] = true;
303
304	return LZMA_OK;
305}
306
307
308extern LZMA_API(lzma_ret)
309lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
310		const lzma_allocator *allocator,
311		const uint8_t *in, size_t *in_pos, size_t in_size)
312{
313	// Sanity checks
314	if (i == NULL || memlimit == NULL
315			|| in == NULL || in_pos == NULL || *in_pos > in_size)
316		return LZMA_PROG_ERROR;
317
318	// Initialize the decoder.
319	lzma_index_coder coder;
320	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
321
322	// Store the input start position so that we can restore it in case
323	// of an error.
324	const size_t in_start = *in_pos;
325
326	// Do the actual decoding.
327	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
328			NULL, NULL, 0, LZMA_RUN);
329
330	if (ret == LZMA_STREAM_END) {
331		ret = LZMA_OK;
332	} else {
333		// Something went wrong, free the Index structure and restore
334		// the input position.
335		lzma_index_end(coder.index, allocator);
336		*in_pos = in_start;
337
338		if (ret == LZMA_OK) {
339			// The input is truncated or otherwise corrupt.
340			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
341			// like lzma_vli_decode() does in single-call mode.
342			ret = LZMA_DATA_ERROR;
343
344		} else if (ret == LZMA_MEMLIMIT_ERROR) {
345			// Tell the caller how much memory would have
346			// been needed.
347			*memlimit = lzma_index_memusage(1, coder.count);
348		}
349	}
350
351	return ret;
352}
353