/*
 * XZ decompressor
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#ifndef XZ_H
#define XZ_H

/*
 * Pull in size_t and the fixed-width integer types (uint8_t, uint32_t)
 * from whichever headers the build environment provides.
 */
#ifdef __KERNEL__
#	include <linux/stddef.h>
#	include <linux/types.h>
#else
#ifdef __FreeBSD__
#	include <sys/stddef.h>
#	include <sys/types.h>
#else
#	include <stddef.h>
#	include <stdint.h>
#endif
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* In Linux, this is used to make extern functions static when needed. */
#ifndef XZ_EXTERN
#	define XZ_EXTERN extern
#endif

/**
 * enum xz_mode - Operation mode
 *
 * @XZ_SINGLE:              Single-call mode. This uses less RAM than
 *                          multi-call modes, because the LZMA2
 *                          dictionary doesn't need to be allocated as
 *                          part of the decoder state. All required data
 *                          structures are allocated at initialization,
 *                          so xz_dec_run() cannot return XZ_MEM_ERROR.
 * @XZ_PREALLOC:            Multi-call mode with preallocated LZMA2
 *                          dictionary buffer. All data structures are
 *                          allocated at initialization, so xz_dec_run()
 *                          cannot return XZ_MEM_ERROR.
 * @XZ_DYNALLOC:            Multi-call mode. The LZMA2 dictionary is
 *                          allocated once the required size has been
 *                          parsed from the stream headers. If the
 *                          allocation fails, xz_dec_run() will return
 *                          XZ_MEM_ERROR.
 *
 * It is possible to enable support only for a subset of the above
 * modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC,
 * or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled
 * with support for all operation modes, but the preboot code may
 * be built with fewer features to minimize code size.
 */
enum xz_mode {
	XZ_SINGLE,
	XZ_PREALLOC,
	XZ_DYNALLOC
};

/**
 * enum xz_ret - Return codes
 * @XZ_OK:                  Everything is OK so far. More input or more
 *                          output space is required to continue. This
 *                          return code is possible only in multi-call mode
 *                          (XZ_PREALLOC or XZ_DYNALLOC).
 * @XZ_STREAM_END:          Operation finished successfully.
 * @XZ_UNSUPPORTED_CHECK:   Integrity check type is not supported. Decoding
 *                          is still possible in multi-call mode by simply
 *                          calling xz_dec_run() again.
 *                          Note that this return value is used only if
 *                          XZ_DEC_ANY_CHECK was defined at build time,
 *                          which is not used in the kernel. Unsupported
 *                          check types return XZ_OPTIONS_ERROR if
 *                          XZ_DEC_ANY_CHECK was not defined at build time.
 * @XZ_MEM_ERROR:           Allocating memory failed. This return code is
 *                          possible only if the decoder was initialized
 *                          with XZ_DYNALLOC. The amount of memory that was
 *                          tried to be allocated was no more than the
 *                          dict_max argument given to xz_dec_init().
 * @XZ_MEMLIMIT_ERROR:      A bigger LZMA2 dictionary would be needed than
 *                          allowed by the dict_max argument given to
 *                          xz_dec_init(). This return value is possible
 *                          only in multi-call mode (XZ_PREALLOC or
 *                          XZ_DYNALLOC); the single-call mode (XZ_SINGLE)
 *                          ignores the dict_max argument.
 * @XZ_FORMAT_ERROR:        File format was not recognized (wrong magic
 *                          bytes).
 * @XZ_OPTIONS_ERROR:       This implementation doesn't support the requested
 *                          compression options. In the decoder this means
 *                          that the header CRC32 matches, but the header
 *                          itself specifies something that we don't support.
 * @XZ_DATA_ERROR:          Compressed data is corrupt.
 * @XZ_BUF_ERROR:           Cannot make any progress. Details are slightly
 *                          different between multi-call and single-call
 *                          mode; more information below.
 *
 * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
 * to XZ code cannot consume any input and cannot produce any new output.
 * This happens when there is no new input available, or the output buffer
 * is full while at least one output byte is still pending. Assuming your
 * code is not buggy, you can get this error only when decoding a compressed
 * stream that is truncated or otherwise corrupt.
 *
 * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
 * is too small or the compressed input is corrupt in a way that makes the
 * decoder produce more output than the caller expected. When it is
 * (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR
 * is used instead of XZ_BUF_ERROR.
 */
enum xz_ret {
	XZ_OK,
	XZ_STREAM_END,
	XZ_UNSUPPORTED_CHECK,
	XZ_MEM_ERROR,
	XZ_MEMLIMIT_ERROR,
	XZ_FORMAT_ERROR,
	XZ_OPTIONS_ERROR,
	XZ_DATA_ERROR,
	XZ_BUF_ERROR
};

/**
 * struct xz_buf - Passing input and output buffers to XZ code
 * @in:         Beginning of the input buffer. This may be NULL if and only
 *              if in_pos is equal to in_size.
 * @in_pos:     Current position in the input buffer. This must not exceed
 *              in_size.
 * @in_size:    Size of the input buffer
 * @out:        Beginning of the output buffer. This may be NULL if and only
 *              if out_pos is equal to out_size.
 * @out_pos:    Current position in the output buffer. This must not exceed
 *              out_size.
 * @out_size:   Size of the output buffer
 *
 * Only the contents of the output buffer from out[out_pos] onward, and
 * the variables in_pos and out_pos are modified by the XZ code.
 */
struct xz_buf {
	const uint8_t *in;
	size_t in_pos;
	size_t in_size;

	uint8_t *out;
	size_t out_pos;
	size_t out_size;
};

/**
 * struct xz_dec - Opaque type to hold the XZ decoder state
 */
struct xz_dec;

/**
 * xz_dec_init() - Allocate and initialize an XZ decoder state
 * @mode:       Operation mode
 * @dict_max:   Maximum size of the LZMA2 dictionary (history buffer) for
 *              multi-call decoding. This is ignored in single-call mode
 *              (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes
 *              or 2^n + 2^(n-1) bytes (the latter sizes are less common
 *              in practice), so other values for dict_max don't make sense.
 *              In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
 *              512 KiB, and 1 MiB are probably the only reasonable values,
 *              except for kernel and initramfs images where a bigger
 *              dictionary can be fine and useful.
 *
 * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at
 * once. The caller must provide enough output space or the decoding will
 * fail. The output space is used as the dictionary buffer, which is why
 * there is no need to allocate the dictionary as part of the decoder's
 * internal state.
 *
 * Because the output buffer is used as the workspace, streams encoded using
 * a big dictionary are not a problem in single-call mode. It is enough that
 * the output buffer is big enough to hold the actual uncompressed data; it
 * can be smaller than the dictionary size stored in the stream headers.
 *
 * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes
 * of memory is preallocated for the LZMA2 dictionary. This way there is no
 * risk that xz_dec_run() could run out of memory, since xz_dec_run() will
 * never allocate any memory. Instead, if the preallocated dictionary is too
 * small for decoding the given input stream, xz_dec_run() will return
 * XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be
 * decoded to avoid allocating an excessive amount of memory for the
 * dictionary.
 *
 * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC):
 * dict_max specifies the maximum allowed dictionary size that xz_dec_run()
 * may allocate once it has parsed the dictionary size from the stream
 * headers. This way excessive allocations can be avoided while still
 * limiting the maximum memory usage to a sane value to prevent running the
 * system out of memory when decompressing streams from untrusted sources.
 *
 * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
 * ready to be used with xz_dec_run(). If memory allocation fails,
 * xz_dec_init() returns NULL.
 */
XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max);

/**
 * xz_dec_run() - Run the XZ decoder
 * @s:          Decoder state allocated using xz_dec_init()
 * @b:          Input and output buffers
 *
 * The possible return values depend on build options and operation mode.
 * See enum xz_ret for details.
 *
 * Note that if an error occurs in single-call mode (return value is not
 * XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
 * contents of the output buffer from b->out[b->out_pos] onward are
 * undefined. This is true even after XZ_BUF_ERROR, because with some filter
 * chains, there may be a second pass over the output buffer, and this pass
 * cannot be properly done if the output buffer is truncated. Thus, you
 * cannot give the single-call decoder a too small buffer and then expect to
 * get that amount valid data from the beginning of the stream. You must use
 * the multi-call decoder if you don't want to uncompress the whole stream.
 */
XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);

/**
 * xz_dec_reset() - Reset an already allocated decoder state
 * @s:          Decoder state allocated using xz_dec_init()
 *
 * This function can be used to reset the multi-call decoder state without
 * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
 *
 * In single-call mode, xz_dec_reset() is always called in the beginning of
 * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
 * multi-call mode.
 */
XZ_EXTERN void xz_dec_reset(struct xz_dec *s);

/**
 * xz_dec_end() - Free the memory allocated for the decoder state
 * @s:          Decoder state allocated using xz_dec_init(). If s is NULL,
 *              this function does nothing.
 */
XZ_EXTERN void xz_dec_end(struct xz_dec *s);

/*
 * Standalone build (userspace build or in-kernel build for boot time use)
 * needs a CRC32 implementation. For normal in-kernel use, kernel's own
 * CRC32 module is used instead, and users of this module don't need to
 * care about the functions below.
 */
#ifndef XZ_INTERNAL_CRC32
#	ifdef __KERNEL__
#		define XZ_INTERNAL_CRC32 0
#	else
#		define XZ_INTERNAL_CRC32 1
#	endif
#endif

#if XZ_INTERNAL_CRC32
/*
 * This must be called before any other xz_* function to initialize
 * the CRC32 lookup table.
 */
XZ_EXTERN void xz_crc32_init(void);

/*
 * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
 * calculation, the third argument must be zero. To continue the calculation,
 * the previously returned value is passed as the third argument.
 */
XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc);
#endif

#ifdef __cplusplus
}
#endif

#endif /* XZ_H */