1
0
mirror of https://git.tukaani.org/xz.git synced 2025-12-25 23:08:44 +00:00
xz/src/liblzma/lzma/lzma2_decoder.c
Lasse Collin 0d21f49a80 liblzma: Fix decoding of LZMA2 streams having no uncompressed data.
The decoder considered empty LZMA2 streams to be corrupt.
This shouldn't matter much with .xz files, because no encoder
creates empty LZMA2 streams in .xz. This bug is more likely
to cause problems in applications that use raw LZMA2 streams.
2011-03-31 11:54:48 +03:00

305 lines
7.2 KiB
C

///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma2_decoder.c
/// \brief LZMA2 decoder
///
// Authors: Igor Pavlov
// Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include "lzma2_decoder.h"
#include "lz_decoder.h"
#include "lzma_decoder.h"
/// State of the LZMA2 decoder: the position in the chunk-parsing state
/// machine, the sizes of the chunk currently being processed, and the
/// wrapped LZMA decoder that handles the payload of LZMA chunks.
struct lzma_coder_s {
/// States of the chunk parser driven by lzma2_decode().
enum sequence {
/// Read the control byte that starts a chunk or ends the stream.
SEQ_CONTROL,
/// Read the middle byte (bits 8-15) of the uncompressed size.
SEQ_UNCOMPRESSED_1,
/// Read the low byte of the uncompressed size. The header stores
/// the size minus one, so one is added here.
SEQ_UNCOMPRESSED_2,
/// Read the high byte of the compressed size.
SEQ_COMPRESSED_0,
/// Read the low byte of the compressed size (also stored as
/// size minus one), then continue to next_sequence.
SEQ_COMPRESSED_1,
/// Read the LZMA properties byte and reset the LZMA decoder
/// with the decoded options.
SEQ_PROPERTIES,
/// Run the LZMA decoder on the payload of an LZMA chunk.
SEQ_LZMA,
/// Copy the payload of an uncompressed chunk to the dictionary.
SEQ_COPY,
} sequence;
/// Sequence after the size fields have been decoded. This is one of
/// SEQ_PROPERTIES, SEQ_LZMA, or SEQ_COPY, chosen in SEQ_CONTROL.
enum sequence next_sequence;
/// LZMA decoder
lzma_lz_decoder lzma;
/// Uncompressed size of LZMA chunk
size_t uncompressed_size;
/// Compressed size of the chunk (for an uncompressed chunk this is
/// naturally equal to its uncompressed size). While the payload is
/// being processed, this counts the payload bytes still expected.
size_t compressed_size;
/// True if the next LZMA chunk must carry new properties. This is
/// true after initialization and after any chunk that resets the
/// dictionary; it is cleared when a chunk with new properties is seen.
bool need_properties;
/// True if the next chunk must reset the dictionary. After
/// initialization this is true unless a non-empty preset dictionary
/// was given. It is cleared once the reset has been done.
bool need_dictionary_reset;
/// LZMA options: filled in from the properties byte of a chunk and
/// passed to the LZMA decoder whenever its state is reset.
lzma_options_lzma options;
};
/// \brief      Parse LZMA2 chunks from in[] and decode them into *dict
///
/// The function is a resumable state machine: coder->sequence remembers
/// which header byte or payload is expected next, so decoding can continue
/// from any byte boundary when more input becomes available.
///
/// \param      coder    LZMA2 decoder state
/// \param      dict     Dictionary that receives the decoded data
/// \param      in       Input buffer
/// \param      in_pos   Read position in in[]; advanced as input is consumed
/// \param      in_size  Number of bytes available in in[]
///
/// \return     LZMA_STREAM_END when the end marker (control byte 0x00) is
///             read; LZMA_DATA_ERROR on an invalid control byte, invalid
///             properties, or a compressed size mismatch; LZMA_OK when the
///             input ran out, a dictionary reset was done, or an
///             uncompressed chunk is still incomplete; any other value
///             is passed through unchanged from the LZMA decoder.
static lzma_ret
lzma2_decode(lzma_coder *restrict coder, lzma_dict *restrict dict,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size)
{
	// Every state except SEQ_LZMA consumes at least one input byte, so
	// those states are entered only while input remains. SEQ_LZMA is
	// entered even without input because the LZMA decoder may still be
	// able to make progress.
	while (coder->sequence == SEQ_LZMA || *in_pos < in_size) {
		switch (coder->sequence) {
		case SEQ_CONTROL: {
			const uint32_t ctrl = in[(*in_pos)++];

			// A zero control byte is the end marker.
			if (ctrl == 0x00)
				return LZMA_STREAM_END;

			const bool resets_dict = ctrl == 1 || ctrl >= 0xE0;
			if (resets_dict) {
				// After a dictionary reset the next LZMA
				// chunk has to provide new properties too.
				coder->need_properties = true;
				coder->need_dictionary_reset = true;
			} else if (coder->need_dictionary_reset) {
				// A reset was required but this chunk
				// doesn't request one.
				return LZMA_DATA_ERROR;
			}

			if (ctrl < 0x80) {
				// Uncompressed chunk: only the values 1 and 2
				// are valid here (0 was handled above).
				if (ctrl > 2)
					return LZMA_DATA_ERROR;

				coder->sequence = SEQ_COMPRESSED_0;
				coder->next_sequence = SEQ_COPY;
			} else {
				// LZMA chunk: the low five bits of the
				// control byte are the highest bits of the
				// uncompressed size.
				coder->uncompressed_size = (ctrl & 0x1F) << 16;
				coder->sequence = SEQ_UNCOMPRESSED_1;

				if (ctrl >= 0xC0) {
					// New properties follow; the state
					// reset is done in SEQ_PROPERTIES.
					coder->need_properties = false;
					coder->next_sequence = SEQ_PROPERTIES;
				} else if (coder->need_properties) {
					// Properties were required but this
					// chunk doesn't carry them.
					return LZMA_DATA_ERROR;
				} else {
					coder->next_sequence = SEQ_LZMA;

					// State reset with the old
					// properties is simplest to do
					// right away.
					if (ctrl >= 0xA0)
						coder->lzma.reset(
							coder->lzma.coder,
							&coder->options);
				}
			}

			if (coder->need_dictionary_reset) {
				// Complete the dictionary reset and return
				// so that the caller can flush the dictionary
				// to the actual output buffer.
				coder->need_dictionary_reset = false;
				dict_reset(dict);
				return LZMA_OK;
			}

			break;
		}

		case SEQ_UNCOMPRESSED_1: {
			const uint8_t mid_byte = in[(*in_pos)++];
			coder->uncompressed_size += (uint32_t)mid_byte << 8;
			coder->sequence = SEQ_UNCOMPRESSED_2;
			break;
		}

		case SEQ_UNCOMPRESSED_2: {
			// The header holds the size minus one.
			const uint8_t low_byte = in[(*in_pos)++];
			coder->uncompressed_size += low_byte + 1;
			coder->sequence = SEQ_COMPRESSED_0;
			coder->lzma.set_uncompressed(coder->lzma.coder,
					coder->uncompressed_size);
			break;
		}

		case SEQ_COMPRESSED_0: {
			const uint8_t high_byte = in[(*in_pos)++];
			coder->compressed_size = (uint32_t)high_byte << 8;
			coder->sequence = SEQ_COMPRESSED_1;
			break;
		}

		case SEQ_COMPRESSED_1: {
			// The header holds the size minus one.
			const uint8_t low_byte = in[(*in_pos)++];
			coder->compressed_size += low_byte + 1;
			coder->sequence = coder->next_sequence;
			break;
		}

		case SEQ_PROPERTIES: {
			const uint8_t props_byte = in[(*in_pos)++];
			if (lzma_lzma_lclppb_decode(&coder->options,
					props_byte))
				return LZMA_DATA_ERROR;

			coder->lzma.reset(coder->lzma.coder, &coder->options);
			coder->sequence = SEQ_LZMA;
			break;
		}

		case SEQ_LZMA: {
			// Remember where decoding started so that the number
			// of consumed bytes can be computed afterwards.
			const size_t start_pos = *in_pos;

			// Decode from in[] to *dict.
			const lzma_ret rc = coder->lzma.code(coder->lzma.coder,
					dict, in, in_pos, in_size);

			// The LZMA decoder must not have read past the end
			// of the chunk.
			const size_t consumed = *in_pos - start_pos;
			if (consumed > coder->compressed_size)
				return LZMA_DATA_ERROR;

			coder->compressed_size -= consumed;

			// Chunk not finished yet, or an error occurred.
			if (rc != LZMA_STREAM_END)
				return rc;

			// The chunk ended, so all of its compressed bytes
			// must have been used. The uncompressed size is
			// verified by the LZMA decoder itself.
			if (coder->compressed_size != 0)
				return LZMA_DATA_ERROR;

			coder->sequence = SEQ_CONTROL;
			break;
		}

		case SEQ_COPY: {
			// Copy from input to the dictionary as is.
			// NOTE(review): dict_write() is given a pointer to
			// the remaining byte count and is presumably
			// responsible for decrementing it.
			dict_write(dict, in, in_pos, in_size,
					&coder->compressed_size);

			// More payload is still expected.
			if (coder->compressed_size != 0)
				return LZMA_OK;

			coder->sequence = SEQ_CONTROL;
			break;
		}

		default:
			assert(0);
			return LZMA_PROG_ERROR;
		}
	}

	return LZMA_OK;
}
/// \brief      Free the LZMA2 decoder and the LZMA decoder it wraps
///
/// \param      coder      LZMA2 decoder state to destroy
/// \param      allocator  Allocator passed on to lzma_free()
static void
lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator)
{
	// The wrapped LZMA decoder is expected to have no end callback of
	// its own, so releasing its coder structure directly is enough.
	assert(coder->lzma.end == NULL);

	lzma_free(coder->lzma.coder, allocator);
	lzma_free(coder, allocator);
}
/// \brief      Allocate (on first use) and reset the LZMA2 decoder
///
/// \param      lz          LZ decoder structure to fill in
/// \param      allocator   Allocator passed on to lzma_alloc()
/// \param      opt         Pointer to lzma_options_lzma
/// \param      lz_options  Passed on to lzma_lzma_decoder_create()
///
/// \return     LZMA_MEM_ERROR if allocating the coder fails, otherwise
///             the return value of lzma_lzma_decoder_create().
static lzma_ret
lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator,
		const void *opt, lzma_lz_options *lz_options)
{
	lzma_coder *coder = lz->coder;

	if (coder == NULL) {
		// First use: allocate the state and install the callbacks.
		coder = lzma_alloc(sizeof(lzma_coder), allocator);
		if (coder == NULL)
			return LZMA_MEM_ERROR;

		lz->coder = coder;
		lz->code = &lzma2_decode;
		lz->end = &lzma2_decoder_end;

		coder->lzma = LZMA_LZ_DECODER_INIT;
	}

	const lzma_options_lzma *options = opt;

	// Without a non-empty preset dictionary the first chunk has to
	// reset the dictionary.
	const bool has_preset = options->preset_dict != NULL
			&& options->preset_dict_size != 0;

	coder->sequence = SEQ_CONTROL;
	coder->need_properties = true;
	coder->need_dictionary_reset = !has_preset;

	return lzma_lzma_decoder_create(&coder->lzma,
			allocator, options, lz_options);
}
/// \brief      Initialize an LZMA2 decoder as part of a filter chain
///
/// \param      next       Coder structure to initialize
/// \param      allocator  Allocator passed on to the LZ decoder
/// \param      filters    Filter chain; filters[0] is this filter
///
/// \return     The return value of lzma_lz_decoder_init().
extern lzma_ret
lzma_lzma2_decoder_init(lzma_next_coder *next, lzma_allocator *allocator,
		const lzma_filter_info *filters)
{
	// LZMA2 must be the last filter in the chain. The raw decoder
	// initialization enforces this, so it is only asserted here.
	assert(filters[1].init == NULL);

	const lzma_ret ret = lzma_lz_decoder_init(next, allocator, filters,
			&lzma2_decoder_init);
	return ret;
}
/// \brief      Calculate the memory usage of the LZMA2 decoder
///
/// \param      options  Passed on to lzma_lzma_decoder_memusage_nocheck()
///
/// \return     Memory usage reported for the LZMA decoder plus the size
///             of the LZMA2 coder structure.
extern uint64_t
lzma_lzma2_decoder_memusage(const void *options)
{
	return lzma_lzma_decoder_memusage_nocheck(options)
			+ sizeof(lzma_coder);
}
/// \brief      Decode the one-byte LZMA2 filter properties
///
/// The byte encodes the dictionary size. Values 0-39 give a size of
/// (2 | (value & 1)) << (value / 2 + 11), which starts at 4 KiB for 0.
/// The value 40 means UINT32_MAX. Only dict_size, preset_dict, and
/// preset_dict_size of the new structure are set by this function.
///
/// \param      options     Receives the newly allocated lzma_options_lzma
/// \param      allocator   Allocator passed on to lzma_alloc()
/// \param      props       Encoded properties
/// \param      props_size  Size of props[] in bytes; must be 1
///
/// \return     LZMA_OK on success; LZMA_OPTIONS_ERROR if props_size is
///             not 1 or the byte is invalid; LZMA_MEM_ERROR if memory
///             allocation fails.
extern lzma_ret
lzma_lzma2_props_decode(void **options, lzma_allocator *allocator,
		const uint8_t *props, size_t props_size)
{
	if (props_size != 1)
		return LZMA_OPTIONS_ERROR;

	const uint8_t encoded = props[0];

	// The two highest bits are reserved and must be unset, and 40 is
	// the largest valid dictionary size code.
	if ((encoded & 0xC0) != 0 || encoded > 40)
		return LZMA_OPTIONS_ERROR;

	lzma_options_lzma *opt = lzma_alloc(
			sizeof(lzma_options_lzma), allocator);
	if (opt == NULL)
		return LZMA_MEM_ERROR;

	// Decode the dictionary size: the lowest bit selects a mantissa of
	// 2 or 3, and the remaining bits select the shift amount.
	uint32_t dict_size = UINT32_MAX;
	if (encoded != 40) {
		dict_size = 2 | (encoded & 1);
		dict_size <<= encoded / 2 + 11;
	}

	opt->dict_size = dict_size;
	opt->preset_dict = NULL;
	opt->preset_dict_size = 0;

	*options = opt;

	return LZMA_OK;
}