lzma_encoder_optimum_normal.c revision 213700
14Srgrimes///////////////////////////////////////////////////////////////////////////////
2738Sache//
34Srgrimes/// \file       lzma_encoder_optimum_normal.c
4738Sache//
5738Sache//  Author:     Igor Pavlov
6619Srgrimes//
712502Sjulian//  This file has been put into the public domain.
84Srgrimes//  You can do whatever you want with this file.
94Srgrimes//
104Srgrimes///////////////////////////////////////////////////////////////////////////////
114Srgrimes
124Srgrimes#include "lzma_encoder_private.h"
134Srgrimes#include "fastpos.h"
142056Swollman
152056Swollman
162056Swollman////////////
172056Swollman// Prices //
182056Swollman////////////
197090Sbde
202056Swollmanstatic uint32_t
212056Swollmanget_literal_price(const lzma_coder *const coder, const uint32_t pos,
222056Swollman		const uint32_t prev_byte, const bool match_mode,
237090Sbde		uint32_t match_byte, uint32_t symbol)
242056Swollman{
254Srgrimes	const probability *const subcoder = literal_subcoder(coder->literal,
2612502Sjulian			coder->literal_context_bits, coder->literal_pos_mask,
2712502Sjulian			pos, prev_byte);
2812502Sjulian
2912502Sjulian	uint32_t price = 0;
3012502Sjulian
3112502Sjulian	if (!match_mode) {
3212502Sjulian		price = rc_bittree_price(subcoder, 8, symbol);
3312502Sjulian	} else {
3412502Sjulian		uint32_t offset = 0x100;
3510537Sjulian		symbol += UINT32_C(1) << 8;
3610537Sjulian
3710537Sjulian		do {
3812502Sjulian			match_byte <<= 1;
3910537Sjulian
4010653Sdg			const uint32_t match_bit = match_byte & offset;
4110537Sjulian			const uint32_t subcoder_index
4210537Sjulian					= offset + match_bit + (symbol >> 8);
4312502Sjulian			const uint32_t bit = (symbol >> 7) & 1;
4412502Sjulian			price += rc_bit_price(subcoder[subcoder_index], bit);
4512502Sjulian
4612502Sjulian			symbol <<= 1;
4710537Sjulian			offset &= ~(match_byte ^ symbol);
4810537Sjulian
4912502Sjulian		} while (symbol < (UINT32_C(1) << 16));
5012502Sjulian	}
5110537Sjulian
5210537Sjulian	return price;
5312502Sjulian}
5410537Sjulian
554Srgrimes
564Srgrimesstatic inline uint32_t
574Srgrimesget_len_price(const lzma_length_encoder *const lencoder,
584Srgrimes		const uint32_t len, const uint32_t pos_state)
594Srgrimes{
604Srgrimes	// NOTE: Unlike the other price tables, length prices are updated
614Srgrimes	// in lzma_encoder.c
624Srgrimes	return lencoder->prices[pos_state][len - MATCH_LEN_MIN];
634Srgrimes}
644Srgrimes
654Srgrimes
664Srgrimesstatic inline uint32_t
674Srgrimesget_short_rep_price(const lzma_coder *const coder,
684Srgrimes		const lzma_lzma_state state, const uint32_t pos_state)
694Srgrimes{
704Srgrimes	return rc_bit_0_price(coder->is_rep0[state])
714Srgrimes		+ rc_bit_0_price(coder->is_rep0_long[state][pos_state]);
724Srgrimes}
734Srgrimes
744Srgrimes
754Srgrimesstatic inline uint32_t
764Srgrimesget_pure_rep_price(const lzma_coder *const coder, const uint32_t rep_index,
774Srgrimes		const lzma_lzma_state state, uint32_t pos_state)
784Srgrimes{
794Srgrimes	uint32_t price;
808876Srgrimes
814Srgrimes	if (rep_index == 0) {
824Srgrimes		price = rc_bit_0_price(coder->is_rep0[state]);
834Srgrimes		price += rc_bit_1_price(coder->is_rep0_long[state][pos_state]);
844Srgrimes	} else {
854Srgrimes		price = rc_bit_1_price(coder->is_rep0[state]);
864Srgrimes
878876Srgrimes		if (rep_index == 1) {
884Srgrimes			price += rc_bit_0_price(coder->is_rep1[state]);
894Srgrimes		} else {
904Srgrimes			price += rc_bit_1_price(coder->is_rep1[state]);
91766Sache			price += rc_bit_price(coder->is_rep2[state],
92766Sache					rep_index - 2);
934Srgrimes		}
941016Sache	}
95766Sache
96766Sache	return price;
974Srgrimes}
988288Sdg
991016Sache
1004Srgrimesstatic inline uint32_t
1018288Sdgget_rep_price(const lzma_coder *const coder, const uint32_t rep_index,
1028288Sdg		const uint32_t len, const lzma_lzma_state state,
1038288Sdg		const uint32_t pos_state)
1048288Sdg{
1058288Sdg	return get_len_price(&coder->rep_len_encoder, len, pos_state)
1064Srgrimes		+ get_pure_rep_price(coder, rep_index, state, pos_state);
107766Sache}
1084Srgrimes
1094Srgrimes
1104Srgrimesstatic inline uint32_t
1114Srgrimesget_pos_len_price(const lzma_coder *const coder, const uint32_t pos,
1121393Ssos		const uint32_t len, const uint32_t pos_state)
1131393Ssos{
1141393Ssos	const uint32_t len_to_pos_state = get_len_to_pos_state(len);
1151393Ssos	uint32_t price;
1161393Ssos
1171393Ssos	if (pos < FULL_DISTANCES) {
1181393Ssos		price = coder->distances_prices[len_to_pos_state][pos];
1194Srgrimes	} else {
1204Srgrimes		const uint32_t pos_slot = get_pos_slot_2(pos);
1214Srgrimes		price = coder->pos_slot_prices[len_to_pos_state][pos_slot]
1221393Ssos				+ coder->align_prices[pos & ALIGN_MASK];
1234Srgrimes	}
1244Srgrimes
1254Srgrimes	price += get_len_price(&coder->match_len_encoder, len, pos_state);
1264Srgrimes
1274Srgrimes	return price;
1284Srgrimes}
1296152Sache
1306152Sache
1311393Ssosstatic void
1321393Ssosfill_distances_prices(lzma_coder *coder)
1334Srgrimes{
1344Srgrimes	for (uint32_t len_to_pos_state = 0;
1351016Sache			len_to_pos_state < LEN_TO_POS_STATES;
1364Srgrimes			++len_to_pos_state) {
1374Srgrimes
1384Srgrimes		uint32_t *const pos_slot_prices
1394Srgrimes				= coder->pos_slot_prices[len_to_pos_state];
1404Srgrimes
1414Srgrimes		// Price to encode the pos_slot.
1424Srgrimes		for (uint32_t pos_slot = 0;
1434Srgrimes				pos_slot < coder->dist_table_size; ++pos_slot)
1444Srgrimes			pos_slot_prices[pos_slot] = rc_bittree_price(
145738Sache					coder->pos_slot[len_to_pos_state],
1464Srgrimes					POS_SLOT_BITS, pos_slot);
1476152Sache
1486152Sache		// For matches with distance >= FULL_DISTANCES, add the price
1494Srgrimes		// of the direct bits part of the match distance. (Align bits
1504Srgrimes		// are handled by fill_align_prices()).
1514Srgrimes		for (uint32_t pos_slot = END_POS_MODEL_INDEX;
1524Srgrimes				pos_slot < coder->dist_table_size; ++pos_slot)
1534Srgrimes			pos_slot_prices[pos_slot] += rc_direct_price(
154738Sache					((pos_slot >> 1) - 1) - ALIGN_BITS);
155738Sache
1564Srgrimes		// Distances in the range [0, 3] are fully encoded with
1574Srgrimes		// pos_slot, so they are used for coder->distances_prices
1584Srgrimes		// as is.
1594Srgrimes		for (uint32_t i = 0; i < START_POS_MODEL_INDEX; ++i)
1604Srgrimes			coder->distances_prices[len_to_pos_state][i]
1614Srgrimes					= pos_slot_prices[i];
1624Srgrimes	}
1634Srgrimes
1644Srgrimes	// Distances in the range [4, 127] depend on pos_slot and pos_special.
1654Srgrimes	// We do this in a loop separate from the above loop to avoid
1664Srgrimes	// redundant calls to get_pos_slot().
1674Srgrimes	for (uint32_t i = START_POS_MODEL_INDEX; i < FULL_DISTANCES; ++i) {
1684Srgrimes		const uint32_t pos_slot = get_pos_slot(i);
1694Srgrimes		const uint32_t footer_bits = ((pos_slot >> 1) - 1);
1704Srgrimes		const uint32_t base = (2 | (pos_slot & 1)) << footer_bits;
1714Srgrimes		const uint32_t price = rc_bittree_reverse_price(
1724Srgrimes				coder->pos_special + base - pos_slot - 1,
1734Srgrimes				footer_bits, i - base);
1744Srgrimes
1754Srgrimes		for (uint32_t len_to_pos_state = 0;
1764Srgrimes				len_to_pos_state < LEN_TO_POS_STATES;
1774Srgrimes				++len_to_pos_state)
1784Srgrimes			coder->distances_prices[len_to_pos_state][i]
1794Srgrimes					= price + coder->pos_slot_prices[
1804Srgrimes						len_to_pos_state][pos_slot];
1814Srgrimes	}
1824Srgrimes
1834Srgrimes	coder->match_price_count = 0;
1844Srgrimes	return;
1854Srgrimes}
1864Srgrimes
1874Srgrimes
1884Srgrimesstatic void
1894Srgrimesfill_align_prices(lzma_coder *coder)
1904Srgrimes{
1914Srgrimes	for (uint32_t i = 0; i < ALIGN_TABLE_SIZE; ++i)
1924Srgrimes		coder->align_prices[i] = rc_bittree_reverse_price(
1934Srgrimes				coder->pos_align, ALIGN_BITS, i);
1944Srgrimes
1954Srgrimes	coder->align_price_count = 0;
1964Srgrimes	return;
1974Srgrimes}
1984Srgrimes
1994Srgrimes
2004Srgrimes/////////////
2014Srgrimes// Optimal //
2024Srgrimes/////////////
2034Srgrimes
2044Srgrimesstatic inline void
2054Srgrimesmake_literal(lzma_optimal *optimal)
2064Srgrimes{
2074Srgrimes	optimal->back_prev = UINT32_MAX;
2084Srgrimes	optimal->prev_1_is_literal = false;
2094Srgrimes}
2104Srgrimes
2114Srgrimes
2124Srgrimesstatic inline void
2134Srgrimesmake_short_rep(lzma_optimal *optimal)
2144Srgrimes{
2154Srgrimes	optimal->back_prev = 0;
2164Srgrimes	optimal->prev_1_is_literal = false;
217766Sache}
2184Srgrimes
2194Srgrimes
2204Srgrimes#define is_short_rep(optimal) \
2214Srgrimes	((optimal).back_prev == 0)
2224Srgrimes
2234Srgrimes
2241016Sachestatic void
2254Srgrimesbackward(lzma_coder *restrict coder, uint32_t *restrict len_res,
2264Srgrimes		uint32_t *restrict back_res, uint32_t cur)
2274Srgrimes{
2284Srgrimes	coder->opts_end_index = cur;
2294Srgrimes
2304Srgrimes	uint32_t pos_mem = coder->opts[cur].pos_prev;
2314Srgrimes	uint32_t back_mem = coder->opts[cur].back_prev;
2324Srgrimes
233738Sache	do {
2344Srgrimes		if (coder->opts[cur].prev_1_is_literal) {
2354Srgrimes			make_literal(&coder->opts[pos_mem]);
2364Srgrimes			coder->opts[pos_mem].pos_prev = pos_mem - 1;
2374Srgrimes
2388288Sdg			if (coder->opts[cur].prev_2) {
2398288Sdg				coder->opts[pos_mem - 1].prev_1_is_literal
2408288Sdg						= false;
2414Srgrimes				coder->opts[pos_mem - 1].pos_prev
2421016Sache						= coder->opts[cur].pos_prev_2;
2434Srgrimes				coder->opts[pos_mem - 1].back_prev
2444Srgrimes						= coder->opts[cur].back_prev_2;
2454Srgrimes			}
2464Srgrimes		}
2474Srgrimes
2484Srgrimes		const uint32_t pos_prev = pos_mem;
2494Srgrimes		const uint32_t back_cur = back_mem;
250738Sache
2514Srgrimes		back_mem = coder->opts[pos_prev].back_prev;
2524Srgrimes		pos_mem = coder->opts[pos_prev].pos_prev;
2534Srgrimes
2541016Sache		coder->opts[pos_prev].back_prev = back_cur;
2554Srgrimes		coder->opts[pos_prev].pos_prev = cur;
2561016Sache		cur = pos_prev;
2574Srgrimes
2584Srgrimes	} while (cur != 0);
2594Srgrimes
2604Srgrimes	coder->opts_current_index = coder->opts[0].pos_prev;
2614Srgrimes	*len_res = coder->opts[0].pos_prev;
2624Srgrimes	*back_res = coder->opts[0].back_prev;
2634Srgrimes
2644Srgrimes	return;
2654Srgrimes}
2664Srgrimes
2674Srgrimes
2684Srgrimes//////////
2691016Sache// Main //
2704Srgrimes//////////
2714Srgrimes
2724Srgrimesstatic inline uint32_t
2734Srgrimeshelper1(lzma_coder *restrict coder, lzma_mf *restrict mf,
274738Sache		uint32_t *restrict back_res, uint32_t *restrict len_res,
2754Srgrimes		uint32_t position)
2764Srgrimes{
2774Srgrimes	const uint32_t nice_len = mf->nice_len;
2784Srgrimes
2794Srgrimes	uint32_t len_main;
2804Srgrimes	uint32_t matches_count;
2814Srgrimes
2824Srgrimes	if (mf->read_ahead == 0) {
2834Srgrimes		len_main = mf_find(mf, &matches_count, coder->matches);
284738Sache	} else {
2854Srgrimes		assert(mf->read_ahead == 1);
2864Srgrimes		len_main = coder->longest_match_length;
2874Srgrimes		matches_count = coder->matches_count;
2884Srgrimes	}
2894Srgrimes
2904Srgrimes	const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
2914Srgrimes	if (buf_avail < 2) {
2924Srgrimes		*back_res = UINT32_MAX;
2934Srgrimes		*len_res = 1;
2944Srgrimes		return UINT32_MAX;
2954Srgrimes	}
2964Srgrimes
2974Srgrimes	const uint8_t *const buf = mf_ptr(mf) - 1;
2984Srgrimes
2994Srgrimes	uint32_t rep_lens[REP_DISTANCES];
3004Srgrimes	uint32_t rep_max_index = 0;
3014Srgrimes
3024Srgrimes	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
3034Srgrimes		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
3044Srgrimes
3054Srgrimes		if (not_equal_16(buf, buf_back)) {
3064Srgrimes			rep_lens[i] = 0;
3074Srgrimes			continue;
3084Srgrimes		}
3094Srgrimes
3104Srgrimes		uint32_t len_test;
3114Srgrimes		for (len_test = 2; len_test < buf_avail
3124Srgrimes				&& buf[len_test] == buf_back[len_test];
3134Srgrimes				++len_test) ;
3144Srgrimes
3154Srgrimes		rep_lens[i] = len_test;
3164Srgrimes		if (len_test > rep_lens[rep_max_index])
3174Srgrimes			rep_max_index = i;
3184Srgrimes	}
3194Srgrimes
3204Srgrimes	if (rep_lens[rep_max_index] >= nice_len) {
3214Srgrimes		*back_res = rep_max_index;
3224Srgrimes		*len_res = rep_lens[rep_max_index];
3234Srgrimes		mf_skip(mf, *len_res - 1);
3244Srgrimes		return UINT32_MAX;
3254Srgrimes	}
3264Srgrimes
3274Srgrimes
3284Srgrimes	if (len_main >= nice_len) {
3294Srgrimes		*back_res = coder->matches[matches_count - 1].dist
3304Srgrimes				+ REP_DISTANCES;
3314Srgrimes		*len_res = len_main;
3324Srgrimes		mf_skip(mf, len_main - 1);
3334Srgrimes		return UINT32_MAX;
3344Srgrimes	}
3354Srgrimes
3364Srgrimes	const uint8_t current_byte = *buf;
3374Srgrimes	const uint8_t match_byte = *(buf - coder->reps[0] - 1);
3384Srgrimes
3394Srgrimes	if (len_main < 2 && current_byte != match_byte
3404Srgrimes			&& rep_lens[rep_max_index] < 2) {
3414Srgrimes		*back_res = UINT32_MAX;
342738Sache		*len_res = 1;
343738Sache		return UINT32_MAX;
344738Sache	}
345738Sache
346738Sache	coder->opts[0].state = coder->state;
347738Sache
348738Sache	const uint32_t pos_state = position & coder->pos_mask;
349738Sache
350738Sache	coder->opts[1].price = rc_bit_0_price(
3514Srgrimes				coder->is_match[coder->state][pos_state])
3521016Sache			+ get_literal_price(coder, position, buf[-1],
353738Sache				!is_literal_state(coder->state),
354738Sache				match_byte, current_byte);
3554Srgrimes
3564Srgrimes	make_literal(&coder->opts[1]);
3574Srgrimes
3584Srgrimes	const uint32_t match_price = rc_bit_1_price(
3594Srgrimes			coder->is_match[coder->state][pos_state]);
3604Srgrimes	const uint32_t rep_match_price = match_price
3614Srgrimes			+ rc_bit_1_price(coder->is_rep[coder->state]);
3624Srgrimes
3634Srgrimes	if (match_byte == current_byte) {
3644Srgrimes		const uint32_t short_rep_price = rep_match_price
3654Srgrimes				+ get_short_rep_price(
3664Srgrimes					coder, coder->state, pos_state);
3674Srgrimes
3684Srgrimes		if (short_rep_price < coder->opts[1].price) {
3694Srgrimes			coder->opts[1].price = short_rep_price;
3704Srgrimes			make_short_rep(&coder->opts[1]);
3714Srgrimes		}
3724Srgrimes	}
3733593Sache
3744Srgrimes	const uint32_t len_end = my_max(len_main, rep_lens[rep_max_index]);
3754Srgrimes
3764Srgrimes	if (len_end < 2) {
3774Srgrimes		*back_res = coder->opts[1].back_prev;
3784Srgrimes		*len_res = 1;
3794Srgrimes		return UINT32_MAX;
3803593Sache	}
3814Srgrimes
3824Srgrimes	coder->opts[1].pos_prev = 0;
3834Srgrimes
3844Srgrimes	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
3854Srgrimes		coder->opts[0].backs[i] = coder->reps[i];
3864Srgrimes
3874Srgrimes	uint32_t len = len_end;
3884Srgrimes	do {
3894Srgrimes		coder->opts[len].price = RC_INFINITY_PRICE;
3904Srgrimes	} while (--len >= 2);
3914Srgrimes
3924Srgrimes
3934Srgrimes	for (uint32_t i = 0; i < REP_DISTANCES; ++i) {
3944Srgrimes		uint32_t rep_len = rep_lens[i];
3954Srgrimes		if (rep_len < 2)
3964Srgrimes			continue;
3974Srgrimes
398738Sache		const uint32_t price = rep_match_price + get_pure_rep_price(
399738Sache				coder, i, coder->state, pos_state);
400738Sache
401738Sache		do {
402738Sache			const uint32_t cur_and_len_price = price
403738Sache					+ get_len_price(
404738Sache						&coder->rep_len_encoder,
4051016Sache						rep_len, pos_state);
406738Sache
4074Srgrimes			if (cur_and_len_price < coder->opts[rep_len].price) {
4084Srgrimes				coder->opts[rep_len].price = cur_and_len_price;
4094Srgrimes				coder->opts[rep_len].pos_prev = 0;
4104Srgrimes				coder->opts[rep_len].back_prev = i;
4114Srgrimes				coder->opts[rep_len].prev_1_is_literal = false;
4124Srgrimes			}
4134Srgrimes		} while (--rep_len >= 2);
4144Srgrimes	}
4154Srgrimes
4164Srgrimes
4174Srgrimes	const uint32_t normal_match_price = match_price
4184Srgrimes			+ rc_bit_0_price(coder->is_rep[coder->state]);
4194Srgrimes
4204Srgrimes	len = rep_lens[0] >= 2 ? rep_lens[0] + 1 : 2;
4214Srgrimes	if (len <= len_main) {
4224Srgrimes		uint32_t i = 0;
4234Srgrimes		while (len > coder->matches[i].len)
4244Srgrimes			++i;
4254Srgrimes
4261016Sache		for(; ; ++len) {
4274Srgrimes			const uint32_t dist = coder->matches[i].dist;
4284Srgrimes			const uint32_t cur_and_len_price = normal_match_price
4294Srgrimes					+ get_pos_len_price(coder,
4304Srgrimes						dist, len, pos_state);
4314Srgrimes
4324Srgrimes			if (cur_and_len_price < coder->opts[len].price) {
433766Sache				coder->opts[len].price = cur_and_len_price;
4344Srgrimes				coder->opts[len].pos_prev = 0;
4354Srgrimes				coder->opts[len].back_prev
4364Srgrimes						= dist + REP_DISTANCES;
4374Srgrimes				coder->opts[len].prev_1_is_literal = false;
4384Srgrimes			}
4394Srgrimes
4404Srgrimes			if (len == coder->matches[i].len)
4414Srgrimes				if (++i == matches_count)
4424Srgrimes					break;
4434Srgrimes		}
4444Srgrimes	}
4454Srgrimes
4464Srgrimes	return len_end;
4474Srgrimes}
4484Srgrimes
4494Srgrimes
4504Srgrimesstatic inline uint32_t
4514Srgrimeshelper2(lzma_coder *coder, uint32_t *reps, const uint8_t *buf,
4524Srgrimes		uint32_t len_end, uint32_t position, const uint32_t cur,
4534Srgrimes		const uint32_t nice_len, const uint32_t buf_avail_full)
4544Srgrimes{
4554Srgrimes	uint32_t matches_count = coder->matches_count;
4564Srgrimes	uint32_t new_len = coder->longest_match_length;
4574Srgrimes	uint32_t pos_prev = coder->opts[cur].pos_prev;
4584Srgrimes	lzma_lzma_state state;
4594Srgrimes
4604Srgrimes	if (coder->opts[cur].prev_1_is_literal) {
4614Srgrimes		--pos_prev;
4624Srgrimes
4634Srgrimes		if (coder->opts[cur].prev_2) {
4644Srgrimes			state = coder->opts[coder->opts[cur].pos_prev_2].state;
4654Srgrimes
466738Sache			if (coder->opts[cur].back_prev_2 < REP_DISTANCES)
467738Sache				update_long_rep(state);
4684Srgrimes			else
46910624Sbde				update_match(state);
47010624Sbde
47110624Sbde		} else {
47210624Sbde			state = coder->opts[pos_prev].state;
47310624Sbde		}
4744Srgrimes
4754Srgrimes		update_literal(state);
476738Sache
4774Srgrimes	} else {
4784Srgrimes		state = coder->opts[pos_prev].state;
4794Srgrimes	}
4804Srgrimes
4814Srgrimes	if (pos_prev == cur - 1) {
4824Srgrimes		if (is_short_rep(coder->opts[cur]))
4834Srgrimes			update_short_rep(state);
4844Srgrimes		else
485738Sache			update_literal(state);
486738Sache	} else {
487738Sache		uint32_t pos;
4884Srgrimes		if (coder->opts[cur].prev_1_is_literal
4894Srgrimes				&& coder->opts[cur].prev_2) {
490738Sache			pos_prev = coder->opts[cur].pos_prev_2;
491738Sache			pos = coder->opts[cur].back_prev_2;
4924Srgrimes			update_long_rep(state);
4934Srgrimes		} else {
4944Srgrimes			pos = coder->opts[cur].back_prev;
49510624Sbde			if (pos < REP_DISTANCES)
496738Sache				update_long_rep(state);
497738Sache			else
49810624Sbde				update_match(state);
4994Srgrimes		}
5004Srgrimes
5014Srgrimes		if (pos < REP_DISTANCES) {
5024Srgrimes			reps[0] = coder->opts[pos_prev].backs[pos];
5034Srgrimes
5044Srgrimes			uint32_t i;
5054Srgrimes			for (i = 1; i <= pos; ++i)
5064Srgrimes				reps[i] = coder->opts[pos_prev].backs[i - 1];
507738Sache
508738Sache			for (; i < REP_DISTANCES; ++i)
5094Srgrimes				reps[i] = coder->opts[pos_prev].backs[i];
5104Srgrimes
511738Sache		} else {
512738Sache			reps[0] = pos - REP_DISTANCES;
513738Sache
514738Sache			for (uint32_t i = 1; i < REP_DISTANCES; ++i)
515738Sache				reps[i] = coder->opts[pos_prev].backs[i - 1];
5164Srgrimes		}
517738Sache	}
5181016Sache
5194Srgrimes	coder->opts[cur].state = state;
5204Srgrimes
5214Srgrimes	for (uint32_t i = 0; i < REP_DISTANCES; ++i)
5224Srgrimes		coder->opts[cur].backs[i] = reps[i];
52310624Sbde
52410624Sbde	const uint32_t cur_price = coder->opts[cur].price;
52510624Sbde
52610624Sbde	const uint8_t current_byte = *buf;
52710624Sbde	const uint8_t match_byte = *(buf - reps[0] - 1);
5284Srgrimes
5294Srgrimes	const uint32_t pos_state = position & coder->pos_mask;
530738Sache
5314Srgrimes	const uint32_t cur_and_1_price = cur_price
5324Srgrimes			+ rc_bit_0_price(coder->is_match[state][pos_state])
5334Srgrimes			+ get_literal_price(coder, position, buf[-1],
5344Srgrimes			!is_literal_state(state), match_byte, current_byte);
5354Srgrimes
5364Srgrimes	bool next_is_literal = false;
537766Sache
538766Sache	if (cur_and_1_price < coder->opts[cur + 1].price) {
5394Srgrimes		coder->opts[cur + 1].price = cur_and_1_price;
540738Sache		coder->opts[cur + 1].pos_prev = cur;
541738Sache		make_literal(&coder->opts[cur + 1]);
5424Srgrimes		next_is_literal = true;
5434Srgrimes	}
5444Srgrimes
54510624Sbde	const uint32_t match_price = cur_price
54610624Sbde			+ rc_bit_1_price(coder->is_match[state][pos_state]);
54710624Sbde	const uint32_t rep_match_price = match_price
54810624Sbde			+ rc_bit_1_price(coder->is_rep[state]);
54910624Sbde
55010624Sbde	if (match_byte == current_byte
5514Srgrimes			&& !(coder->opts[cur + 1].pos_prev < cur
5524Srgrimes				&& coder->opts[cur + 1].back_prev == 0)) {
553738Sache
5544Srgrimes		const uint32_t short_rep_price = rep_match_price
5554Srgrimes				+ get_short_rep_price(coder, state, pos_state);
5564Srgrimes
5574Srgrimes		if (short_rep_price <= coder->opts[cur + 1].price) {
5584Srgrimes			coder->opts[cur + 1].price = short_rep_price;
5594Srgrimes			coder->opts[cur + 1].pos_prev = cur;
5604Srgrimes			make_short_rep(&coder->opts[cur + 1]);
5614Srgrimes			next_is_literal = true;
5624Srgrimes		}
5631016Sache	}
5644Srgrimes
5651016Sache	if (buf_avail_full < 2)
5661016Sache		return len_end;
5674Srgrimes
5684Srgrimes	const uint32_t buf_avail = my_min(buf_avail_full, nice_len);
5694Srgrimes
5704Srgrimes	if (!next_is_literal && match_byte != current_byte) { // speed optimization
5714Srgrimes		// try literal + rep0
5724Srgrimes		const uint8_t *const buf_back = buf - reps[0] - 1;
5734Srgrimes		const uint32_t limit = my_min(buf_avail_full, nice_len + 1);
5744Srgrimes
5754Srgrimes		uint32_t len_test = 1;
5764Srgrimes		while (len_test < limit && buf[len_test] == buf_back[len_test])
5774Srgrimes			++len_test;
5784Srgrimes
5794Srgrimes		--len_test;
5804Srgrimes
5811016Sache		if (len_test >= 2) {
5824Srgrimes			lzma_lzma_state state_2 = state;
5831016Sache			update_literal(state_2);
5844Srgrimes
585738Sache			const uint32_t pos_state_next = (position + 1) & coder->pos_mask;
5864Srgrimes			const uint32_t next_rep_match_price = cur_and_1_price
587738Sache					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
5884Srgrimes					+ rc_bit_1_price(coder->is_rep[state_2]);
5894Srgrimes
59012502Sjulian			//for (; len_test >= 2; --len_test) {
59112502Sjulian			const uint32_t offset = cur + 1 + len_test;
59212502Sjulian
59312502Sjulian			while (len_end < offset)
59412502Sjulian				coder->opts[++len_end].price = RC_INFINITY_PRICE;
59512502Sjulian
59612502Sjulian			const uint32_t cur_and_len_price = next_rep_match_price
59712502Sjulian					+ get_rep_price(coder, 0, len_test,
59812502Sjulian						state_2, pos_state_next);
59912502Sjulian
60012502Sjulian			if (cur_and_len_price < coder->opts[offset].price) {
60112502Sjulian				coder->opts[offset].price = cur_and_len_price;
60212502Sjulian				coder->opts[offset].pos_prev = cur + 1;
60312502Sjulian				coder->opts[offset].back_prev = 0;
60412502Sjulian				coder->opts[offset].prev_1_is_literal = true;
60512502Sjulian				coder->opts[offset].prev_2 = false;
60612502Sjulian			}
60712502Sjulian			//}
60812502Sjulian		}
60912502Sjulian	}
61012502Sjulian
61112502Sjulian
6124Srgrimes	uint32_t start_len = 2; // speed optimization
6134Srgrimes
614	for (uint32_t rep_index = 0; rep_index < REP_DISTANCES; ++rep_index) {
615		const uint8_t *const buf_back = buf - reps[rep_index] - 1;
616		if (not_equal_16(buf, buf_back))
617			continue;
618
619		uint32_t len_test;
620		for (len_test = 2; len_test < buf_avail
621				&& buf[len_test] == buf_back[len_test];
622				++len_test) ;
623
624		while (len_end < cur + len_test)
625			coder->opts[++len_end].price = RC_INFINITY_PRICE;
626
627		const uint32_t len_test_temp = len_test;
628		const uint32_t price = rep_match_price + get_pure_rep_price(
629				coder, rep_index, state, pos_state);
630
631		do {
632			const uint32_t cur_and_len_price = price
633					+ get_len_price(&coder->rep_len_encoder,
634							len_test, pos_state);
635
636			if (cur_and_len_price < coder->opts[cur + len_test].price) {
637				coder->opts[cur + len_test].price = cur_and_len_price;
638				coder->opts[cur + len_test].pos_prev = cur;
639				coder->opts[cur + len_test].back_prev = rep_index;
640				coder->opts[cur + len_test].prev_1_is_literal = false;
641			}
642		} while (--len_test >= 2);
643
644		len_test = len_test_temp;
645
646		if (rep_index == 0)
647			start_len = len_test + 1;
648
649
650		uint32_t len_test_2 = len_test + 1;
651		const uint32_t limit = my_min(buf_avail_full,
652				len_test_2 + nice_len);
653		for (; len_test_2 < limit
654				&& buf[len_test_2] == buf_back[len_test_2];
655				++len_test_2) ;
656
657		len_test_2 -= len_test + 1;
658
659		if (len_test_2 >= 2) {
660			lzma_lzma_state state_2 = state;
661			update_long_rep(state_2);
662
663			uint32_t pos_state_next = (position + len_test) & coder->pos_mask;
664
665			const uint32_t cur_and_len_literal_price = price
666					+ get_len_price(&coder->rep_len_encoder,
667						len_test, pos_state)
668					+ rc_bit_0_price(coder->is_match[state_2][pos_state_next])
669					+ get_literal_price(coder, position + len_test,
670						buf[len_test - 1], true,
671						buf_back[len_test], buf[len_test]);
672
673			update_literal(state_2);
674
675			pos_state_next = (position + len_test + 1) & coder->pos_mask;
676
677			const uint32_t next_rep_match_price = cur_and_len_literal_price
678					+ rc_bit_1_price(coder->is_match[state_2][pos_state_next])
679					+ rc_bit_1_price(coder->is_rep[state_2]);
680
681			//for(; len_test_2 >= 2; len_test_2--) {
682			const uint32_t offset = cur + len_test + 1 + len_test_2;
683
684			while (len_end < offset)
685				coder->opts[++len_end].price = RC_INFINITY_PRICE;
686
687			const uint32_t cur_and_len_price = next_rep_match_price
688					+ get_rep_price(coder, 0, len_test_2,
689						state_2, pos_state_next);
690
691			if (cur_and_len_price < coder->opts[offset].price) {
692				coder->opts[offset].price = cur_and_len_price;
693				coder->opts[offset].pos_prev = cur + len_test + 1;
694				coder->opts[offset].back_prev = 0;
695				coder->opts[offset].prev_1_is_literal = true;
696				coder->opts[offset].prev_2 = true;
697				coder->opts[offset].pos_prev_2 = cur;
698				coder->opts[offset].back_prev_2 = rep_index;
699			}
700			//}
701		}
702	}
703
704
705	//for (uint32_t len_test = 2; len_test <= new_len; ++len_test)
706	if (new_len > buf_avail) {
707		new_len = buf_avail;
708
709		matches_count = 0;
710		while (new_len > coder->matches[matches_count].len)
711			++matches_count;
712
713		coder->matches[matches_count++].len = new_len;
714	}
715
716
717	if (new_len >= start_len) {
718		const uint32_t normal_match_price = match_price
719				+ rc_bit_0_price(coder->is_rep[state]);
720
721		while (len_end < cur + new_len)
722			coder->opts[++len_end].price = RC_INFINITY_PRICE;
723
724		uint32_t i = 0;
725		while (start_len > coder->matches[i].len)
726			++i;
727
728		for (uint32_t len_test = start_len; ; ++len_test) {
729			const uint32_t cur_back = coder->matches[i].dist;
730			uint32_t cur_and_len_price = normal_match_price
731					+ get_pos_len_price(coder,
732						cur_back, len_test, pos_state);
733
734			if (cur_and_len_price < coder->opts[cur + len_test].price) {
735				coder->opts[cur + len_test].price = cur_and_len_price;
736				coder->opts[cur + len_test].pos_prev = cur;
737				coder->opts[cur + len_test].back_prev
738						= cur_back + REP_DISTANCES;
739				coder->opts[cur + len_test].prev_1_is_literal = false;
740			}
741
742			if (len_test == coder->matches[i].len) {
743				// Try Match + Literal + Rep0
744				const uint8_t *const buf_back = buf - cur_back - 1;
745				uint32_t len_test_2 = len_test + 1;
746				const uint32_t limit = my_min(buf_avail_full,
747						len_test_2 + nice_len);
748
749				for (; len_test_2 < limit &&
750						buf[len_test_2] == buf_back[len_test_2];
751						++len_test_2) ;
752
753				len_test_2 -= len_test + 1;
754
755				if (len_test_2 >= 2) {
756					lzma_lzma_state state_2 = state;
757					update_match(state_2);
758					uint32_t pos_state_next
759							= (position + len_test) & coder->pos_mask;
760
761					const uint32_t cur_and_len_literal_price = cur_and_len_price
762							+ rc_bit_0_price(
763								coder->is_match[state_2][pos_state_next])
764							+ get_literal_price(coder,
765								position + len_test,
766								buf[len_test - 1],
767								true,
768								buf_back[len_test],
769								buf[len_test]);
770
771					update_literal(state_2);
772					pos_state_next = (pos_state_next + 1) & coder->pos_mask;
773
774					const uint32_t next_rep_match_price
775							= cur_and_len_literal_price
776							+ rc_bit_1_price(
777								coder->is_match[state_2][pos_state_next])
778							+ rc_bit_1_price(coder->is_rep[state_2]);
779
780					// for(; len_test_2 >= 2; --len_test_2) {
781					const uint32_t offset = cur + len_test + 1 + len_test_2;
782
783					while (len_end < offset)
784						coder->opts[++len_end].price = RC_INFINITY_PRICE;
785
786					cur_and_len_price = next_rep_match_price
787							+ get_rep_price(coder, 0, len_test_2,
788								state_2, pos_state_next);
789
790					if (cur_and_len_price < coder->opts[offset].price) {
791						coder->opts[offset].price = cur_and_len_price;
792						coder->opts[offset].pos_prev = cur + len_test + 1;
793						coder->opts[offset].back_prev = 0;
794						coder->opts[offset].prev_1_is_literal = true;
795						coder->opts[offset].prev_2 = true;
796						coder->opts[offset].pos_prev_2 = cur;
797						coder->opts[offset].back_prev_2
798								= cur_back + REP_DISTANCES;
799					}
800					//}
801				}
802
803				if (++i == matches_count)
804					break;
805			}
806		}
807	}
808
809	return len_end;
810}
811
812
813extern void
814lzma_lzma_optimum_normal(lzma_coder *restrict coder, lzma_mf *restrict mf,
815		uint32_t *restrict back_res, uint32_t *restrict len_res,
816		uint32_t position)
817{
818	// If we have symbols pending, return the next pending symbol.
819	if (coder->opts_end_index != coder->opts_current_index) {
820		assert(mf->read_ahead > 0);
821		*len_res = coder->opts[coder->opts_current_index].pos_prev
822				- coder->opts_current_index;
823		*back_res = coder->opts[coder->opts_current_index].back_prev;
824		coder->opts_current_index = coder->opts[
825				coder->opts_current_index].pos_prev;
826		return;
827	}
828
829	// Update the price tables. In LZMA SDK <= 4.60 (and possibly later)
830	// this was done in both initialization function and in the main loop.
831	// In liblzma they were moved into this single place.
832	if (mf->read_ahead == 0) {
833		if (coder->match_price_count >= (1 << 7))
834			fill_distances_prices(coder);
835
836		if (coder->align_price_count >= ALIGN_TABLE_SIZE)
837			fill_align_prices(coder);
838	}
839
840	// TODO: This needs quite a bit of cleaning still. But splitting
841	// the original function into two pieces makes it at least a little
842	// more readable, since those two parts don't share many variables.
843
844	uint32_t len_end = helper1(coder, mf, back_res, len_res, position);
845	if (len_end == UINT32_MAX)
846		return;
847
848	uint32_t reps[REP_DISTANCES];
849	memcpy(reps, coder->reps, sizeof(reps));
850
851	uint32_t cur;
852	for (cur = 1; cur < len_end; ++cur) {
853		assert(cur < OPTS);
854
855		coder->longest_match_length = mf_find(
856				mf, &coder->matches_count, coder->matches);
857
858		if (coder->longest_match_length >= mf->nice_len)
859			break;
860
861		len_end = helper2(coder, reps, mf_ptr(mf) - 1, len_end,
862				position + cur, cur, mf->nice_len,
863				my_min(mf_avail(mf) + 1, OPTS - 1 - cur));
864	}
865
866	backward(coder, len_res, back_res, cur);
867	return;
868}
869