1///////////////////////////////////////////////////////////////////////////////
2//
3/// \file       lzma_encoder_optimum_fast.c
4//
5//  Author:     Igor Pavlov
6//
7//  This file has been put into the public domain.
8//  You can do whatever you want with this file.
9//
10///////////////////////////////////////////////////////////////////////////////
11
12#include "lzma_encoder_private.h"
13#include "memcmplen.h"
14
15
16#define change_pair(small_dist, big_dist) \
17	(((big_dist) >> 7) > (small_dist))
18
19
20extern void
21lzma_lzma_optimum_fast(lzma_lzma1_encoder *restrict coder,
22		lzma_mf *restrict mf,
23		uint32_t *restrict back_res, uint32_t *restrict len_res)
24{
25	const uint32_t nice_len = mf->nice_len;
26
27	uint32_t len_main;
28	uint32_t matches_count;
29	if (mf->read_ahead == 0) {
30		len_main = mf_find(mf, &matches_count, coder->matches);
31	} else {
32		assert(mf->read_ahead == 1);
33		len_main = coder->longest_match_length;
34		matches_count = coder->matches_count;
35	}
36
37	const uint8_t *buf = mf_ptr(mf) - 1;
38	const uint32_t buf_avail = my_min(mf_avail(mf) + 1, MATCH_LEN_MAX);
39
40	if (buf_avail < 2) {
41		// There's not enough input left to encode a match.
42		*back_res = UINT32_MAX;
43		*len_res = 1;
44		return;
45	}
46
47	// Look for repeated matches; scan the previous four match distances
48	uint32_t rep_len = 0;
49	uint32_t rep_index = 0;
50
51	for (uint32_t i = 0; i < REPS; ++i) {
52		// Pointer to the beginning of the match candidate
53		const uint8_t *const buf_back = buf - coder->reps[i] - 1;
54
55		// If the first two bytes (2 == MATCH_LEN_MIN) do not match,
56		// this rep is not useful.
57		if (not_equal_16(buf, buf_back))
58			continue;
59
60		// The first two bytes matched.
61		// Calculate the length of the match.
62		const uint32_t len = lzma_memcmplen(
63				buf, buf_back, 2, buf_avail);
64
65		// If we have found a repeated match that is at least
66		// nice_len long, return it immediately.
67		if (len >= nice_len) {
68			*back_res = i;
69			*len_res = len;
70			mf_skip(mf, len - 1);
71			return;
72		}
73
74		if (len > rep_len) {
75			rep_index = i;
76			rep_len = len;
77		}
78	}
79
80	// We didn't find a long enough repeated match. Encode it as a normal
81	// match if the match length is at least nice_len.
82	if (len_main >= nice_len) {
83		*back_res = coder->matches[matches_count - 1].dist + REPS;
84		*len_res = len_main;
85		mf_skip(mf, len_main - 1);
86		return;
87	}
88
89	uint32_t back_main = 0;
90	if (len_main >= 2) {
91		back_main = coder->matches[matches_count - 1].dist;
92
93		while (matches_count > 1 && len_main ==
94				coder->matches[matches_count - 2].len + 1) {
95			if (!change_pair(coder->matches[
96						matches_count - 2].dist,
97					back_main))
98				break;
99
100			--matches_count;
101			len_main = coder->matches[matches_count - 1].len;
102			back_main = coder->matches[matches_count - 1].dist;
103		}
104
105		if (len_main == 2 && back_main >= 0x80)
106			len_main = 1;
107	}
108
109	if (rep_len >= 2) {
110		if (rep_len + 1 >= len_main
111				|| (rep_len + 2 >= len_main
112					&& back_main > (UINT32_C(1) << 9))
113				|| (rep_len + 3 >= len_main
114					&& back_main > (UINT32_C(1) << 15))) {
115			*back_res = rep_index;
116			*len_res = rep_len;
117			mf_skip(mf, rep_len - 1);
118			return;
119		}
120	}
121
122	if (len_main < 2 || buf_avail <= 2) {
123		*back_res = UINT32_MAX;
124		*len_res = 1;
125		return;
126	}
127
128	// Get the matches for the next byte. If we find a better match,
129	// the current byte is encoded as a literal.
130	coder->longest_match_length = mf_find(mf,
131			&coder->matches_count, coder->matches);
132
133	if (coder->longest_match_length >= 2) {
134		const uint32_t new_dist = coder->matches[
135				coder->matches_count - 1].dist;
136
137		if ((coder->longest_match_length >= len_main
138					&& new_dist < back_main)
139				|| (coder->longest_match_length == len_main + 1
140					&& !change_pair(back_main, new_dist))
141				|| (coder->longest_match_length > len_main + 1)
142				|| (coder->longest_match_length + 1 >= len_main
143					&& len_main >= 3
144					&& change_pair(new_dist, back_main))) {
145			*back_res = UINT32_MAX;
146			*len_res = 1;
147			return;
148		}
149	}
150
151	// In contrast to LZMA SDK, dictionary could not have been moved
152	// between mf_find() calls, thus it is safe to just increment
153	// the old buf pointer instead of recalculating it with mf_ptr().
154	++buf;
155
156	const uint32_t limit = my_max(2, len_main - 1);
157
158	for (uint32_t i = 0; i < REPS; ++i) {
159		if (memcmp(buf, buf - coder->reps[i] - 1, limit) == 0) {
160			*back_res = UINT32_MAX;
161			*len_res = 1;
162			return;
163		}
164	}
165
166	*back_res = back_main + REPS;
167	*len_res = len_main;
168	mf_skip(mf, len_main - 2);
169	return;
170}
171