1184610Salfred// SPDX-License-Identifier: 0BSD
2184610Salfred
3184610Salfred///////////////////////////////////////////////////////////////////////////////
4184610Salfred//
5184610Salfred/// \file       fastpos.h
6184610Salfred/// \brief      Kind of two-bit version of bit scan reverse
7184610Salfred///
8184610Salfred//  Authors:    Igor Pavlov
9184610Salfred//              Lasse Collin
10184610Salfred//
11184610Salfred///////////////////////////////////////////////////////////////////////////////
12184610Salfred
13184610Salfred#ifndef LZMA_FASTPOS_H
14184610Salfred#define LZMA_FASTPOS_H
15184610Salfred
16184610Salfred// LZMA encodes match distances by storing the highest two bits using
17184610Salfred// a six-bit value [0, 63], and then the missing lower bits.
18184610Salfred// Dictionary size is also stored using this encoding in the .xz
19184610Salfred// file format header.
20184610Salfred//
21184610Salfred// fastpos.h provides a way to quickly find out the correct six-bit
22184610Salfred// values. The following table gives some examples of this encoding:
23184610Salfred//
24184610Salfred//     dist   return
25184610Salfred//       0       0
26184610Salfred//       1       1
27246122Shselasky//       2       2
28246122Shselasky//       3       3
29246122Shselasky//       4       4
30194677Sthompsa//       5       4
31194677Sthompsa//       6       5
32194677Sthompsa//       7       5
33194677Sthompsa//       8       6
34194677Sthompsa//      11       6
35194677Sthompsa//      12       7
36194677Sthompsa//     ...      ...
37194677Sthompsa//      15       7
38194677Sthompsa//      16       8
39194677Sthompsa//      17       8
40194677Sthompsa//     ...      ...
41194677Sthompsa//      23       8
42194677Sthompsa//      24       9
43194677Sthompsa//      25       9
44194677Sthompsa//     ...      ...
45194677Sthompsa//
46194677Sthompsa//
47194677Sthompsa// Provided functions or macros
48194677Sthompsa// ----------------------------
49194677Sthompsa//
50194677Sthompsa// get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
51194677Sthompsa// assumes that dist >= FULL_DISTANCES, thus the result is at least
52188942Sthompsa// FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
53246360Shselasky// get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
54246360Shselasky// should be tiny bit faster due to the assumption being made.
55188942Sthompsa//
56188942Sthompsa//
57184610Salfred// Size vs. speed
58184610Salfred// --------------
59184610Salfred//
60184610Salfred// With some CPUs that have fast BSR (bit scan reverse) instruction, the
61246122Shselasky// size optimized version is slightly faster than the bigger table based
62184610Salfred// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
63184610Salfred// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
64184610Salfred// would still have speed roughly comparable to the table version. Older
65184610Salfred// x86 CPUs like the original Pentium have very slow BSR; on those systems
66184610Salfred// the table version is a lot faster.
67184610Salfred//
68184610Salfred// On some CPUs, the table version is a lot faster when using position
69196274Sthompsa// dependent code, but with position independent code the size optimized
70196547Sthompsa// version is slightly faster. This occurs at least on 32-bit SPARC (no
71184610Salfred// ASM optimizations).
72196274Sthompsa//
73196274Sthompsa// I'm making the table version the default, because that has good speed
74227286Shselasky// on all systems I have tried. The size optimized version is sometimes
75227075Shselasky// slightly faster, but sometimes it is a lot slower.
76227286Shselasky
77227286Shselasky#ifdef HAVE_SMALL
78227286Shselasky#	define get_dist_slot(dist) \
79196274Sthompsa		((dist) <= 4 ? (dist) : get_dist_slot_2(dist))
80196274Sthompsa
81184610Salfredstatic inline uint32_t
82184610Salfredget_dist_slot_2(uint32_t dist)
83184610Salfred{
84227286Shselasky	const uint32_t i = bsr32(dist);
85184610Salfred	return (i + i) + ((dist >> (i - 1)) & 1);
86184610Salfred}
87184610Salfred
88184610Salfred
89194677Sthompsa#else
90194228Sthompsa
91184610Salfred#define FASTPOS_BITS 13
92227309Sed
93267992Shselaskylzma_attr_visibility_hidden
94184610Salfredextern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
95184610Salfred
96184610Salfred
97184610Salfred#define fastpos_shift(extra, n) \
98192984Sthompsa	((extra) + (n) * (FASTPOS_BITS - 1))
99184610Salfred
100184610Salfred#define fastpos_limit(extra, n) \
101184610Salfred	(UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
102184610Salfred
103192984Sthompsa#define fastpos_result(dist, extra, n) \
104184610Salfred	(uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \
105192984Sthompsa			+ 2 * fastpos_shift(extra, n)
106192984Sthompsa
107184610Salfred
108184610Salfredstatic inline uint32_t
109227075Shselaskyget_dist_slot(uint32_t dist)
110227075Shselasky{
111227075Shselasky	// If it is small enough, we can pick the result directly from
112184610Salfred	// the precalculated table.
113184610Salfred	if (dist < fastpos_limit(0, 0))
114184610Salfred		return lzma_fastpos[dist];
115184610Salfred
116184610Salfred	if (dist < fastpos_limit(0, 1))
117184610Salfred		return fastpos_result(dist, 0, 1);
118184610Salfred
119184610Salfred	return fastpos_result(dist, 0, 2);
120184610Salfred}
121184610Salfred
122184610Salfred
123184610Salfred#ifdef FULL_DISTANCES_BITS
124188600Sthompsastatic inline uint32_t
125184610Salfredget_dist_slot_2(uint32_t dist)
126188600Sthompsa{
127184610Salfred	assert(dist >= FULL_DISTANCES);
128184610Salfred
129184610Salfred	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
130184610Salfred		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);
131184610Salfred
132184610Salfred	if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
133184610Salfred		return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);
134184610Salfred
135184610Salfred	return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
136184610Salfred}
137184610Salfred#endif
138184610Salfred
139184610Salfred#endif
140184610Salfred
141184610Salfred#endif
142184610Salfred