1#include "WKdm.h"
2
3/***********************************************************************
4 *                   THE PACKING ROUTINES
5 */
6
7/* WK_pack_2bits()
8 * Pack some multiple of four words holding two-bit tags (in the low
9 * two bits of each byte) into an integral number of words, i.e.,
10 * one fourth as many.
11 * NOTE: Pad the input out with zeroes to a multiple of four words!
12 */
13static WK_word*
14WK_pack_2bits(WK_word* source_buf,
15              WK_word* source_end,
16	      WK_word* dest_buf) {
17
18   register WK_word* src_next = source_buf;
19   WK_word* dest_next = dest_buf;
20
21   while (src_next < source_end) {
22      register WK_word temp = src_next[0];
23      temp |= (src_next[1] << 2);
24      temp |= (src_next[2] << 4);
25      temp |= (src_next[3] << 6);
26
27      dest_next[0] = temp;
28      dest_next++;
29      src_next += 4;
30   }
31
32   return dest_next;
33
34}
35
36/* WK_pack_4bits()
37 * Pack an even number of words holding 4-bit patterns in the low bits
38 * of each byte into half as many words.
39 * note: pad out the input with zeroes to an even number of words!
40 */
41
42static WK_word*
43WK_pack_4bits(WK_word* source_buf,
44	      WK_word* source_end,
45	      WK_word* dest_buf) {
46   register WK_word* src_next = source_buf;
47   WK_word* dest_next = dest_buf;
48
49   /* this loop should probably be unrolled */
50   while (src_next < source_end) {
51     register WK_word temp = src_next[0];
52     temp |= (src_next[1] << 4);
53
54     dest_next[0] = temp;
55     dest_next++;
56     src_next += 2;
57   }
58
59   return dest_next;
60
61}
62
63/* pack_3_tenbits()
64 * Pack a sequence of three ten bit items into one word.
65 * note: pad out the input with zeroes to an even number of words!
66 */
67static WK_word*
68WK_pack_3_tenbits(WK_word* source_buf,
69		  WK_word* source_end,
70		  WK_word* dest_buf) {
71
72   register WK_word* src_next = source_buf;
73   WK_word* dest_next = dest_buf;
74
75   /* this loop should probably be unrolled */
76   while (src_next < source_end) {
77      register WK_word temp = src_next[0];
78      temp |= (src_next[1] << 10);
79      temp |= (src_next[2] << 20);
80
81      dest_next[0] = temp;
82      dest_next++;
83      src_next += 3;
84   }
85
86   return dest_next;
87
88}
89
90/***************************************************************************
91 *  WKdm_compress()---THE COMPRESSOR
92 */
93
94unsigned int
95WKdm_compress (WK_word* src_buf,
96               WK_word* dest_buf,
97	       unsigned int num_input_words)
98{
99  DictionaryElement dictionary[DICTIONARY_SIZE];
100
101  /* arrays that hold output data in intermediate form during modeling */
102  /* and whose contents are packed into the actual output after modeling */
103
104  /* sizes of these arrays should be increased if you want to compress
105   * pages larger than 4KB
106   */
107  WK_word tempTagsArray[300];         /* tags for everything          */
108  WK_word tempQPosArray[300];         /* queue positions for matches  */
109  WK_word tempLowBitsArray[1200];     /* low bits for partial matches */
110
111  /* boundary_tmp will be used for keeping track of what's where in
112   * the compressed page during packing
113   */
114  WK_word* boundary_tmp;
115
116  /* Fill pointers for filling intermediate arrays (of queue positions
117   * and low bits) during encoding.
118   * Full words go straight to the destination buffer area reserved
119   * for them.  (Right after where the tags go.)
120   */
121  WK_word* next_full_patt;
122  char* next_tag = (char *) tempTagsArray;
123  char* next_qp = (char *) tempQPosArray;
124  WK_word* next_low_bits = tempLowBitsArray;
125
126  WK_word* next_input_word = src_buf;
127  WK_word* end_of_input = src_buf + num_input_words;
128
129  PRELOAD_DICTIONARY;
130
131  next_full_patt = dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16);
132
133#ifdef WK_DEBUG
134  printf("\nIn WKdm_compress\n");
135  printf("About to actually compress, src_buf is %u\n", src_buf);
136  printf("dictionary is at %u\n", dictionary);
137  printf("dest_buf is %u next_full_patt is %u\n", dest_buf, next_full_patt);
138  fflush(stdout);
139#endif
140
141  while (next_input_word < end_of_input)
142  {
143     WK_word *dict_location;
144     WK_word dict_word;
145     WK_word input_word = *next_input_word;
146
147     /* compute hash value, which is a byte offset into the dictionary,
148      * and add it to the base address of the dictionary. Cast back and
149      * forth to/from char * so no shifts are needed
150      */
151     dict_location =
152       (WK_word *)
153       ((void*) (((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word)));
154
155     dict_word = *dict_location;
156
157     if (input_word == dict_word)
158     {
159        RECORD_EXACT(dict_location - dictionary);
160     }
161     else if (input_word == 0) {
162        RECORD_ZERO;
163     }
164     else
165     {
166        WK_word input_high_bits = HIGH_BITS(input_word);
167        if (input_high_bits == HIGH_BITS(dict_word)) {
168	  RECORD_PARTIAL(dict_location - dictionary, LOW_BITS(input_word));
169          *dict_location = input_word;
170        }
171        else {
172	  RECORD_MISS(input_word);
173            *dict_location = input_word;
174        }
175     }
176     next_input_word++;
177  }
178
179#ifdef WK_DEBUG
180  printf("AFTER MODELING in WKdm_compress()\n");  fflush(stdout);
181  printf("tempTagsArray holds %u bytes\n",
182         next_tag - (char *) tempTagsArray);
183  printf("tempQPosArray holds %u bytes\n",
184         next_qp - (char *) tempQPosArray);
185  printf("tempLowBitsArray holds %u bytes\n",
186         (char *) next_low_bits - (char *) tempLowBitsArray);
187
188  printf("next_full_patt is %p\n",
189          next_full_patt);
190
191  printf(" i.e., there are %u full patterns\n",
192     next_full_patt - (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16)));
193  fflush(stdout);
194
195  { int i;
196    WK_word *arr =(dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16));
197
198    printf("  first 20 full patterns are: \n");
199    for (i = 0; i < 20; i++) {
200      printf(" %d", arr[i]);
201    }
202    printf("\n");
203  }
204#endif
205
206  /* Record (into the header) where we stopped writing full words,
207   * which is where we will pack the queue positions.  (Recall
208   * that we wrote the full words directly into the dest buffer
209   * during modeling.
210   */
211
212  SET_QPOS_AREA_START(dest_buf,next_full_patt);
213
214  /* Pack the tags into the tags area, between the page header
215   * and the full words area.  We don't pad for the packer
216   * because we assume that the page size is a multiple of 16.
217   */
218
219#ifdef WK_DEBUG
220  printf("about to pack %u bytes holding tags\n",
221         next_tag - (char *) tempTagsArray);
222
223  { int i;
224    char* arr = (char *) tempTagsArray;
225
226    printf("  first 200 tags are: \n");
227    for (i = 0; i < 200; i++) {
228      printf(" %d", arr[i]);
229    }
230    printf("\n");
231  }
232#endif
233
234  boundary_tmp = WK_pack_2bits(tempTagsArray,
235		               (WK_word *) ((void *) next_tag),
236			       dest_buf + HEADER_SIZE_IN_WORDS);
237
238#ifdef WK_DEBUG
239    printf("packing tags stopped at %u\n", boundary_tmp);
240#endif
241
242  /* Pack the queue positions into the area just after
243   * the full words.  We have to round up the source
244   * region to a multiple of two words.
245   */
246
247  {
248    unsigned int num_bytes_to_pack = (unsigned int)(next_qp - (char *) tempQPosArray);
249    unsigned int num_packed_words = (num_bytes_to_pack + 7) >> 3; // ceil((double) num_bytes_to_pack / 8);
250    unsigned int num_source_words = num_packed_words * 2;
251    WK_word* endQPosArray = tempQPosArray + num_source_words;
252
253    /* Pad out the array with zeros to avoid corrupting real packed
254       values. */
255    for (; /* next_qp is already set as desired */
256	 next_qp < (char*)endQPosArray;
257	 next_qp++) {
258      *next_qp = 0;
259    }
260
261#ifdef WK_DEBUG
262    printf("about to pack %u (bytes holding) queue posns.\n",
263           num_bytes_to_pack);
264    printf("packing them from %u words into %u words\n",
265           num_source_words, num_packed_words);
266    printf("dest is range %u to %u\n",
267           next_full_patt, next_full_patt + num_packed_words);
268    { int i;
269      char *arr = (char *) tempQPosArray;
270      printf("  first 200 queue positions are: \n");
271      for (i = 0; i < 200; i++) {
272        printf(" %d", arr[i]);
273      }
274      printf("\n");
275    }
276#endif
277
278    boundary_tmp = WK_pack_4bits(tempQPosArray,
279			         endQPosArray,
280				 next_full_patt);
281#ifdef WK_DEBUG
282     printf("Packing of queue positions stopped at %u\n", boundary_tmp);
283#endif // WK_DEBUG
284
285    /* Record (into the header) where we stopped packing queue positions,
286     * which is where we will start packing low bits.
287     */
288    SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp);
289
290  }
291
292  /* Pack the low bit patterns into the area just after
293   * the queue positions.  We have to round up the source
294   * region to a multiple of three words.
295   */
296
297  {
298    unsigned int num_tenbits_to_pack =
299      (unsigned int)(next_low_bits - tempLowBitsArray);
300    unsigned int num_packed_words = (num_tenbits_to_pack + 2) / 3; //ceil((double) num_tenbits_to_pack / 3);
301    unsigned int num_source_words = num_packed_words * 3;
302    WK_word* endLowBitsArray = tempLowBitsArray + num_source_words;
303
304    /* Pad out the array with zeros to avoid corrupting real packed
305       values. */
306
307    for (; /* next_low_bits is already set as desired */
308	 next_low_bits < endLowBitsArray;
309	 next_low_bits++) {
310      *next_low_bits = 0;
311    }
312
313#ifdef WK_DEBUG
314	  printf("about to pack low bits\n");
315          printf("num_tenbits_to_pack is %u\n", num_tenbits_to_pack);
316          printf("endLowBitsArray is %u\n", endLowBitsArray);
317#endif
318
319    boundary_tmp = WK_pack_3_tenbits (tempLowBitsArray,
320		                      endLowBitsArray,
321				      boundary_tmp);
322
323    SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp);
324
325  }
326
327  return (unsigned int)((char *) boundary_tmp - (char *) dest_buf);
328}
329