1#include "WKdm.h" 2 3/*********************************************************************** 4 * THE PACKING ROUTINES 5 */ 6 7/* WK_pack_2bits() 8 * Pack some multiple of four words holding two-bit tags (in the low 9 * two bits of each byte) into an integral number of words, i.e., 10 * one fourth as many. 11 * NOTE: Pad the input out with zeroes to a multiple of four words! 12 */ 13static WK_word* 14WK_pack_2bits(WK_word* source_buf, 15 WK_word* source_end, 16 WK_word* dest_buf) { 17 18 register WK_word* src_next = source_buf; 19 WK_word* dest_next = dest_buf; 20 21 while (src_next < source_end) { 22 register WK_word temp = src_next[0]; 23 temp |= (src_next[1] << 2); 24 temp |= (src_next[2] << 4); 25 temp |= (src_next[3] << 6); 26 27 dest_next[0] = temp; 28 dest_next++; 29 src_next += 4; 30 } 31 32 return dest_next; 33 34} 35 36/* WK_pack_4bits() 37 * Pack an even number of words holding 4-bit patterns in the low bits 38 * of each byte into half as many words. 39 * note: pad out the input with zeroes to an even number of words! 40 */ 41 42static WK_word* 43WK_pack_4bits(WK_word* source_buf, 44 WK_word* source_end, 45 WK_word* dest_buf) { 46 register WK_word* src_next = source_buf; 47 WK_word* dest_next = dest_buf; 48 49 /* this loop should probably be unrolled */ 50 while (src_next < source_end) { 51 register WK_word temp = src_next[0]; 52 temp |= (src_next[1] << 4); 53 54 dest_next[0] = temp; 55 dest_next++; 56 src_next += 2; 57 } 58 59 return dest_next; 60 61} 62 63/* pack_3_tenbits() 64 * Pack a sequence of three ten bit items into one word. 65 * note: pad out the input with zeroes to an even number of words! 66 */ 67static WK_word* 68WK_pack_3_tenbits(WK_word* source_buf, 69 WK_word* source_end, 70 WK_word* dest_buf) { 71 72 register WK_word* src_next = source_buf; 73 WK_word* dest_next = dest_buf; 74 75 /* this loop should probably be unrolled */ 76 while (src_next < source_end) { 77 register WK_word temp = src_next[0]; 78 temp |= (src_next[1] << 10); 79 temp |= (src_next[2] << 20); 80 81 dest_next[0] = temp; 82 dest_next++; 83 src_next += 3; 84 } 85 86 return dest_next; 87 88} 89 90/*************************************************************************** 91 * WKdm_compress()---THE COMPRESSOR 92 */ 93 94unsigned int 95WKdm_compress (WK_word* src_buf, 96 WK_word* dest_buf, 97 unsigned int num_input_words) 98{ 99 DictionaryElement dictionary[DICTIONARY_SIZE]; 100 101 /* arrays that hold output data in intermediate form during modeling */ 102 /* and whose contents are packed into the actual output after modeling */ 103 104 /* sizes of these arrays should be increased if you want to compress 105 * pages larger than 4KB 106 */ 107 WK_word tempTagsArray[300]; /* tags for everything */ 108 WK_word tempQPosArray[300]; /* queue positions for matches */ 109 WK_word tempLowBitsArray[1200]; /* low bits for partial matches */ 110 111 /* boundary_tmp will be used for keeping track of what's where in 112 * the compressed page during packing 113 */ 114 WK_word* boundary_tmp; 115 116 /* Fill pointers for filling intermediate arrays (of queue positions 117 * and low bits) during encoding. 118 * Full words go straight to the destination buffer area reserved 119 * for them. (Right after where the tags go.) 120 */ 121 WK_word* next_full_patt; 122 char* next_tag = (char *) tempTagsArray; 123 char* next_qp = (char *) tempQPosArray; 124 WK_word* next_low_bits = tempLowBitsArray; 125 126 WK_word* next_input_word = src_buf; 127 WK_word* end_of_input = src_buf + num_input_words; 128 129 PRELOAD_DICTIONARY; 130 131 next_full_patt = dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16); 132 133#ifdef WK_DEBUG 134 printf("\nIn WKdm_compress\n"); 135 printf("About to actually compress, src_buf is %u\n", src_buf); 136 printf("dictionary is at %u\n", dictionary); 137 printf("dest_buf is %u next_full_patt is %u\n", dest_buf, next_full_patt); 138 fflush(stdout); 139#endif 140 141 while (next_input_word < end_of_input) 142 { 143 WK_word *dict_location; 144 WK_word dict_word; 145 WK_word input_word = *next_input_word; 146 147 /* compute hash value, which is a byte offset into the dictionary, 148 * and add it to the base address of the dictionary. Cast back and 149 * forth to/from char * so no shifts are needed 150 */ 151 dict_location = 152 (WK_word *) 153 ((void*) (((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word))); 154 155 dict_word = *dict_location; 156 157 if (input_word == dict_word) 158 { 159 RECORD_EXACT(dict_location - dictionary); 160 } 161 else if (input_word == 0) { 162 RECORD_ZERO; 163 } 164 else 165 { 166 WK_word input_high_bits = HIGH_BITS(input_word); 167 if (input_high_bits == HIGH_BITS(dict_word)) { 168 RECORD_PARTIAL(dict_location - dictionary, LOW_BITS(input_word)); 169 *dict_location = input_word; 170 } 171 else { 172 RECORD_MISS(input_word); 173 *dict_location = input_word; 174 } 175 } 176 next_input_word++; 177 } 178 179#ifdef WK_DEBUG 180 printf("AFTER MODELING in WKdm_compress()\n"); fflush(stdout); 181 printf("tempTagsArray holds %u bytes\n", 182 next_tag - (char *) tempTagsArray); 183 printf("tempQPosArray holds %u bytes\n", 184 next_qp - (char *) tempQPosArray); 185 printf("tempLowBitsArray holds %u bytes\n", 186 (char *) next_low_bits - (char *) tempLowBitsArray); 187 188 printf("next_full_patt is %p\n", 189 next_full_patt); 190 191 printf(" i.e., there are %u full patterns\n", 192 next_full_patt - (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16))); 193 fflush(stdout); 194 195 { int i; 196 WK_word *arr =(dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16)); 197 198 printf(" first 20 full patterns are: \n"); 199 for (i = 0; i < 20; i++) { 200 printf(" %d", arr[i]); 201 } 202 printf("\n"); 203 } 204#endif 205 206 /* Record (into the header) where we stopped writing full words, 207 * which is where we will pack the queue positions. (Recall 208 * that we wrote the full words directly into the dest buffer 209 * during modeling. 210 */ 211 212 SET_QPOS_AREA_START(dest_buf,next_full_patt); 213 214 /* Pack the tags into the tags area, between the page header 215 * and the full words area. We don't pad for the packer 216 * because we assume that the page size is a multiple of 16. 217 */ 218 219#ifdef WK_DEBUG 220 printf("about to pack %u bytes holding tags\n", 221 next_tag - (char *) tempTagsArray); 222 223 { int i; 224 char* arr = (char *) tempTagsArray; 225 226 printf(" first 200 tags are: \n"); 227 for (i = 0; i < 200; i++) { 228 printf(" %d", arr[i]); 229 } 230 printf("\n"); 231 } 232#endif 233 234 boundary_tmp = WK_pack_2bits(tempTagsArray, 235 (WK_word *) ((void *) next_tag), 236 dest_buf + HEADER_SIZE_IN_WORDS); 237 238#ifdef WK_DEBUG 239 printf("packing tags stopped at %u\n", boundary_tmp); 240#endif 241 242 /* Pack the queue positions into the area just after 243 * the full words. We have to round up the source 244 * region to a multiple of two words. 245 */ 246 247 { 248 unsigned int num_bytes_to_pack = (unsigned int)(next_qp - (char *) tempQPosArray); 249 unsigned int num_packed_words = (num_bytes_to_pack + 7) >> 3; // ceil((double) num_bytes_to_pack / 8); 250 unsigned int num_source_words = num_packed_words * 2; 251 WK_word* endQPosArray = tempQPosArray + num_source_words; 252 253 /* Pad out the array with zeros to avoid corrupting real packed 254 values. */ 255 for (; /* next_qp is already set as desired */ 256 next_qp < (char*)endQPosArray; 257 next_qp++) { 258 *next_qp = 0; 259 } 260 261#ifdef WK_DEBUG 262 printf("about to pack %u (bytes holding) queue posns.\n", 263 num_bytes_to_pack); 264 printf("packing them from %u words into %u words\n", 265 num_source_words, num_packed_words); 266 printf("dest is range %u to %u\n", 267 next_full_patt, next_full_patt + num_packed_words); 268 { int i; 269 char *arr = (char *) tempQPosArray; 270 printf(" first 200 queue positions are: \n"); 271 for (i = 0; i < 200; i++) { 272 printf(" %d", arr[i]); 273 } 274 printf("\n"); 275 } 276#endif 277 278 boundary_tmp = WK_pack_4bits(tempQPosArray, 279 endQPosArray, 280 next_full_patt); 281#ifdef WK_DEBUG 282 printf("Packing of queue positions stopped at %u\n", boundary_tmp); 283#endif // WK_DEBUG 284 285 /* Record (into the header) where we stopped packing queue positions, 286 * which is where we will start packing low bits. 287 */ 288 SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp); 289 290 } 291 292 /* Pack the low bit patterns into the area just after 293 * the queue positions. We have to round up the source 294 * region to a multiple of three words. 295 */ 296 297 { 298 unsigned int num_tenbits_to_pack = 299 (unsigned int)(next_low_bits - tempLowBitsArray); 300 unsigned int num_packed_words = (num_tenbits_to_pack + 2) / 3; //ceil((double) num_tenbits_to_pack / 3); 301 unsigned int num_source_words = num_packed_words * 3; 302 WK_word* endLowBitsArray = tempLowBitsArray + num_source_words; 303 304 /* Pad out the array with zeros to avoid corrupting real packed 305 values. */ 306 307 for (; /* next_low_bits is already set as desired */ 308 next_low_bits < endLowBitsArray; 309 next_low_bits++) { 310 *next_low_bits = 0; 311 } 312 313#ifdef WK_DEBUG 314 printf("about to pack low bits\n"); 315 printf("num_tenbits_to_pack is %u\n", num_tenbits_to_pack); 316 printf("endLowBitsArray is %u\n", endLowBitsArray); 317#endif 318 319 boundary_tmp = WK_pack_3_tenbits (tempLowBitsArray, 320 endLowBitsArray, 321 boundary_tmp); 322 323 SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp); 324 325 } 326 327 return (unsigned int)((char *) boundary_tmp - (char *) dest_buf); 328} 329