1/*
2Open Tracker License
3
4Terms and Conditions
5
6Copyright (c) 1991-2001, Be Incorporated. All rights reserved.
7
8Permission is hereby granted, free of charge, to any person obtaining a copy of
9this software and associated documentation files (the "Software"), to deal in
10the Software without restriction, including without limitation the rights to
11use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12of the Software, and to permit persons to whom the Software is furnished to do
13so, subject to the following conditions:
14
15The above copyright notice and this permission notice applies to all licensees
16and shall be included in all copies or substantial portions of the Software.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24
25Except as contained in this notice, the name of Be Incorporated shall not be
26used in advertising or otherwise to promote the sale, use or other dealings in
27this Software without prior written authorization from Be Incorporated.
28
29BeMail(TM), Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30of Be Incorporated in the United States and other countries. Other brand product
31names are registered trademarks or trademarks of their respective holders.
32All rights reserved.
33*/
34
35
36#include "WIndex.h"
37
38#include <File.h>
39#include <fs_attr.h>
40#include <Message.h>
41#include <Node.h>
42
43#include <ctype.h>
44#include <stdlib.h>
45#include <stdio.h>
46
47
48#define IVERSION	1
49
50
51static int32 kCRCTable = 0;
52
53
54int32 cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2);
55void gen_crc_table();
56unsigned long update_crc(unsigned long crc_accum, const char *data_blk_ptr,
57	int data_blk_size);
58
59
60FileEntry::FileEntry(void)
61{
62
63}
64
65
66FileEntry::FileEntry(const char *entryString)
67	:
68	BString(entryString)
69{
70
71}
72
73
74status_t
75WIndex::SetTo(const char *dataPath, const char *indexPath)
76{
77	BFile* dataFile;
78	BFile indexFile;
79
80	dataFile = new BFile();
81
82	if (dataFile->SetTo(dataPath, B_READ_ONLY) != B_OK) {
83		delete dataFile;
84		return B_ERROR;
85	} else {
86		bool buildIndex = true;
87		SetTo(dataFile);
88
89		time_t mtime;
90		time_t modified;
91
92		dataFile->GetModificationTime(&mtime);
93
94		if (indexFile.SetTo(indexPath, B_READ_ONLY) == B_OK) {
95			attr_info info;
96			if ((indexFile.GetAttrInfo("WINDEX:version", &info) == B_OK)) {
97				uint32 version = 0;
98				indexFile.ReadAttr("WINDEX:version", B_UINT32_TYPE, 0,
99					&version, 4);
100				if (IVERSION == version) {
101					if ((indexFile.GetAttrInfo("WINDEX:modified", &info)
102						== B_OK)) {
103						indexFile.ReadAttr("WINDEX:modified", B_UINT32_TYPE, 0,
104							&modified, 4);
105
106						if (mtime == modified) {
107							if (UnflattenIndex(&indexFile) == B_OK)
108								buildIndex = false;
109						}
110					}
111				}
112			}
113			indexFile.Unset();
114		}
115		if (buildIndex) {
116			InitIndex();
117			BuildIndex();
118			if (indexFile.SetTo(indexPath,
119				B_WRITE_ONLY | B_CREATE_FILE | B_ERASE_FILE) == B_OK) {
120				FlattenIndex(&indexFile);
121				indexFile.WriteAttr("WINDEX:modified", B_UINT32_TYPE, 0,
122					&mtime, 4);
123				uint32 version = IVERSION;
124				indexFile.WriteAttr("WINDEX:version", B_UINT32_TYPE, 0,
125					&version, 4);
126			}
127		}
128	}
129	return B_OK;
130}
131
132
133FileEntry::~FileEntry(void)
134{
135
136}
137
138
139WIndex::WIndex(int32 count)
140{
141	fEntryList = NULL;
142	fDataFile = NULL;
143	fEntriesPerBlock = count;
144	fEntrySize = sizeof(WIndexEntry);
145	if (!atomic_or(&kCRCTable, 1))
146		gen_crc_table();
147}
148
149
150WIndex::WIndex(BPositionIO *dataFile, int32 count)
151{
152	fEntryList = NULL;
153	fDataFile = dataFile;
154	fEntriesPerBlock = count;
155	fEntrySize = sizeof(WIndexEntry);
156	if (!atomic_or(&kCRCTable, 1))
157		gen_crc_table();
158}
159
160
161WIndex::~WIndex(void)
162{
163	if (fEntryList)
164		free(fEntryList);
165	delete fDataFile;
166}
167
168
169status_t
170WIndex::UnflattenIndex(BPositionIO *io)
171{
172	if (fEntryList)
173		free(fEntryList);
174	WIndexHead		head;
175
176	io->Seek(0, SEEK_SET);
177	io->Read(&head, sizeof(head));
178	io->Seek(head.offset, SEEK_SET);
179
180	fEntrySize = head.entrySize;
181	fEntries = head.entries;
182	fMaxEntries = fEntriesPerBlock;
183	fBlockSize = fEntriesPerBlock * fEntrySize;
184	fBlocks = fEntries / fEntriesPerBlock + 1;;
185	fIsSorted = true;
186
187	int32 size = (head.entries + 1) * head.entrySize;
188	if (!(fEntryList = (uint8 *)malloc(size)))
189		return B_ERROR;
190
191	if (fEntries)
192		io->Read(fEntryList, size);
193
194	return B_OK;
195}
196
197
198status_t
199WIndex::FlattenIndex(BPositionIO *io)
200{
201	if (fEntries && !fIsSorted)
202		SortItems();
203	WIndexHead		head;
204
205	head.entries = fEntries;
206	head.entrySize = fEntrySize;
207	head.offset = sizeof(WIndexHead);
208	io->Seek(0, SEEK_SET);
209	io->Write(&head, sizeof(head));
210	if (fEntries)
211		io->Write(fEntryList, head.entries * head.entrySize);
212
213	return B_OK;
214}
215
216
217int32
218WIndex::Lookup(int32 key)
219{
220	if (!fEntries)
221		return -1;
222	if (!fIsSorted)
223		SortItems();
224
225	// Binary Search
226	int32	M, Lb, Ub;
227	Lb = 0;
228	Ub = fEntries - 1;
229	while (true) {
230		M = (Lb + Ub) / 2;
231		if (key < ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
232			Ub = M - 1;
233		else if (key > ((WIndexEntry *)(fEntryList + (M * fEntrySize)))->key)
234			Lb = M + 1;
235		else
236			return M;
237		if (Lb > Ub)
238			return -1;
239	}
240}
241
242
243status_t
244WIndex::AddItem(WIndexEntry *entry)
245{
246	if (_BlockCheck() == B_ERROR)
247		return B_ERROR;
248	memcpy(((WIndexEntry *)(fEntryList + (fEntries * fEntrySize))), entry,
249		fEntrySize);
250	fEntries++;
251	fIsSorted = false;
252	return B_OK;
253}
254
255
256void
257WIndex::SortItems(void)
258{
259	qsort(fEntryList, fEntries, fEntrySize,
260		(int(*)(const void *, const void *))cmp_i_entries);
261
262	fIsSorted = true;
263}
264
265
266status_t
267WIndex::_BlockCheck(void)
268{
269	if (fEntries < fMaxEntries)
270		return B_OK;
271	fBlocks = fEntries / fEntriesPerBlock + 1;
272	uint8* tmpEntryList = (uint8 *)realloc(fEntryList, fBlockSize * fBlocks);
273	if (!tmpEntryList) {
274		free(fEntryList);
275		fEntryList = NULL;
276		return B_ERROR;
277	} else {
278		fEntryList = tmpEntryList;
279	}
280	return B_OK;
281}
282
283
284status_t
285WIndex::InitIndex(void)
286{
287	if (fEntryList)
288		free(fEntryList);
289	fIsSorted = 0;
290	fEntries = 0;
291	fMaxEntries = fEntriesPerBlock;
292	fBlockSize = fEntriesPerBlock * fEntrySize;
293	fBlocks = 1;
294	fEntryList = (uint8 *)malloc(fBlockSize);
295	if (!fEntryList)
296		return B_ERROR;
297	return B_OK;
298}
299
300
301int32
302WIndex::GetKey(const char *s)
303{
304
305	int32	key = 0;
306	/*int32	x;
307	int32	a = 84589;
308	int32	b = 45989;
309	int32	m = 217728;
310	while (*s) {
311		x = *s++ - 'a';
312
313		key ^= (a * x + b) % m;
314		key <<= 1;
315	}*/
316
317	key = update_crc(0, s, strlen(s));
318
319	if (key < 0) // No negative values!
320		key = ~key;
321
322	return key;
323}
324
325
326int32
327cmp_i_entries(const WIndexEntry *e1, const WIndexEntry *e2)
328{
329	return e1->key - e2->key;
330}
331
332
333status_t
334WIndex::SetTo(BPositionIO *dataFile)
335{
336	fDataFile = dataFile;
337	return B_OK;
338}
339
340
341void
342WIndex::Unset(void)
343{
344	fDataFile = NULL;
345}
346
347
348int32
349WIndex::FindFirst(const char *word)
350{
351	if (!fEntries)
352		return -1;
353
354	int32			index;
355	char			nword[256];
356	int32			key;
357
358	NormalizeWord(word, nword);
359	key = GetKey(nword);
360
361	if ((index = Lookup(key)) < 0)
362		return -1;
363	// Find first instance of key
364	while ((ItemAt(index - 1))->key == key)
365		index--;
366	return index;
367}
368
369
370FileEntry*
371WIndex::GetEntry(int32 index)
372{
373	if ((index >= fEntries)||(index < 0))
374		return NULL;
375	WIndexEntry		*ientry;
376	FileEntry		*dentry;
377	char			*buffer;
378
379	dentry = new FileEntry();
380
381	ientry = ItemAt(index);
382
383	int32 size;
384
385	fDataFile->Seek(ientry->offset, SEEK_SET);
386	buffer = dentry->LockBuffer(256);
387	fDataFile->Read(buffer, 256);
388	size = _GetEntrySize(ientry, buffer);
389	//buffer[256] = 0;
390	//printf("Entry: = %s\n", buffer);
391	dentry->UnlockBuffer(size);
392	return dentry;
393}
394
395
396size_t
397WIndex::_GetEntrySize(WIndexEntry *entry, const char *entryData)
398{
399	// eliminate unused parameter warning
400	(void)entry;
401
402	return strcspn(entryData, "\n\r");
403}
404
405
406FileEntry*
407WIndex::GetEntry(const char *word)
408{
409	return GetEntry(FindFirst(word));
410}
411
412
413char*
414WIndex::NormalizeWord(const char *word, char *dest)
415{
416	const char 	*src;
417	char		*dst;
418
419	// remove dots and copy
420	src = word;
421	dst = dest;
422	while (*src) {
423		if (*src != '.')
424			*dst++ = *src;
425		src++;
426	}
427	*dst = 0;
428
429	// convert to lower-case
430	for (dst = dest; *dst; dst++)
431		*dst = tolower(*dst);
432	return dest;
433}
434
435
436/* crc32h.c -- package to compute 32-bit CRC one byte at a time using   */
437/*             the high-bit first (Big-Endian) bit ordering convention  */
438/*                                                                      */
439/* Synopsis:                                                            */
440/*  gen_crc_table() -- generates a 256-word table containing all CRC    */
441/*                     remainders for every possible 8-bit byte.  It    */
442/*                     must be executed (once) before any CRC updates.  */
443/*                                                                      */
444/*  unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size)         */
445/*           unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */
446/*           Returns the updated value of the CRC accumulator after     */
447/*           processing each byte in the addressed block of data.       */
448/*                                                                      */
449/*  It is assumed that an unsigned long is at least 32 bits wide and    */
450/*  that the predefined type char occupies one 8-bit byte of storage.   */
451/*                                                                      */
452/*  The generator polynomial used for this version of the package is    */
453/*  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */
454/*  as specified in the Autodin/Ethernet/ADCCP protocol standards.      */
455/*  Other degree 32 polynomials may be substituted by re-defining the   */
456/*  symbol POLYNOMIAL below.  Lower degree polynomials must first be    */
457/*  multiplied by an appropriate power of x.  The representation used   */
458/*  is that the coefficient of x^0 is stored in the LSB of the 32-bit   */
459/*  word and the coefficient of x^31 is stored in the most significant  */
460/*  bit.  The CRC is to be appended to the data most significant byte   */
461/*  first.  For those protocols in which bytes are transmitted MSB      */
462/*  first and in the same order as they are encountered in the block    */
463/*  this convention results in the CRC remainder being transmitted with */
464/*  the coefficient of x^31 first and with that of x^0 last (just as    */
465/*  would be done by a hardware shift register mechanization).          */
466/*                                                                      */
467/*  The table lookup technique was adapted from the algorithm described */
468/*  by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/
469
470
471#define POLYNOMIAL 0x04c11db7L
472
473
474static unsigned long crc_table[256];
475
476
477void
478gen_crc_table()
479{
480	// generate the table of CRC remainders for all possible bytes
481
482	int i, j;
483	unsigned long crc_accum;
484
485	for (i = 0;  i < 256;  i++) {
486		crc_accum = ((unsigned long) i << 24);
487		for (j = 0;  j < 8;  j++) {
488			if (crc_accum & 0x80000000L)
489				crc_accum = (crc_accum << 1) ^ POLYNOMIAL;
490			else
491				crc_accum = (crc_accum << 1);
492		}
493		crc_table[i] = crc_accum;
494	}
495
496	return;
497}
498
499
500unsigned long
501update_crc(unsigned long crc_accum, const char *data_blk_ptr, int data_blk_size)
502{
503	// update the CRC on the data block one byte at a time
504
505	int i, j;
506
507	for (j = 0;  j < data_blk_size;  j++) {
508		i = ((int) (crc_accum >> 24) ^ *data_blk_ptr++) & 0xff;
509		crc_accum = (crc_accum << 8) ^ crc_table[i];
510	}
511
512	return crc_accum;
513}
514
515