/*
 * Copyright 2010, Haiku.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		based on previous work of Ankur Sethi
 *		Clemens Zeidler
 */

#include "CLuceneDataBase.h"

// NOTE(review): the targets of the three system includes were lost from the
// text this file was recovered from; they are reconstructed from the APIs
// used below (create_directory, BFile, BTranslatorRoster) - please confirm.
#include <Directory.h>
#include <File.h>
#include <TranslatorRoster.h>

#include <stdlib.h>
#include <string.h>


#define DEBUG_CLUCENE_DATABASE
#ifdef DEBUG_CLUCENE_DATABASE
#include <stdio.h>
#	define STRACE(x...) printf("FT: " x)
#else
#	define STRACE(x...) ;
#endif


using namespace lucene::document;
using namespace lucene::util;


// Number of attempts made for every CLucene operation that may throw.
const uint8 kCluceneTries = 10;


// Converts the multibyte string str into a newly allocated, always
// NUL-terminated wide string using mbstowcs() (i.e. the current locale).
// Returns NULL if str is NULL or contains an invalid multibyte sequence.
// The caller owns the result and must release it with delete[].
wchar_t*
to_wchar(const char* str)
{
	if (str == NULL)
		return NULL;

	// A multibyte string never decodes to more wide characters than it has
	// bytes, so strlen() + 1 elements always leave room for the terminator
	// (also for the empty string, which used to yield an unterminated
	// zero-length buffer).
	size_t length = strlen(str);
	wchar_t* wStr = new wchar_t[length + 1];

	size_t converted = mbstowcs(wStr, str, length + 1);
	if (converted == (size_t)-1) {
		delete[] wStr;
		return NULL;
	}

	wStr[converted] = L'\0';
	return wStr;
}


// Creates the data base directory (mode 0755) if necessary and derives the
// path of the scratch file ("temp_file" inside that directory) that
// _IndexDocument() uses for the translated text.
CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath)
	:
	fDataBasePath(databasePath),
	fTempPath(databasePath),
	fIndexWriter(NULL)
{
	printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path());

	create_directory(fDataBasePath.Path(), 0755);
	fTempPath.Append("temp_file");
}


CLuceneWriteDataBase::~CLuceneWriteDataBase()
{
	// TODO: delete fTempPath file
}


// Always reports B_OK; no initialization state is tracked.
status_t
CLuceneWriteDataBase::InitCheck()
{
	return B_OK;
}


// Queues ref for indexing on the next Commit(). A ref that is already queued
// is not added a second time. Always returns B_OK.
status_t
CLuceneWriteDataBase::AddDocument(const entry_ref& ref)
{
	// check if already in the queue
	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
		if (fAddQueue.at(i) == ref)
			return B_OK;
	}

	fAddQueue.push_back(ref);
	return B_OK;
}


// Queues ref for removal from the index on the next Commit(). A ref that is
// already queued is not added a second time. Always returns B_OK.
status_t
CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref)
{
	// check if already in the queue
	// (the bound has to be the delete queue's size: iterating up to the add
	// queue's size either misses duplicates or makes at() throw)
	for (unsigned int i = 0; i < fDeleteQueue.size(); i++) {
		if (fDeleteQueue.at(i) == ref)
			return B_OK;
	}

	fDeleteQueue.push_back(ref);
	return B_OK;
}


// Applies all queued changes to the index.
//
// The entries of both queues are first deleted from the index (for the add
// queue presumably so that a re-indexed file does not end up with two
// entries); the results of these deletions are deliberately ignored.
// Afterwards every document in the add queue is indexed.
//
// Returns B_OK on success or if there was nothing to do, and B_ERROR if the
// index writer could not be opened or a document could not be indexed. In the
// former case the add queue is kept, otherwise both queues are empty on
// return.
status_t
CLuceneWriteDataBase::Commit()
{
	if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0)
		return B_OK;

	STRACE("Commit\n");

	_RemoveDocuments(fAddQueue);
	_RemoveDocuments(fDeleteQueue);
	fDeleteQueue.clear();

	if (fAddQueue.size() == 0)
		return B_OK;

	fIndexWriter = _OpenIndexWriter();
	if (fIndexWriter == NULL)
		return B_ERROR;

	status_t status = B_OK;
	for (unsigned int i = 0; i < fAddQueue.size(); i++) {
		if (!_IndexDocument(fAddQueue.at(i))) {
			status = B_ERROR;
			break;
		}
	}

	fAddQueue.clear();

	// _IndexDocument() resets fIndexWriter to NULL when it fails to reopen
	// the writer after an error, so it must not be dereferenced blindly.
	if (fIndexWriter != NULL) {
		fIndexWriter->close();
		delete fIndexWriter;
		fIndexWriter = NULL;
	}

	return status;
}


// Opens an IndexWriter on the data base directory, creating a new index if
// none exists there yet. The attempt is repeated up to kCluceneTries times
// when CLucene throws. Returns NULL if all attempts failed; otherwise the
// caller owns the returned writer.
IndexWriter*
CLuceneWriteDataBase::_OpenIndexWriter()
{
	IndexWriter* writer = NULL;

	for (int i = 0; i < kCluceneTries; i++) {
		try {
			bool createIndex = true;
			if (IndexReader::indexExists(fDataBasePath.Path()))
				createIndex = false;

			writer = new IndexWriter(fDataBasePath.Path(),
				&fStandardAnalyzer, createIndex);
			if (writer)
				break;
		} catch (CLuceneError &error) {
			STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what());
			delete writer;
			writer = NULL;
		}
	}

	return writer;
}


// Opens an IndexReader on the data base directory. Returns NULL if the
// directory or the index does not exist, or if opening failed kCluceneTries
// times; otherwise the caller owns the returned reader.
IndexReader*
CLuceneWriteDataBase::_OpenIndexReader()
{
	IndexReader* reader = NULL;

	BEntry entry(fDataBasePath.Path(), NULL);
	if (!entry.Exists())
		return NULL;

	for (int i = 0; i < kCluceneTries; i++) {
		try {
			if (!IndexReader::indexExists(fDataBasePath.Path()))
				return NULL;

			reader = IndexReader::open(fDataBasePath.Path());
			if (reader)
				break;
		} catch (CLuceneError &error) {
			STRACE("CLuceneError: _OpenIndexReader %s\n", error.what());
			delete reader;
			reader = NULL;
		}
	}

	return reader;
}


// Deletes the index entries of all refs in docs, matching on their path.
//
// Each deletion is tried up to kCluceneTries times, with the reader being
// reopened after every failed attempt. Refs whose path cannot be converted to
// a wide string are skipped. Processing stops at the first document that
// could not be removed.
//
// Returns the result of the last attempted removal; false if no reader could
// be opened or nothing was attempted at all.
//
// NOTE(review): the vector's element type was lost from the recovered text;
// entry_ref is inferred from the queues passed in by Commit() - confirm
// against the header.
bool
CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs)
{
	IndexReader* reader = _OpenIndexReader();
	if (reader == NULL)
		return false;

	bool status = false;

	for (unsigned int i = 0; i < docs.size(); i++) {
		BPath path(&docs.at(i));
		wchar_t* wPath = to_wchar(path.Path());
		if (wPath == NULL)
			continue;

		for (int attempt = 0; attempt < kCluceneTries; attempt++) {
			status = _RemoveDocument(wPath, reader);
			if (status)
				break;

			// start over with a fresh reader
			reader->close();
			delete reader;
			reader = _OpenIndexReader();
			if (reader == NULL) {
				// status is false here, so the outer loop is left as well
				break;
			}
		}

		delete[] wPath;

		if (!status)
			break;
	}

	// the reader is NULL if reopening it failed in the retry loop
	if (reader != NULL) {
		reader->close();
		delete reader;
	}

	return status;
}


// Deletes all documents whose "path" term equals wPath through reader.
// Returns false if CLucene threw an error, true otherwise.
bool
CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader)
{
	try {
		Term term(_T("path"), wPath);
		reader->deleteDocuments(&term);
	} catch (CLuceneError &error) {
		STRACE("CLuceneError: deleteDocuments %s\n", error.what());
		return false;
	}

	return true;
}


// Adds the file referred to by ref to the index using fIndexWriter.
//
// The file is run through the translation kit to obtain its 'TEXT'
// representation, which is written to the scratch file fTempPath. That text
// becomes the tokenized, not stored "contents" field; the file's path becomes
// the stored, untokenized "path" field.
//
// Adding the document is tried up to kCluceneTries times, reopening the index
// writer after each CLucene error. If reopening fails, fIndexWriter is NULL
// on return.
//
// Returns true if the document has been added, false otherwise.
bool
CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref)
{
	BPath path(&ref);

	BFile inFile, outFile;
	inFile.SetTo(path.Path(), B_READ_ONLY);
	if (inFile.InitCheck() != B_OK) {
		STRACE("Can't open inFile %s\n", path.Path());
		return false;
	}

	outFile.SetTo(fTempPath.Path(),
		B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
	if (outFile.InitCheck() != B_OK) {
		STRACE("Can't open outFile %s\n", fTempPath.Path());
		return false;
	}

	BTranslatorRoster* translatorRoster = BTranslatorRoster::Default();
	if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT')
			!= B_OK)
		return false;

	inFile.Unset();
	outFile.Unset();

	// Convert the path before the FileReader is allocated, so that nothing
	// has to be cleaned up when the conversion fails.
	wchar_t* wPath = to_wchar(path.Path());
	if (wPath == NULL)
		return false;

	FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8");

	// NOTE(review): who owns fileReader, the two Field objects and (on
	// success) the document depends on the CLucene version in use and is not
	// visible from here; the original handling is retained - confirm.
	Document *document = new Document;

	Field contentField(_T("contents"), fileReader,
		Field::STORE_NO | Field::INDEX_TOKENIZED);
	document->add(contentField);

	Field pathField(_T("path"), wPath,
		Field::STORE_YES | Field::INDEX_UNTOKENIZED);
	document->add(pathField);

	// Only report success if addDocument() actually went through; running
	// out of attempts is a failure as well.
	bool status = false;
	for (int attempt = 0; attempt < kCluceneTries; attempt++) {
		try {
			fIndexWriter->addDocument(document);
			STRACE("document added, retries: %i\n", attempt);
			status = true;
			break;
		} catch (CLuceneError &error) {
			STRACE("CLuceneError addDocument %s\n", error.what());

			// start over with a fresh writer
			fIndexWriter->close();
			delete fIndexWriter;
			fIndexWriter = _OpenIndexWriter();
			if (fIndexWriter == NULL)
				break;
		}
	}

	if (!status)
		delete document;

	delete[] wPath;
	return status;
}