/*
 * Copyright 2010, Haiku Inc. All rights reserved.
 * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 *
 * Authors:
 *		Janito V. Ferreira Filho
 */


#include "Journal.h"

// NOTE(review): the four system header names below were reconstructed from
// the symbols this file uses (std::nothrow, memcpy/strerror,
// read_pos/write_pos, cache_*) -- confirm against the original.
#include <new>
#include <string.h>
#include <unistd.h>

#include <fs_cache.h>

#include "CachedBlock.h"
#include "CRCTable.h"
#include "HashRevokeManager.h"


//#define TRACE_EXT2
#ifdef TRACE_EXT2
#	define TRACE(x...) dprintf("\33[34mext2:\33[0m " x)
#else
#	define TRACE(x...) ;
#endif
#define ERROR(x...) dprintf("\33[34mext2:\33[0m " x)
#define WARN(x...) dprintf("\33[34mext2:\33[0m " x)


/*!	Book-keeping record for one transaction that has been written to the
	log. It remembers the log position and commit ID that become the new
	log start once the transaction has been written back (see
	Journal::_TransactionWritten()).
*/
class LogEntry : public DoublyLinkedListLinkImpl<LogEntry> {
public:
							LogEntry(Journal* journal, uint32 logStart,
								uint32 commitID);
							~LogEntry();

			uint32			Start() const { return fStart; }
			uint32			CommitID() const { return fCommitID; }

			Journal*		GetJournal() { return fJournal; }

private:
			Journal*		fJournal;
			uint32			fStart;
			uint32			fCommitID;
};


LogEntry::LogEntry(Journal* journal, uint32 logStart, uint32 commitID)
	:
	fJournal(journal),
	fStart(logStart),
	fCommitID(commitID)
{
}


LogEntry::~LogEntry()
{
}


//!	Fills in the header fields of a descriptor block (stored big endian).
void
JournalHeader::MakeDescriptor(uint32 sequence)
{
	this->magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
	this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
	this->block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_DESCRIPTOR_BLOCK);
}


//!	Fills in the header fields of a commit block (stored big endian).
void
JournalHeader::MakeCommit(uint32 sequence)
{
	this->magic = B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
	this->sequence = B_HOST_TO_BENDIAN_INT32(sequence);
	this->block_type = B_HOST_TO_BENDIAN_INT32(JOURNAL_COMMIT_BLOCK);
}


/*!	Creates a journal for \a fsVolume whose log lives on \a jVolume.
	Allocates the revoke manager and loads the journal superblock; the
	outcome is available through InitCheck().
*/
Journal::Journal(Volume* fsVolume, Volume* jVolume)
	:
	fJournalVolume(jVolume),
	fJournalBlockCache(jVolume->BlockCache()),
	fFilesystemVolume(fsVolume),
	fFilesystemBlockCache(fsVolume->BlockCache()),
	fOwner(NULL),
	fRevokeManager(NULL),
	fInitStatus(B_OK),
	fBlockSize(sizeof(JournalSuperBlock)),
	fFirstCommitID(0),
	fFirstCacheCommitID(0),
	fFirstLogBlock(1),
	fLogSize(0),
	fVersion(0),
	fIsStarted(false),
	fLogStart(0),
	fLogEnd(0),
	fFreeBlocks(0),
	fMaxTransactionSize(0),
	fCurrentCommitID(0),
	fHasSubTransaction(false),
	fSeparateSubTransactions(false),
	fUnwrittenTransactions(0),
	fTransactionID(0),
	fChecksumEnabled(false),
	fChecksumV3Enabled(false),
	fFeature64bits(false)
{
	// fOwner, fIsStarted and the checksum/64 bit flags are initialized above
	// because they are read (Lock(), Uninit(), _SaveSuperBlock(), _TagSize())
	// even when _CheckFeatures() never runs, e.g. for a version 1 superblock.
	recursive_lock_init(&fLock, "ext2 journal");
	mutex_init(&fLogEntriesLock, "ext2 journal log entries");

	HashRevokeManager* revokeManager = new(std::nothrow) HashRevokeManager(
		fsVolume->Has64bitFeature());
	TRACE("Journal::Journal(): Allocated a hash revoke manager at %p\n",
		revokeManager);

	if (revokeManager == NULL)
		fInitStatus = B_NO_MEMORY;
	else {
		fInitStatus = revokeManager->Init();

		if (fInitStatus == B_OK) {
			fRevokeManager = revokeManager;
			fInitStatus = _LoadSuperBlock();
		} else
			delete revokeManager;
	}
}


/*!	Creates an empty journal object that is not attached to any volume.
	Only the locks are initialized; no superblock is loaded.
*/
Journal::Journal()
	:
	fJournalVolume(NULL),
	fJournalBlockCache(NULL),
	fFilesystemVolume(NULL),
	fFilesystemBlockCache(NULL),
	fOwner(NULL),
	fRevokeManager(NULL),
	fInitStatus(B_OK),
	fBlockSize(sizeof(JournalSuperBlock)),
	fFirstCommitID(0),
	fFirstCacheCommitID(0),
	fFirstLogBlock(1),
	fLogSize(0),
	fVersion(0),
	fIsStarted(false),
	fLogStart(0),
	fLogEnd(0),
	fFreeBlocks(0),
	fMaxTransactionSize(0),
	fCurrentCommitID(0),
	fHasSubTransaction(false),
	fSeparateSubTransactions(false),
	fUnwrittenTransactions(0),
	fTransactionID(0),
	fChecksumEnabled(false),
	fChecksumV3Enabled(false),
	fFeature64bits(false)
{
	recursive_lock_init(&fLock, "ext2 journal");
	mutex_init(&fLogEntriesLock, "ext2 journal log entries");
}


Journal::~Journal()
{
	TRACE("Journal destructor.\n");

	TRACE("Journal::~Journal(): Attempting to delete revoke manager at %p\n",
		fRevokeManager);
	delete fRevokeManager;

	recursive_lock_destroy(&fLock);
	mutex_destroy(&fLogEntriesLock);
}


//!	Returns the status recorded by the constructor.
status_t
Journal::InitCheck()
{
	return fInitStatus;
}


/*!	Flushes the log and the file system blocks and, if that succeeded,
	marks the journal as clean by writing a log start of 0 to the
	superblock. Does nothing if the log was never started.
*/
status_t
Journal::Uninit()
{
	if (!fIsStarted)
		return B_OK;

	status_t status = FlushLogAndBlocks();

	if (status == B_OK) {
		// Mark journal as clean
		fLogStart = 0;
		status = _SaveSuperBlock();
	}

	fIsStarted = false;

	return status;
}


/*!	Resets the in-memory log window to the first log block, marks the log
	as started and writes the superblock.
*/
/*virtual*/ status_t
Journal::StartLog()
{
	fLogStart = fFirstLogBlock;
	fLogEnd = fFirstLogBlock;
	fFreeBlocks = 0;
	fIsStarted = true;

	fCurrentCommitID = fFirstCommitID;

	return _SaveSuperBlock();
}


//!	Resets the first commit ID to 1.
status_t
Journal::RestartLog()
{
	fFirstCommitID = 1;

	return B_OK;
}


/*!	Acquires the journal lock for \a owner and starts a block cache
	transaction (or a sub transaction if there are unwritten transactions).
	A nested lock reuses the current transaction unless separate sub
	transactions are in effect. \a owner may be NULL, in which case no cache
	transaction is started.
*/
/*virtual*/ status_t
Journal::Lock(Transaction* owner, bool separateSubTransactions)
{
	TRACE("Journal::Lock()\n");
	status_t status = recursive_lock_lock(&fLock);
	if (status != B_OK)
		return status;

	TRACE("Journal::Lock(): Aquired lock\n");

	if (!fSeparateSubTransactions && recursive_lock_get_recursion(&fLock) > 1) {
		// reuse current transaction
		TRACE("Journal::Lock(): Reusing current transaction\n");
		return B_OK;
	}

	if (separateSubTransactions)
		fSeparateSubTransactions = true;

	if (owner != NULL)
		owner->SetParent(fOwner);

	fOwner = owner;

	if (fOwner != NULL) {
		if (fUnwrittenTransactions > 0) {
			// start a sub transaction
			TRACE("Journal::Lock(): Starting sub transaction\n");
			cache_start_sub_transaction(fFilesystemBlockCache, fTransactionID);
			fHasSubTransaction = true;
		} else {
			TRACE("Journal::Lock(): Starting new transaction\n");
			fTransactionID = cache_start_transaction(fFilesystemBlockCache);
		}

		if (fTransactionID < B_OK) {
			recursive_lock_unlock(&fLock);
			return fTransactionID;
		}

		cache_add_transaction_listener(fFilesystemBlockCache, fTransactionID,
			TRANSACTION_IDLE, _TransactionIdle, this);
	}

	return B_OK;
}


/*!	Releases the journal lock taken by Lock(). When the outermost lock is
	released (or separate sub transactions are in effect), the transaction
	is finished via _TransactionDone() and the listeners of \a owner are
	notified; otherwise the listeners are handed over to the current owner.
*/
/*virtual*/ status_t
Journal::Unlock(Transaction* owner, bool success)
{
	TRACE("Journal::Unlock(): Lock recursion: %" B_PRId32 "\n",
		recursive_lock_get_recursion(&fLock));
	if (fSeparateSubTransactions
		|| recursive_lock_get_recursion(&fLock) == 1) {
		// we only end the transaction if we unlock it
		if (owner != NULL) {
			TRACE("Journal::Unlock(): Calling _TransactionDone\n");
			status_t status = _TransactionDone(success);
			// NOTE(review): this early return leaves fLock held -- confirm
			// that callers expect that.
			if (status != B_OK)
				return status;

			TRACE("Journal::Unlock(): Returned from _TransactionDone\n");
			// Force separate sub transactions while the listeners run, then
			// restore the previous setting.
			bool separateSubTransactions = fSeparateSubTransactions;
			fSeparateSubTransactions = true;
			TRACE("Journal::Unlock(): Notifying listeners for: %p\n", owner);
			owner->NotifyListeners(success);
			TRACE("Journal::Unlock(): Done notifying listeners\n");
			fSeparateSubTransactions = separateSubTransactions;

			fOwner = owner->Parent();
		} else
			fOwner = NULL;

		if (fSeparateSubTransactions
			&& recursive_lock_get_recursion(&fLock) == 1)
			fSeparateSubTransactions = false;
	} else if (owner != NULL) {
		// Lock() accepts a NULL owner, so it must not be dereferenced here.
		owner->MoveListenersTo(fOwner);
	}

	TRACE("Journal::Unlock(): Unlocking the lock\n");

	recursive_lock_unlock(&fLock);
	return B_OK;
}


//!	Maps a log block to a block on the journal volume (identity mapping).
status_t
Journal::MapBlock(off_t logical, fsblock_t& physical)
{
	TRACE("Journal::MapBlock()\n");
	physical = logical;

	return B_OK;
}


//!	Returns the number of free blocks between the log end and the log start.
inline uint32
Journal::FreeLogBlocks() const
{
	TRACE("Journal::FreeLogBlocks(): start: %" B_PRIu32 ", end: %" B_PRIu32
		", size: %" B_PRIu32 "\n", fLogStart, fLogEnd, fLogSize);
	return fLogStart <= fLogEnd
		? fLogSize - fLogEnd + fLogStart - 1
		: fLogStart - fLogEnd;
}


//!	Flushes the log, waiting for the lock, and the file system blocks.
status_t
Journal::FlushLogAndBlocks()
{
	return _FlushLog(true, true);
}


int32
Journal::TransactionID() const
{
	return fTransactionID;
}


/*!	Writes the blocks of the current transaction to the log until either the
	descriptor block has no room for more tags or the transaction has no
	more blocks, then writes the descriptor block itself.

	\param descriptorBlock Descriptor block buffer whose tags are filled in.
	\param detached Passed on to cache_next_block_in_transaction().
	\param _escapedData Buffer used for blocks that start with the journal
		magic; allocated on first use and handed to \a escapedDataDeleter.
	\param logBlock In: position of the descriptor block. Out: the log
		position following the last data block written.
	\param blockNumber In: first block to write. Out: next block, if any.
	\param cookie Iteration cookie of cache_next_block_in_transaction().
	\param blockCount Incremented for each block written, including the
		descriptor block.
	\param finished Set to \c true once the transaction has no more blocks.
*/
status_t
Journal::_WritePartialTransactionToLog(JournalHeader* descriptorBlock,
	bool detached, uint8** _escapedData, uint32 &logBlock, off_t& blockNumber,
	long& cookie, ArrayDeleter<uint8>& escapedDataDeleter, uint32& blockCount,
	bool& finished)
{
	TRACE("Journal::_WritePartialTransactionToLog()\n");

	uint32 descriptorBlockPos = logBlock;
	uint8* escapedData = *_escapedData;

	JournalBlockTag* tag = (JournalBlockTag*)descriptorBlock->data;
	JournalBlockTag* lastTag = (JournalBlockTag*)((uint8*)descriptorBlock
		+ fBlockSize - sizeof(JournalHeader));

	finished = false;
	status_t status = B_OK;

	while (tag < lastTag && status == B_OK) {
		tag->SetBlockNumber(blockNumber);
		tag->SetFlags(0);

		CachedBlock data(fFilesystemVolume);
		const JournalHeader* blockData = (JournalHeader*)data.SetTo(
			blockNumber);
		if (blockData == NULL) {
			panic("Got a NULL pointer while iterating through transaction "
				"blocks.\n");
			return B_ERROR;
		}

		void* finalData;

		if (blockData->CheckMagic()) {
			// The journaled block starts with the magic value
			// We must remove it to prevent confusion
			TRACE("Journal::_WritePartialTransactionToLog(): Block starts with "
				"magic number. Escaping it\n");
			tag->SetEscapedFlag();

			if (escapedData == NULL) {
				TRACE("Journal::_WritePartialTransactionToLog(): Allocating "
					"space for escaped block (%" B_PRIu32 ")\n", fBlockSize);
				escapedData = new(std::nothrow) uint8[fBlockSize];
				if (escapedData == NULL) {
					TRACE("Journal::_WritePartialTransactionToLof(): Failed to "
						"allocate buffer for escaped data block\n");
					return B_NO_MEMORY;
				}
				escapedDataDeleter.SetTo(escapedData);
				*_escapedData = escapedData;

				((int32*)escapedData)[0] = 0; // Remove magic
			}

			// Only the leading 4 byte magic is replaced; everything after it
			// is copied verbatim. The source must therefore start at byte 4
			// of the block, not at blockData->data, which lies behind the
			// whole header.
			memcpy(escapedData + 4, (const uint8*)blockData + 4,
				fBlockSize - 4);
			finalData = escapedData;
		} else
			finalData = (void*)blockData;

		// TODO: use iovecs?

		logBlock = _WrapAroundLog(logBlock + 1);

		fsblock_t physicalBlock;
		status = MapBlock(logBlock, physicalBlock);
		if (status != B_OK)
			return status;

		off_t logOffset = physicalBlock * fBlockSize;

		TRACE("Journal::_WritePartialTransactionToLog(): Writing from memory: "
			"%p, to disk: %" B_PRIdOFF "\n", finalData, logOffset);
		size_t written = write_pos(fJournalVolume->Device(), logOffset,
			finalData, fBlockSize);
		if (written != fBlockSize) {
			TRACE("Failed to write journal block.\n");
			return B_IO_ERROR;
		}

		TRACE("Journal::_WritePartialTransactionToLog(): Wrote a journal block "
			"at: %" B_PRIu32 "\n", logBlock);

		blockCount++;
		tag++;

		status = cache_next_block_in_transaction(fFilesystemBlockCache,
			fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
	}

	finished = status != B_OK;

	// Write descriptor block
	--tag;
	tag->SetLastTagFlag();

	fsblock_t physicalBlock;
	status = MapBlock(descriptorBlockPos, physicalBlock);
	if (status != B_OK)
		return status;

	off_t descriptorBlockOffset = physicalBlock * fBlockSize;

	TRACE("Journal::_WritePartialTransactionToLog(): Writing to: %" B_PRIdOFF
		"\n", descriptorBlockOffset);
	size_t written = write_pos(fJournalVolume->Device(),
		descriptorBlockOffset, descriptorBlock, fBlockSize);
	if (written != fBlockSize) {
		TRACE("Failed to write journal descriptor block.\n");
		return B_IO_ERROR;
	}

	blockCount++;
	logBlock = _WrapAroundLog(logBlock + 1);

	return B_OK;
}


/*!	Writes the current block cache transaction to the log: one or more
	descriptor blocks with their data blocks, followed by the commit
	block(s). Afterwards the superblock is saved, a LogEntry is queued and
	the cache transaction is ended, or detached if only the main transaction
	fit into the log.
*/
status_t
Journal::_WriteTransactionToLog()
{
	TRACE("Journal::_WriteTransactionToLog()\n");
	// Transaction enters the Flush state
	bool detached = false;
	TRACE("Journal::_WriteTransactionToLog(): Attempting to get transaction "
		"size\n");
	size_t size = _FullTransactionSize();
	TRACE("Journal::_WriteTransactionToLog(): transaction size: %" B_PRIuSIZE
		"\n", size);

	if (size > fMaxTransactionSize) {
		TRACE("Journal::_WriteTransactionToLog(): not enough free space "
			"for the transaction. Attempting to free some space.\n");
		size = _MainTransactionSize();
		TRACE("Journal::_WriteTransactionToLog(): main transaction size: %"
			B_PRIuSIZE "\n", size);

		if (fHasSubTransaction && size < fMaxTransactionSize) {
			TRACE("Journal::_WriteTransactionToLog(): transaction doesn't fit, "
				"but it can be separated\n");
			detached = true;
		} else {
			// Error: transaction can't fit in log
			panic("transaction too large (size: %" B_PRIuSIZE ", max size: %"
				B_PRIu32 ", log size: %" B_PRIu32 ")\n", size,
				fMaxTransactionSize, fLogSize);
			return B_BUFFER_OVERFLOW;
		}
	}

	TRACE("Journal::_WriteTransactionToLog(): free log blocks: %" B_PRIu32
		"\n", FreeLogBlocks());
	if (size > FreeLogBlocks()) {
		TRACE("Journal::_WriteTransactionToLog(): Syncing block cache\n");
		cache_sync_transaction(fFilesystemBlockCache, fTransactionID);

		if (size > FreeLogBlocks()) {
			panic("Transaction fits, but sync didn't result in enough "
				"free space.\n\tGot %" B_PRIu32 " when at least %" B_PRIuSIZE
				" was expected.", FreeLogBlocks(), size);
		}
	}

	TRACE("Journal::_WriteTransactionToLog(): finished managing space for "
		"the transaction\n");

	fHasSubTransaction = false;
	if (!fIsStarted)
		StartLog();

	// Prepare Descriptor block
	TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
		"the descriptor block, block size %" B_PRIu32 "\n", fBlockSize);
	JournalHeader* descriptorBlock
		= (JournalHeader*)new(std::nothrow) uint8[fBlockSize];
	if (descriptorBlock == NULL) {
		TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
			"for the descriptor block\n");
		return B_NO_MEMORY;
	}
	ArrayDeleter<uint8> descriptorBlockDeleter((uint8*)descriptorBlock);

	descriptorBlock->MakeDescriptor(fCurrentCommitID);

	// Prepare Commit block
	TRACE("Journal::_WriteTransactionToLog(): attempting to allocate space for "
		"the commit block, block size %" B_PRIu32 "\n", fBlockSize);
	JournalHeader* commitBlock
		= (JournalHeader*)new(std::nothrow) uint8[fBlockSize];
	if (commitBlock == NULL) {
		TRACE("Journal::_WriteTransactionToLog(): Failed to allocate a buffer "
			"for the commit block\n");
		return B_NO_MEMORY;
	}
	ArrayDeleter<uint8> commitBlockDeleter((uint8*)commitBlock);

	// NOTE(review): the commit block is stamped with fCurrentCommitID + 1
	// while the descriptor uses fCurrentCommitID; the recovery passes expect
	// a block's sequence to equal the commit ID being scanned -- confirm.
	commitBlock->MakeCommit(fCurrentCommitID + 1);
	memset(commitBlock->data, 0, fBlockSize - sizeof(JournalHeader));
		// TODO: This probably isn't necessary

	uint8* escapedData = NULL;
	ArrayDeleter<uint8> escapedDataDeleter;

	off_t blockNumber;
	long cookie = 0;

	status_t status = cache_next_block_in_transaction(fFilesystemBlockCache,
		fTransactionID, detached, &cookie, &blockNumber, NULL, NULL);
	if (status != B_OK) {
		TRACE("Journal::_WriteTransactionToLog(): Transaction has no blocks to "
			"write\n");
		return B_OK;
	}

	uint32 blockCount = 0;

	uint32 logBlock = _WrapAroundLog(fLogEnd);

	bool finished = false;

	status = _WritePartialTransactionToLog(descriptorBlock, detached,
		&escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
		blockCount, finished);
	if (!finished && status != B_OK)
		return status;

	// Position reserved for the commit block of the first partial
	// transaction; it is written last (see below).
	uint32 commitBlockPos = logBlock;

	while (!finished) {
		descriptorBlock->IncrementSequence();

		status = _WritePartialTransactionToLog(descriptorBlock, detached,
			&escapedData, logBlock, blockNumber, cookie, escapedDataDeleter,
			blockCount, finished);
		if (!finished && status != B_OK)
			return status;

		// It is okay to write the commit blocks of the partial transactions
		// as long as the commit block of the first partial transaction isn't
		// written. When it recovery reaches where the first commit should be
		// and doesn't find it, it considers it found the end of the log.

		fsblock_t physicalBlock;
		status = MapBlock(logBlock, physicalBlock);
		if (status != B_OK)
			return status;

		off_t logOffset = physicalBlock * fBlockSize;

		TRACE("Journal::_WriteTransactionToLog(): Writting commit block to "
			"%" B_PRIdOFF "\n", logOffset);
		off_t written = write_pos(fJournalVolume->Device(), logOffset,
			commitBlock, fBlockSize);
		if (written != fBlockSize) {
			TRACE("Failed to write journal commit block.\n");
			return B_IO_ERROR;
		}

		commitBlock->IncrementSequence();
		blockCount++;

		logBlock = _WrapAroundLog(logBlock + 1);
	}

	// Transaction will enter the Commit state
	fsblock_t physicalBlock;
	status = MapBlock(commitBlockPos, physicalBlock);
	if (status != B_OK)
		return status;

	off_t logOffset = physicalBlock * fBlockSize;

	TRACE("Journal::_WriteTransactionToLog(): Writing to: %" B_PRIdOFF "\n",
		logOffset);
	off_t written = write_pos(fJournalVolume->Device(), logOffset, commitBlock,
		fBlockSize);
	if (written != fBlockSize) {
		TRACE("Failed to write journal commit block.\n");
		return B_IO_ERROR;
	}

	blockCount++;
	fLogEnd = _WrapAroundLog(fLogEnd + blockCount);

	status = _SaveSuperBlock();

	// Transaction will enter Finished state
	// The commit ID is advanced exactly once, independent of whether the
	// allocation below succeeds; nothrow makes the NULL check meaningful.
	const uint32 commitID = fCurrentCommitID++;
	LogEntry* logEntry = new(std::nothrow) LogEntry(this, fLogEnd, commitID);
	TRACE("Journal::_WriteTransactionToLog(): Allocating log entry at %p\n",
		logEntry);
	if (logEntry == NULL) {
		panic("no memory to allocate log entries!");
		return B_NO_MEMORY;
	}

	mutex_lock(&fLogEntriesLock);
	fLogEntries.Add(logEntry);
	mutex_unlock(&fLogEntriesLock);

	if (detached) {
		fTransactionID = cache_detach_sub_transaction(fFilesystemBlockCache,
			fTransactionID, _TransactionWritten, logEntry);
		fUnwrittenTransactions = 1;

		if (status == B_OK && _FullTransactionSize() > fLogSize) {
			// If the transaction is too large after writing, there is no way to
			// recover, so let this transaction fail.
			ERROR("transaction too large (%" B_PRIuSIZE " blocks, log size %"
				B_PRIu32 ")!\n", _FullTransactionSize(), fLogSize);
			return B_BUFFER_OVERFLOW;
		}
	} else {
		cache_end_transaction(fFilesystemBlockCache, fTransactionID,
			_TransactionWritten, logEntry);
		fUnwrittenTransactions = 0;
	}

	return B_OK;
}


/*!	Reads the journal superblock from disk, updates the first commit ID,
	the log start and (if enabled) the checksum, and writes it back.
*/
status_t
Journal::_SaveSuperBlock()
{
	TRACE("Journal::_SaveSuperBlock()\n");
	fsblock_t physicalBlock;
	status_t status = MapBlock(0, physicalBlock);
	if (status != B_OK)
		return status;

	off_t superblockPos = physicalBlock * fBlockSize;

	JournalSuperBlock superblock;
	size_t bytesRead = read_pos(fJournalVolume->Device(), superblockPos,
		&superblock, sizeof(superblock));

	if (bytesRead != sizeof(superblock))
		return B_IO_ERROR;

	superblock.SetFirstCommitID(fFirstCommitID);
	superblock.SetLogStart(fLogStart);

	if (fChecksumEnabled)
		superblock.SetChecksum(_Checksum(&superblock));

	TRACE("Journal::SaveSuperBlock(): Write to %" B_PRIdOFF "\n",
		superblockPos);
	size_t bytesWritten = write_pos(fJournalVolume->Device(), superblockPos,
		&superblock, sizeof(superblock));

	if (bytesWritten != sizeof(superblock))
		return B_IO_ERROR;

	TRACE("Journal::_SaveSuperBlock(): Done\n");

	return B_OK;
}


/*!	Reads and validates the journal superblock (magic, version, features
	and, if enabled, checksum) and derives the log geometry and the maximum
	transaction size from it.
*/
status_t
Journal::_LoadSuperBlock()
{
	STATIC_ASSERT(sizeof(struct JournalHeader) == 12);
	STATIC_ASSERT(sizeof(struct JournalSuperBlock) == 1024);

	TRACE("Journal::_LoadSuperBlock()\n");
	fsblock_t superblockPos;

	status_t status = MapBlock(0, superblockPos);
	if (status != B_OK)
		return status;

	TRACE("Journal::_LoadSuperBlock(): superblock physical block: %" B_PRIu64
		"\n", superblockPos);

	JournalSuperBlock superblock;
	size_t bytesRead = read_pos(fJournalVolume->Device(), superblockPos
		* fJournalVolume->BlockSize(), &superblock, sizeof(superblock));

	if (bytesRead != sizeof(superblock)) {
		ERROR("Journal::_LoadSuperBlock(): failed to read superblock\n");
		return B_IO_ERROR;
	}

	if (!superblock.header.CheckMagic()) {
		ERROR("Journal::_LoadSuperBlock(): Invalid superblock magic %" B_PRIx32
			"\n", superblock.header.Magic());
		return B_BAD_VALUE;
	}

	if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V1) {
		TRACE("Journal::_LoadSuperBlock(): Journal superblock version 1\n");
		fVersion = 1;
	} else if (superblock.header.BlockType() == JOURNAL_SUPERBLOCK_V2) {
		TRACE("Journal::_LoadSuperBlock(): Journal superblock version 2\n");
		fVersion = 2;
	} else {
		ERROR("Journal::_LoadSuperBlock(): Invalid superblock version\n");
		return B_BAD_VALUE;
	}

	if (fVersion >= 2) {
		TRACE("Journal::_LoadSuperBlock(): incompatible features %" B_PRIx32
			", read-only features %" B_PRIx32 "\n",
			superblock.IncompatibleFeatures(),
			superblock.ReadOnlyCompatibleFeatures());

		status = _CheckFeatures(&superblock);

		if (status != B_OK)
			return status;

		if (fChecksumEnabled) {
			if (superblock.Checksum() != _Checksum(&superblock)) {
				ERROR("Journal::_LoadSuperBlock(): Invalid checksum\n");
				return B_BAD_DATA;
			}
			fChecksumSeed = calculate_crc32c(0xffffffff,
				(uint8*)superblock.uuid, sizeof(superblock.uuid));
		}
	}

	fBlockSize = superblock.BlockSize();
	fFirstCommitID = superblock.FirstCommitID();
	fFirstLogBlock = superblock.FirstLogBlock();
	fLogStart = superblock.LogStart();
	fLogSize = superblock.NumBlocks();

	uint32 descriptorTags = (fBlockSize - sizeof(JournalHeader))
		/ sizeof(JournalBlockTag);
		// Maximum tags per descriptor block
	uint32 maxDescriptors = (fLogSize - 1) / (descriptorTags + 2);
		// Maximum number of full journal transactions
	fMaxTransactionSize = maxDescriptors * descriptorTags;
	fMaxTransactionSize += (fLogSize - 1) - fMaxTransactionSize - 2;
		// Maximum size of a "logical" transaction
		// TODO: Why is "superblock.MaxTransactionBlocks();" zero?
	//fFirstCacheCommitID = fFirstCommitID - fTransactionID /*+ 1*/;

	TRACE("Journal::_LoadSuperBlock(): block size: %" B_PRIu32 ", first commit"
		" id: %" B_PRIu32 ", first log block: %" B_PRIu32 ", log start: %"
		B_PRIu32 ", log size: %" B_PRIu32 ", max transaction size: %" B_PRIu32
		"\n", fBlockSize, fFirstCommitID, fFirstLogBlock, fLogStart,
		fLogSize, fMaxTransactionSize);

	return B_OK;
}


/*!	Rejects superblocks that carry unknown read-only or incompatible
	features, or both checksum versions at once, and records which of the
	checksum and 64 bit features are enabled.
*/
status_t
Journal::_CheckFeatures(JournalSuperBlock* superblock)
{
	uint32 readonly = superblock->ReadOnlyCompatibleFeatures();
	uint32 incompatible = superblock->IncompatibleFeatures();
	bool hasReadonly = (readonly & ~JOURNAL_KNOWN_READ_ONLY_COMPATIBLE_FEATURES)
		!= 0;
	bool hasIncompatible = (incompatible
		& ~JOURNAL_KNOWN_INCOMPATIBLE_FEATURES) != 0;
	if (hasReadonly || hasIncompatible) {
		ERROR("Journal::_CheckFeatures(): Unsupported features: %" B_PRIx32
			" %" B_PRIx32 "\n", readonly, incompatible);
		return B_UNSUPPORTED;
	}

	bool hasCsumV2 = (superblock->IncompatibleFeatures()
		& JOURNAL_FEATURE_INCOMPATIBLE_CSUM_V2) != 0;
	bool hasCsumV3 = (superblock->IncompatibleFeatures()
		& JOURNAL_FEATURE_INCOMPATIBLE_CSUM_V3) != 0;
	if (hasCsumV2 && hasCsumV3) {
		return B_BAD_VALUE;
	}

	// Either version turns checksumming on. Requiring both could never be
	// true at this point, since that combination was rejected above.
	fChecksumEnabled = hasCsumV2 || hasCsumV3;
	fChecksumV3Enabled = hasCsumV3;
	fFeature64bits = (superblock->IncompatibleFeatures()
		& JOURNAL_FEATURE_INCOMPATIBLE_64BIT) != 0;
	return B_OK;
}


/*!	Computes the crc32c of \a superblock with its checksum field treated as
	zero. The stored checksum field is left unchanged.
*/
uint32
Journal::_Checksum(JournalSuperBlock* superblock)
{
	uint32 oldChecksum = superblock->checksum;
	superblock->checksum = 0;
	uint32 checksum = calculate_crc32c(0xffffffff, (uint8*)superblock,
		sizeof(JournalSuperBlock));
	superblock->checksum = oldChecksum;
	return checksum;
}


/*!	Computes the crc32c of a journal block with the checksum in its tail
	treated as zero. If \a set is \c true the computed value is stored in the
	tail, otherwise the tail is restored.
	\return whether the computed checksum equals the one previously stored.
*/
bool
Journal::_Checksum(uint8* block, bool set)
{
	JournalBlockTail *tail = (JournalBlockTail*)(block + fBlockSize
		- sizeof(JournalBlockTail));
	uint32 oldChecksum = tail->checksum;
	tail->checksum = 0;
	uint32 checksum = calculate_crc32c(0xffffffff, block, fBlockSize);
	if (set) {
		tail->checksum = checksum;
	} else {
		tail->checksum = oldChecksum;
	}
	return checksum == oldChecksum;
}


/*!	Counts the tags of a descriptor block, stopping at the tag that carries
	the "last tag" flag or at the end of the tag area.
*/
uint32
Journal::_CountTags(JournalHeader* descriptorBlock)
{
	uint32 count = 0;
	size_t tagSize = _TagSize();
	size_t size = fBlockSize;

	if (fChecksumEnabled)
		size -= sizeof(JournalBlockTail);

	JournalBlockTag* tags = (JournalBlockTag*)descriptorBlock->data;
		// Skip the header
	// The bound is a byte offset into the block, so the arithmetic has to be
	// done on a byte pointer rather than in units of JournalHeader.
	JournalBlockTag* lastTag = (JournalBlockTag*)
		((uint8*)descriptorBlock + size - tagSize);

	while (tags < lastTag && (tags->Flags() & JOURNAL_FLAG_LAST_TAG) == 0) {
		if ((tags->Flags() & JOURNAL_FLAG_SAME_UUID) == 0)
			tags = (JournalBlockTag*)((uint8*)tags + 16);
				// Skip new UUID

		TRACE("Journal::_CountTags(): Tag block: %" B_PRIu32 "\n",
			tags->BlockNumber());

		tags = (JournalBlockTag*)((uint8*)tags + tagSize);
			// Go to next tag
		count++;
	}

	if ((tags->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
		count++;

	TRACE("Journal::_CountTags(): counted tags: %" B_PRIu32 "\n", count);

	return count;
}


//!	Returns the on-disk size of a block tag for the enabled features.
size_t
Journal::_TagSize()
{
	if (fChecksumV3Enabled)
		return sizeof(JournalBlockTagV3);

	size_t size = sizeof(JournalBlockTag);
	if (fChecksumEnabled)
		size += sizeof(uint16);
	if (!fFeature64bits)
		size -= sizeof(uint32);
	return size;
}


/*!	Replays the log if the journal was not cleanly unmounted, in three
	passes: scan, revoke and replay.
*/
/*virtual*/ status_t
Journal::Recover()
{
	TRACE("Journal::Recover()\n");
	if (fLogStart == 0) // Journal was cleanly unmounted
		return B_OK;

	TRACE("Journal::Recover(): Journal needs recovery\n");

	uint32 lastCommitID;

	status_t status = _RecoverPassScan(lastCommitID);
	if (status != B_OK)
		return status;

	status = _RecoverPassRevoke(lastCommitID);
	if (status != B_OK)
		return status;

	return _RecoverPassReplay(lastCommitID);
}


// First pass: Find the end of the log
status_t
Journal::_RecoverPassScan(uint32& lastCommitID)
{
	TRACE("Journal Recover: 1st Pass: Scan\n");

	CachedBlock cached(fJournalVolume);
	JournalHeader* header;
	uint32 nextCommitID = fFirstCommitID;
	uint32 nextBlock = fLogStart;
	fsblock_t nextBlockPos;

	status_t status = MapBlock(nextBlock, nextBlockPos);
	if (status != B_OK)
		return status;

	header = (JournalHeader*)cached.SetTo(nextBlockPos);

	while (header->CheckMagic() && header->Sequence() == nextCommitID) {
		uint32 blockType = header->BlockType();

		if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
			if (fChecksumEnabled && !_Checksum((uint8*)header, false)) {
				ERROR("Journal::_RecoverPassScan(): Invalid checksum\n");
				return B_BAD_DATA;
			}
			// Skip the data blocks described by this descriptor
			uint32 tags = _CountTags(header);
			nextBlock += tags;
			TRACE("Journal recover pass scan: Found a descriptor block with "
				"%" B_PRIu32 " tags\n", tags);
		} else if (blockType == JOURNAL_COMMIT_BLOCK) {
			nextCommitID++;
			TRACE("Journal recover pass scan: Found a commit block. Next "
				"commit ID: %" B_PRIu32 "\n", nextCommitID);
		} else if (blockType != JOURNAL_REVOKE_BLOCK) {
			TRACE("Journal recover pass scan: Reached an unrecognized block, "
				"assuming as log's end.\n");
			break;
		} else {
			TRACE("Journal recover pass scan: Found a revoke block, "
				"skipping it\n");
		}

		nextBlock = _WrapAroundLog(nextBlock + 1);

		status = MapBlock(nextBlock, nextBlockPos);
		if (status != B_OK)
			return status;

		header = (JournalHeader*)cached.SetTo(nextBlockPos);
	}

	TRACE("Journal Recovery pass scan: Last detected transaction ID: %"
		B_PRIu32 "\n", nextCommitID);

	lastCommitID = nextCommitID;
	return B_OK;
}


// Second pass: Collect all revoked blocks
status_t
Journal::_RecoverPassRevoke(uint32 lastCommitID)
{
	TRACE("Journal Recover: 2nd Pass: Revoke\n");

	CachedBlock cached(fJournalVolume);
	JournalHeader* header;
	uint32 nextCommitID = fFirstCommitID;
	uint32 nextBlock = fLogStart;
	fsblock_t nextBlockPos;

	status_t status = MapBlock(nextBlock, nextBlockPos);
	if (status != B_OK)
		return status;

	header = (JournalHeader*)cached.SetTo(nextBlockPos);

	while (nextCommitID < lastCommitID) {
		if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
			// Somehow the log is different than the expexted
			return B_ERROR;
		}

		uint32 blockType = header->BlockType();

		if (blockType == JOURNAL_DESCRIPTOR_BLOCK)
			nextBlock += _CountTags(header);
		else if (blockType == JOURNAL_COMMIT_BLOCK)
			nextCommitID++;
		else if (blockType == JOURNAL_REVOKE_BLOCK) {
			TRACE("Journal::_RecoverPassRevoke(): Found a revoke block\n");
			status = fRevokeManager->ScanRevokeBlock(
				(JournalRevokeHeader*)header, nextCommitID);

			if (status != B_OK)
				return status;
		} else {
			WARN("Journal::_RecoverPassRevoke(): Found an unrecognized block\n");
			break;
		}

		nextBlock = _WrapAroundLog(nextBlock + 1);

		status = MapBlock(nextBlock, nextBlockPos);
		if (status != B_OK)
			return status;

		header = (JournalHeader*)cached.SetTo(nextBlockPos);
	}

	if (nextCommitID != lastCommitID) {
		// Possibly because of some sort of IO error
		TRACE("Journal::_RecoverPassRevoke(): Incompatible commit IDs\n");
		return B_ERROR;
	}

	TRACE("Journal recovery pass revoke: Revoked blocks: %" B_PRIu32 "\n",
		fRevokeManager->NumRevokes());

	return B_OK;
}


// Third pass: Replay log
status_t
Journal::_RecoverPassReplay(uint32 lastCommitID)
{
	TRACE("Journal Recover: 3rd Pass: Replay\n");

	uint32 nextCommitID = fFirstCommitID;
	uint32 nextBlock = fLogStart;
	fsblock_t nextBlockPos;

	status_t status = MapBlock(nextBlock, nextBlockPos);
	if (status != B_OK)
		return status;

	CachedBlock cached(fJournalVolume);
	JournalHeader* header = (JournalHeader*)cached.SetTo(nextBlockPos);

	int count = 0;

	uint8* data = new(std::nothrow) uint8[fBlockSize];
	if (data == NULL) {
		TRACE("Journal::_RecoverPassReplay(): Failed to allocate memory for "
			"data\n");
		return B_NO_MEMORY;
	}

	ArrayDeleter<uint8> dataDeleter(data);

	while (nextCommitID < lastCommitID) {
		if (!header->CheckMagic() || header->Sequence() != nextCommitID) {
			// Somehow the log is different than the expected
			ERROR("Journal::_RecoverPassReplay(): Weird problem with block\n");
			return B_ERROR;
		}

		uint32 blockType = header->BlockType();

		if (blockType == JOURNAL_DESCRIPTOR_BLOCK) {
			JournalBlockTag* last_tag = (JournalBlockTag*)((uint8*)header
				+ fBlockSize - sizeof(JournalBlockTag));

			for (JournalBlockTag* tag = (JournalBlockTag*)header->data;
				tag <= last_tag; ++tag) {
				nextBlock = _WrapAroundLog(nextBlock + 1);

				status = MapBlock(nextBlock, nextBlockPos);
				if (status != B_OK)
					return status;

				if (!fRevokeManager->Lookup(tag->BlockNumber(),
						nextCommitID)) {
					// Block isn't revoked
					size_t read = read_pos(fJournalVolume->Device(),
						nextBlockPos * fBlockSize, data, fBlockSize);
					if (read != fBlockSize)
						return B_IO_ERROR;

					if ((tag->Flags() & JOURNAL_FLAG_ESCAPED) != 0) {
						// Block is escaped
						((int32*)data)[0]
							= B_HOST_TO_BENDIAN_INT32(JOURNAL_MAGIC);
					}

					// Widen before multiplying so that offsets beyond 4 GiB
					// are not truncated.
					off_t blockOffset = (off_t)tag->BlockNumber() * fBlockSize;

					TRACE("Journal::_RevoverPassReplay(): Write to %" B_PRIdOFF
						"\n", blockOffset);

					size_t written = write_pos(fFilesystemVolume->Device(),
						blockOffset, data, fBlockSize);

					if (written != fBlockSize)
						return B_IO_ERROR;

					++count;
				}

				if ((tag->Flags() & JOURNAL_FLAG_LAST_TAG) != 0)
					break;
				if ((tag->Flags() & JOURNAL_FLAG_SAME_UUID) == 0) {
					// TODO: Check new UUID with file system UUID
					tag += 2;
						// sizeof(JournalBlockTag) = 8
						// sizeof(UUID) = 16
				}
			}
		} else if (blockType == JOURNAL_COMMIT_BLOCK)
			nextCommitID++;
		else if (blockType != JOURNAL_REVOKE_BLOCK) {
			WARN("Journal::_RecoverPassReplay(): Found an unrecognized block\n");
			break;
		} // If blockType == JOURNAL_REVOKE_BLOCK we just skip it

		nextBlock = _WrapAroundLog(nextBlock + 1);

		status = MapBlock(nextBlock, nextBlockPos);
		if (status != B_OK)
			return status;

		header = (JournalHeader*)cached.SetTo(nextBlockPos);
	}

	if (nextCommitID != lastCommitID) {
		// Possibly because of some sort of IO error
		return B_ERROR;
	}

	TRACE("Journal recovery pass replay: Replayed blocks: %u\n", count);

	return B_OK;
}


/*!	Writes any unwritten transaction to the log and, if \a flushBlocks is
	set, flushes the file system volume. If \a canWait is \c false the lock
	is only tried. Returns immediately when called from inside a
	transaction (lock recursion > 1).
*/
status_t
Journal::_FlushLog(bool canWait, bool flushBlocks)
{
	TRACE("Journal::_FlushLog()\n");
	status_t status = canWait ? recursive_lock_lock(&fLock)
		: recursive_lock_trylock(&fLock);

	TRACE("Journal::_FlushLog(): Acquired fLock, recursion: %" B_PRId32 "\n",
		recursive_lock_get_recursion(&fLock));
	if (status != B_OK)
		return status;

	if (recursive_lock_get_recursion(&fLock) > 1) {
		// Called from inside a transaction
		recursive_lock_unlock(&fLock);
		TRACE("Journal::_FlushLog(): Called from a transaction. Leaving...\n");
		return B_OK;
	}

	if (fUnwrittenTransactions != 0 && _FullTransactionSize() != 0) {
		status = _WriteTransactionToLog();
		if (status < B_OK)
			panic("Failed flushing transaction: %s\n", strerror(status));
	}

	TRACE("Journal::_FlushLog(): Attempting to flush journal volume at %p\n",
		fJournalVolume);

	// TODO: Not sure this is correct. Need to review...
	// NOTE: Not correct. Causes double lock of a block cache mutex
	// TODO: Need some other way to synchronize the journal...
	/*status = fJournalVolume->FlushDevice();
	if (status != B_OK)
		return status;*/

	TRACE("Journal::_FlushLog(): Flushed journal volume\n");

	if (flushBlocks) {
		TRACE("Journal::_FlushLog(): Attempting to flush file system volume "
			"at %p\n", fFilesystemVolume);
		status = fFilesystemVolume->FlushDevice();
		if (status == B_OK)
			TRACE("Journal::_FlushLog(): Flushed file system volume\n");
	}

	TRACE("Journal::_FlushLog(): Finished. Releasing lock\n");

	recursive_lock_unlock(&fLock);

	TRACE("Journal::_FlushLog(): Done, final status: %s\n", strerror(status));
	return status;
}


/*!	Maps a block index that ran past the end of the log back into the log,
	which starts at fFirstLogBlock.
*/
inline uint32
Journal::_WrapAroundLog(uint32 block)
{
	TRACE("Journal::_WrapAroundLog()\n");
	if (block >= fLogSize)
		return block - fLogSize + fFirstLogBlock;
	else
		return block;
}


//!	Returns the block count of the current (sub) transaction.
size_t
Journal::_CurrentTransactionSize() const
{
	TRACE("Journal::_CurrentTransactionSize(): transaction %" B_PRIu32 "\n",
		fTransactionID);

	size_t count;

	if (fHasSubTransaction) {
		count = cache_blocks_in_sub_transaction(fFilesystemBlockCache,
			fTransactionID);

		TRACE("\tSub transaction size: %" B_PRIuSIZE "\n", count);
	} else {
		count = cache_blocks_in_transaction(fFilesystemBlockCache,
			fTransactionID);

		TRACE("\tTransaction size: %" B_PRIuSIZE "\n", count);
	}

	return count;
}


//!	Returns the block count of the whole transaction.
size_t
Journal::_FullTransactionSize() const
{
	TRACE("Journal::_FullTransactionSize(): transaction %" B_PRIu32 "\n",
		fTransactionID);
	TRACE("\tFile sytem block cache: %p\n", fFilesystemBlockCache);

	size_t count = cache_blocks_in_transaction(fFilesystemBlockCache,
		fTransactionID);

	TRACE("\tFull transaction size: %" B_PRIuSIZE "\n", count);

	return count;
}


//!	Returns the block count of the main transaction only.
size_t
Journal::_MainTransactionSize() const
{
	TRACE("Journal::_MainTransactionSize(): transaction %" B_PRIu32 "\n",
		fTransactionID);

	size_t count = cache_blocks_in_main_transaction(fFilesystemBlockCache,
		fTransactionID);

	TRACE("\tMain transaction size: %" B_PRIuSIZE "\n", count);

	return count;
}


/*!	Finishes the current transaction. A failed transaction is aborted in the
	block cache. A successful one is either left unwritten to be batched
	with later ones (while it is below the maximum transaction size) or
	written to the log right away.
*/
status_t
Journal::_TransactionDone(bool success)
{
	if (!success) {
		if (fHasSubTransaction) {
			TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
				" failed, aborting subtransaction\n", fTransactionID);
			cache_abort_sub_transaction(fFilesystemBlockCache, fTransactionID);
			// parent is unaffected
		} else {
			TRACE("Journal::_TransactionDone(): transaction %" B_PRIu32
				" failed, aborting\n", fTransactionID);
			cache_abort_transaction(fFilesystemBlockCache, fTransactionID);
			fUnwrittenTransactions = 0;
		}

		TRACE("Journal::_TransactionDone(): returning B_OK\n");
		return B_OK;
	}

	// If possible, delay flushing the transaction
	uint32 size = _FullTransactionSize();
	TRACE("Journal::_TransactionDone(): full transaction size: %" B_PRIu32
		", max transaction size: %" B_PRIu32 ", free log blocks: %" B_PRIu32
		"\n", size, fMaxTransactionSize, FreeLogBlocks());
	if (fMaxTransactionSize > 0 && size < fMaxTransactionSize) {
		TRACE("Journal::_TransactionDone(): delaying flush of transaction "
			"%" B_PRIu32 "\n", fTransactionID);

		// Make sure the transaction fits in the log
		// NOTE(review): this syncs when the transaction already fits;
		// _WriteTransactionToLog() syncs on "size > FreeLogBlocks()" --
		// confirm which comparison is intended.
		if (size < FreeLogBlocks())
			cache_sync_transaction(fFilesystemBlockCache, fTransactionID);

		fUnwrittenTransactions++;
		TRACE("Journal::_TransactionDone(): returning B_OK\n");
		return B_OK;
	}

	return _WriteTransactionToLog();
}


/*!	Block cache callback passed to cache_end_transaction() and
	cache_detach_sub_transaction(). If the entry is the first in the list,
	the log start and first commit ID are moved to the values it recorded
	and the superblock is saved. The entry is then removed and deleted.
*/
/*static*/ void
Journal::_TransactionWritten(int32 transactionID, int32 event, void* _logEntry)
{
	LogEntry* logEntry = (LogEntry*)_logEntry;

	TRACE("Journal::_TransactionWritten(): Transaction %" B_PRIu32
		" checkpointed\n", transactionID);

	Journal* journal = logEntry->GetJournal();

	TRACE("Journal::_TransactionWritten(): log entry: %p, journal: %p\n",
		logEntry, journal);
	TRACE("Journal::_TransactionWritten(): log entries: %p\n",
		&journal->fLogEntries);

	mutex_lock(&journal->fLogEntriesLock);

	TRACE("Journal::_TransactionWritten(): first log entry: %p\n",
		journal->fLogEntries.First());
	if (logEntry == journal->fLogEntries.First()) {
		TRACE("Journal::_TransactionWritten(): Moving start of log to %"
			B_PRIu32 "\n", logEntry->Start());
		journal->fLogStart = logEntry->Start();
		journal->fFirstCommitID = logEntry->CommitID();
		TRACE("Journal::_TransactionWritten(): Setting commit ID to %" B_PRIu32
			"\n", logEntry->CommitID());

		if (journal->_SaveSuperBlock() != B_OK)
			panic("ext2: Failed to write journal superblock\n");
	}

	TRACE("Journal::_TransactionWritten(): Removing log entry\n");
	journal->fLogEntries.Remove(logEntry);

	TRACE("Journal::_TransactionWritten(): Unlocking entries list\n");
	mutex_unlock(&journal->fLogEntriesLock);

	TRACE("Journal::_TransactionWritten(): Deleting log entry at %p\n",
		logEntry);
	delete logEntry;
}


/*!	Block cache callback registered for TRANSACTION_IDLE in Lock(): flushes
	the log without waiting for the lock and without flushing the blocks.
*/
/*static*/ void
Journal::_TransactionIdle(int32 transactionID, int32 event, void* _journal)
{
	Journal* journal = (Journal*)_journal;
	journal->_FlushLog(false, false);
}