1/* 2 * Copyright 2001-2015, Axel D��rfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5#ifndef B_PLUS_TREE_H 6#define B_PLUS_TREE_H 7 8 9#include "bfs.h" 10 11#include "Utility.h" 12 13#if !_BOOT_MODE 14#include "Journal.h" 15class Inode; 16#else 17#define Inode BFS::Stream 18 19namespace BFS { 20 21class Stream; 22class Transaction; 23class TransactionListener {}; 24#endif // _BOOT_MODE 25 26// #pragma mark - on-disk structures 27 28struct bplustree_node; 29 30#define BPLUSTREE_NULL -1LL 31#define BPLUSTREE_FREE -2LL 32 33struct bplustree_header { 34 uint32 magic; 35 uint32 node_size; 36 uint32 max_number_of_levels; 37 uint32 data_type; 38 int64 root_node_pointer; 39 int64 free_node_pointer; 40 int64 maximum_size; 41 42 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); } 43 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); } 44 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); } 45 off_t RootNode() const 46 { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); } 47 off_t FreeNode() const 48 { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); } 49 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); } 50 uint32 MaxNumberOfLevels() const 51 { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); } 52 53 inline bool CheckNode(const bplustree_node* node) const; 54 inline bool IsValidLink(off_t link) const; 55 bool IsValid() const; 56} _PACKED; 57 58#define BPLUSTREE_MAGIC 0x69f6c2e8 59#define BPLUSTREE_NODE_SIZE 1024 60#define BPLUSTREE_MAX_KEY_LENGTH 256 61#define BPLUSTREE_MIN_KEY_LENGTH 1 62 63enum bplustree_types { 64 BPLUSTREE_STRING_TYPE = 0, 65 BPLUSTREE_INT32_TYPE = 1, 66 BPLUSTREE_UINT32_TYPE = 2, 67 BPLUSTREE_INT64_TYPE = 3, 68 BPLUSTREE_UINT64_TYPE = 4, 69 BPLUSTREE_FLOAT_TYPE = 5, 70 BPLUSTREE_DOUBLE_TYPE = 6 71}; 72 73 74struct duplicate_array; 75 76 77template <typename T> 78struct __attribute__((packed)) Unaligned { 79 T value; 80 81 Unaligned<T>& operator=(const T& newValue) 82 { 83 value = newValue; return *this; 84 } 85 operator T() const { return value; } 86}; 87 88 89struct bplustree_node { 90 int64 left_link; 91 int64 right_link; 92 int64 overflow_link; 93 uint16 all_key_count; 94 uint16 all_key_length; 95 96 off_t LeftLink() const 97 { return BFS_ENDIAN_TO_HOST_INT64( 98 left_link); } 99 off_t RightLink() const 100 { return BFS_ENDIAN_TO_HOST_INT64( 101 right_link); } 102 off_t OverflowLink() const 103 { return BFS_ENDIAN_TO_HOST_INT64( 104 overflow_link); } 105 uint16 NumKeys() const 106 { return BFS_ENDIAN_TO_HOST_INT16( 107 all_key_count); } 108 uint16 AllKeyLength() const 109 { return BFS_ENDIAN_TO_HOST_INT16( 110 all_key_length); } 111 112 inline Unaligned<uint16>* KeyLengths() const; 113 inline Unaligned<off_t>* Values() const; 114 inline uint8* Keys() const; 115 inline int32 Used() const; 116 uint8* KeyAt(int32 index, uint16* keyLength) const; 117 118 inline bool IsLeaf() const; 119 120 void Initialize(); 121 uint8 CountDuplicates(off_t offset, 122 bool isFragment) const; 123 off_t DuplicateAt(off_t offset, bool isFragment, 124 int8 index) const; 125 uint32 FragmentsUsed(uint32 nodeSize) const; 126 inline duplicate_array* FragmentAt(int8 index) const; 127 inline duplicate_array* DuplicateArray() const; 128 129 static inline uint8 LinkType(off_t link); 130 static inline off_t MakeLink(uint8 type, off_t link, 131 uint32 fragmentIndex = 0); 132 static inline bool IsDuplicate(off_t link); 133 static inline off_t FragmentOffset(off_t link); 134 static inline uint32 FragmentIndex(off_t link); 135 static inline uint32 MaxFragments(uint32 nodeSize); 136 137 status_t CheckIntegrity(uint32 nodeSize) const; 138} _PACKED; 139 140//#define BPLUSTREE_NODE 0 141#define BPLUSTREE_DUPLICATE_NODE 2 142#define BPLUSTREE_DUPLICATE_FRAGMENT 3 143 144#define NUM_FRAGMENT_VALUES 7 145#define NUM_DUPLICATE_VALUES 125 146 147//************************************** 148 149enum bplustree_traversing { 150 BPLUSTREE_FORWARD = 1, 151 BPLUSTREE_BACKWARD = -1, 152 153 BPLUSTREE_BEGIN = 0, 154 BPLUSTREE_END = 1 155}; 156 157 158// #pragma mark - in-memory structures 159 160 161class BPlusTree; 162struct TreeCheck; 163class TreeIterator; 164 165 166#if !_BOOT_MODE 167// needed for searching (utilizing a stack) 168struct node_and_key { 169 off_t nodeOffset; 170 uint16 keyIndex; 171}; 172#endif // !_BOOT_MODE 173 174 175class CachedNode { 176public: 177 CachedNode(BPlusTree* tree) 178 : 179 fTree(tree), 180 fNode(NULL), 181 fOffset(0), 182 fBlockNumber(0), 183 fWritable(false) 184 { 185#if _BOOT_MODE 186 fBlock = NULL; 187#endif 188 } 189 190 CachedNode(BPlusTree* tree, off_t offset, bool check = true) 191 : 192 fTree(tree), 193 fNode(NULL), 194 fOffset(0), 195 fBlockNumber(0), 196 fWritable(false) 197 { 198#if _BOOT_MODE 199 fBlock = NULL; 200#endif 201 SetTo(offset, check); 202 } 203 204 ~CachedNode() 205 { 206 Unset(); 207#if _BOOT_MODE 208 free(fBlock); 209#endif 210 } 211 212 const bplustree_node* SetTo(off_t offset, bool check = true); 213 status_t SetTo(off_t offset, 214 const bplustree_node** _node, 215 bool check = true); 216 217 const bplustree_header* SetToHeader(); 218 void Unset(); 219 220#if !_BOOT_MODE 221 bplustree_node* SetToWritable(Transaction& transaction, 222 off_t offset, bool check = true); 223 bplustree_node* MakeWritable(Transaction& transaction); 224 bplustree_header* SetToWritableHeader(Transaction& transaction); 225 226 void UnsetUnchanged(Transaction& transaction); 227 228 status_t Free(Transaction& transaction, off_t offset); 229 status_t Allocate(Transaction& transaction, 230 bplustree_node** _node, off_t* _offset); 231#endif // !_BOOT_MODE 232 233 bool IsWritable() const { return fWritable; } 234 bplustree_node* Node() const { return fNode; } 235 236protected: 237 bplustree_node* InternalSetTo(Transaction* transaction, 238 off_t offset); 239 240 BPlusTree* fTree; 241 bplustree_node* fNode; 242 off_t fOffset; 243 off_t fBlockNumber; 244 bool fWritable; 245#if _BOOT_MODE 246 uint8* fBlock; 247#endif 248}; 249 250 251class BPlusTree : public TransactionListener { 252public: 253#if !_BOOT_MODE 254 BPlusTree(Transaction& transaction, 255 Inode* stream, 256 int32 nodeSize = BPLUSTREE_NODE_SIZE); 257#endif 258 BPlusTree(Inode* stream); 259 BPlusTree(); 260 ~BPlusTree(); 261 262#if !_BOOT_MODE 263 status_t SetTo(Transaction& transaction, Inode* stream, 264 int32 nodeSize = BPLUSTREE_NODE_SIZE); 265#endif 266 status_t SetTo(Inode* stream); 267 status_t SetStream(Inode* stream); 268 269 status_t InitCheck(); 270 271 size_t NodeSize() const { return fNodeSize; } 272 Inode* Stream() const { return fStream; } 273 274#if !_BOOT_MODE 275 status_t Validate(bool repair, bool& _errorsFound); 276 status_t MakeEmpty(); 277 278 status_t Remove(Transaction& transaction, 279 const uint8* key, uint16 keyLength, 280 off_t value); 281 status_t Insert(Transaction& transaction, 282 const uint8* key, uint16 keyLength, 283 off_t value); 284 285 status_t Remove(Transaction& transaction, const char* key, 286 off_t value); 287 status_t Insert(Transaction& transaction, const char* key, 288 off_t value); 289 status_t Insert(Transaction& transaction, int32 key, 290 off_t value); 291 status_t Insert(Transaction& transaction, uint32 key, 292 off_t value); 293 status_t Insert(Transaction& transaction, int64 key, 294 off_t value); 295 status_t Insert(Transaction& transaction, uint64 key, 296 off_t value); 297 status_t Insert(Transaction& transaction, float key, 298 off_t value); 299 status_t Insert(Transaction& transaction, double key, 300 off_t value); 301 302 status_t Replace(Transaction& transaction, 303 const uint8* key, uint16 keyLength, 304 off_t value); 305#endif // !_BOOT_MODE 306 307 status_t Find(const uint8* key, uint16 keyLength, 308 off_t* value); 309 310#if !_BOOT_MODE 311 static int32 TypeCodeToKeyType(type_code code); 312 static int32 ModeToKeyType(mode_t mode); 313 314protected: 315 virtual void TransactionDone(bool success); 316 virtual void RemovedFromTransaction(); 317#endif // !_BOOT_MODE 318 319private: 320 BPlusTree(const BPlusTree& other); 321 BPlusTree& operator=(const BPlusTree& other); 322 // no implementation 323 324 int32 _CompareKeys(const void* key1, int keylength1, 325 const void* key2, int keylength2); 326 status_t _FindKey(const bplustree_node* node, 327 const uint8* key, uint16 keyLength, 328 uint16* index = NULL, off_t* next = NULL); 329#if !_BOOT_MODE 330 status_t _SeekDown(Stack<node_and_key>& stack, 331 const uint8* key, uint16 keyLength); 332 333 status_t _FindFreeDuplicateFragment( 334 Transaction& transaction, 335 const bplustree_node* node, 336 CachedNode& cached, off_t* _offset, 337 bplustree_node** _fragment, uint32* _index); 338 status_t _InsertDuplicate(Transaction& transaction, 339 CachedNode& cached, 340 const bplustree_node* node, uint16 index, 341 off_t value); 342 void _InsertKey(bplustree_node* node, uint16 index, 343 uint8* key, uint16 keyLength, off_t value); 344 status_t _SplitNode(bplustree_node* node, 345 off_t nodeOffset, bplustree_node* other, 346 off_t otherOffset, uint16* _keyIndex, 347 uint8* key, uint16* _keyLength, 348 off_t* _value); 349 350 status_t _RemoveDuplicate(Transaction& transaction, 351 const bplustree_node* node, 352 CachedNode& cached, uint16 keyIndex, 353 off_t value); 354 void _RemoveKey(bplustree_node* node, uint16 index); 355 356 void _UpdateIterators(off_t offset, off_t nextOffset, 357 uint16 keyIndex, uint16 splitAt, 358 int8 change); 359 void _AddIterator(TreeIterator* iterator); 360 void _RemoveIterator(TreeIterator* iterator); 361 362 status_t _ValidateChildren(TreeCheck& check, 363 uint32 level, off_t offset, 364 const uint8* largestKey, uint16 keyLength, 365 const bplustree_node* parent); 366 status_t _ValidateChild(TreeCheck& check, 367 CachedNode& cached, uint32 level, 368 off_t offset, off_t lastOffset, 369 off_t nextOffset, const uint8* key, 370 uint16 keyLength); 371#endif // !_BOOT_MODE 372 373private: 374 friend class TreeIterator; 375 friend class CachedNode; 376 friend struct TreeCheck; 377 378 Inode* fStream; 379 bplustree_header fHeader; 380 int32 fNodeSize; 381 bool fAllowDuplicates; 382 bool fInTransaction; 383 status_t fStatus; 384 385#if !_BOOT_MODE 386 mutex fIteratorLock; 387 SinglyLinkedList<TreeIterator> fIterators; 388#endif 389}; 390 391 392// #pragma mark - helper classes/functions 393 394 395class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> { 396public: 397 TreeIterator(BPlusTree* tree); 398 ~TreeIterator(); 399 400 status_t Goto(int8 to); 401 status_t Traverse(int8 direction, void* key, 402 uint16* keyLength, uint16 maxLength, 403 off_t* value, uint16* duplicate = NULL); 404 status_t Find(const uint8* key, uint16 keyLength); 405 406 status_t Rewind(); 407 status_t GetNextEntry(void* key, uint16* keyLength, 408 uint16 maxLength, off_t* value, 409 uint16* duplicate = NULL); 410 status_t GetPreviousEntry(void* key, uint16* keyLength, 411 uint16 maxLength, off_t* value, 412 uint16* duplicate = NULL); 413 void SkipDuplicates(); 414 415 BPlusTree* Tree() const { return fTree; } 416 417#ifdef DEBUG 418 void Dump(); 419#endif 420 421private: 422 friend class BPlusTree; 423 424 // called by BPlusTree 425 void Update(off_t offset, off_t nextOffset, 426 uint16 keyIndex, uint16 splitAt, 427 int8 change); 428 void Stop(); 429 430private: 431 BPlusTree* fTree; 432 off_t fCurrentNodeOffset; 433 // traverse position 434 int32 fCurrentKey; 435 off_t fDuplicateNode; 436 uint16 fDuplicate; 437 uint16 fNumDuplicates; 438 bool fIsFragment; 439}; 440 441 442// #pragma mark - BPlusTree's inline functions 443// (most of them may not be needed) 444 445 446#if !_BOOT_MODE 447inline status_t 448BPlusTree::Remove(Transaction& transaction, const char* key, off_t value) 449{ 450 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 451 return B_BAD_TYPE; 452 return Remove(transaction, (uint8*)key, strlen(key), value); 453} 454 455 456inline status_t 457BPlusTree::Insert(Transaction& transaction, const char* key, off_t value) 458{ 459 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 460 return B_BAD_TYPE; 461 return Insert(transaction, (uint8*)key, strlen(key), value); 462} 463 464 465inline status_t 466BPlusTree::Insert(Transaction& transaction, int32 key, off_t value) 467{ 468 if (fHeader.DataType() != BPLUSTREE_INT32_TYPE) 469 return B_BAD_TYPE; 470 return Insert(transaction, (uint8*)&key, sizeof(key), value); 471} 472 473 474inline status_t 475BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value) 476{ 477 if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE) 478 return B_BAD_TYPE; 479 return Insert(transaction, (uint8*)&key, sizeof(key), value); 480} 481 482 483inline status_t 484BPlusTree::Insert(Transaction& transaction, int64 key, off_t value) 485{ 486 if (fHeader.DataType() != BPLUSTREE_INT64_TYPE) 487 return B_BAD_TYPE; 488 return Insert(transaction, (uint8*)&key, sizeof(key), value); 489} 490 491 492inline status_t 493BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value) 494{ 495 if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE) 496 return B_BAD_TYPE; 497 return Insert(transaction, (uint8*)&key, sizeof(key), value); 498} 499 500 501inline status_t 502BPlusTree::Insert(Transaction& transaction, float key, off_t value) 503{ 504 if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE) 505 return B_BAD_TYPE; 506 return Insert(transaction, (uint8*)&key, sizeof(key), value); 507} 508 509 510inline status_t 511BPlusTree::Insert(Transaction& transaction, double key, off_t value) 512{ 513 if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE) 514 return B_BAD_TYPE; 515 return Insert(transaction, (uint8*)&key, sizeof(key), value); 516} 517#endif // !_BOOT_MODE 518 519 520// #pragma mark - TreeIterator inline functions 521 522 523inline status_t 524TreeIterator::Rewind() 525{ 526 return Goto(BPLUSTREE_BEGIN); 527} 528 529 530inline status_t 531TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength, 532 off_t* value, uint16* duplicate) 533{ 534 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, 535 duplicate); 536} 537 538 539inline status_t 540TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength, 541 off_t* value, uint16* duplicate) 542{ 543 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, 544 duplicate); 545} 546 547 548// #pragma mark - bplustree_header inline functions 549 550 551inline bool 552bplustree_header::CheckNode(const bplustree_node* node) const 553{ 554 // sanity checks (links, all_key_count) 555 return IsValidLink(node->LeftLink()) 556 && IsValidLink(node->RightLink()) 557 && IsValidLink(node->OverflowLink()) 558 && (int8*)node->Values() + node->NumKeys() * sizeof(off_t) 559 <= (int8*)node + NodeSize(); 560} 561 562 563inline bool 564bplustree_header::IsValidLink(off_t link) const 565{ 566 return link == BPLUSTREE_NULL 567 || (link > 0 && link <= MaximumSize() - NodeSize()); 568} 569 570 571// #pragma mark - bplustree_node inline functions 572 573 574inline Unaligned<uint16>* 575bplustree_node::KeyLengths() const 576{ 577 return (Unaligned<uint16>*)(((char*)this) + key_align(sizeof(bplustree_node) 578 + AllKeyLength())); 579} 580 581 582inline Unaligned<off_t>* 583bplustree_node::Values() const 584{ 585 return (Unaligned<off_t>*)( 586 (char*)KeyLengths() + NumKeys() * sizeof(uint16)); 587} 588 589 590inline uint8* 591bplustree_node::Keys() const 592{ 593 return (uint8*)this + sizeof(bplustree_node); 594} 595 596 597inline int32 598bplustree_node::Used() const 599{ 600 return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() 601 * (sizeof(uint16) + sizeof(off_t)); 602} 603 604 605inline bool 606bplustree_node::IsLeaf() const 607{ 608 return OverflowLink() == BPLUSTREE_NULL; 609} 610 611 612inline duplicate_array* 613bplustree_node::FragmentAt(int8 index) const 614{ 615 return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1)); 616} 617 618 619inline duplicate_array* 620bplustree_node::DuplicateArray() const 621{ 622 return (duplicate_array*)&overflow_link; 623} 624 625 626inline uint8 627bplustree_node::LinkType(off_t link) 628{ 629 return *(uint64*)&link >> 62; 630} 631 632 633inline off_t 634bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex) 635{ 636 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) 637 | (fragmentIndex & 0x3ff); 638} 639 640 641inline bool 642bplustree_node::IsDuplicate(off_t link) 643{ 644 return (LinkType(link) 645 & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0; 646} 647 648 649inline off_t 650bplustree_node::FragmentOffset(off_t link) 651{ 652 return link & 0x3ffffffffffffc00LL; 653} 654 655 656inline uint32 657bplustree_node::FragmentIndex(off_t link) 658{ 659 return (uint32)(link & 0x3ff); 660} 661 662 663inline uint32 664bplustree_node::MaxFragments(uint32 nodeSize) 665{ 666 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 667} 668 669 670#if _BOOT_MODE 671} // namespace BFS 672 673#undef Inode 674#endif 675 676#endif // B_PLUS_TREE_H 677