1/* Copyright (C) 2012-2013 2 Free Software Foundation 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 14 License for more details. 15 16 Under Section 7 of GPL version 3, you are granted additional 17 permissions described in the GCC Runtime Library Exception, version 18 3.1, as published by the Free Software Foundation. 19 20 You should have received a copy of the GNU General Public License 21 and a copy of the GCC Runtime Library Exception along with this 22 program; see the files COPYING3 and COPYING.RUNTIME respectively. 23 If not, see <http://www.gnu.org/licenses/>. */ 24 25#ifndef _VTV_SET_H 26#define _VTV_SET_H 1 27 28/* Code in this file manages a collection of insert-only sets. We 29 have only tested the case where Key is uintptr_t, though it 30 theoretically should work for some other cases. All odd keys are 31 reserved, and must not be inserted into any of the sets. This code 32 is intended primarily for sets of pointers, and the code is 33 optimized for small sets (including size 0 and 1), but regardless 34 of the set size, insert() and contains() have close to O(1) speed 35 in practice. 36 37 TODO(gpike): fix this comment. 38 39 Recommended multithreaded use of a set: 40 41 For speed, we want to use a lock-free test for set membership. The 42 code handles simultaneous reads and inserts, as long as at most one 43 insertion is in progress at a time. After an insert, other threads 44 may not immediately "see" the inserted key if they perform a 45 lock-free read, so we recommend retrying, as explained below. 46 47 Also, to make data corruption less likely, we recommend using a 48 "normal" RW page as well as one or pages that are typically RO 49 but that can be switched to RW and back as needed. The latter 50 pages should contain sets. The former should contain a lock, L, 51 and an int or similar, num_writers. Then, to insert, something 52 like this would be safe: 53 o Acquire L. 54 o Increment num_writers; if that made it 1, change pages to RW. 55 o Release L. 56 o while (there are insertions to do in some set, S) { 57 acquire L; 58 do some insertions in S; 59 release L; 60 } 61 o Acquire L. 62 o Decrement num_writers; if that made it 0, change pages to RO. 63 o Release L. 64 65 And to check if the set contains some key, one could use 66 set.contains(key) || 67 ({ Acquire L; bool b = set.contains(key); Release L; b; }) 68 69 In this scheme, the number of threads with reads in progress isn't 70 tracked, so old sets can never be deleted. In addition, on some 71 architectures the intentionally racy reads might cause contains() 72 to return true when it should have returned false. This should be 73 no problem on x86, and most other machines, where reading or 74 writing an aligned uintptr_t is atomic. E.g., on those machines, 75 if *p is 0 and one thread does *p = x while another reads *p, the 76 read will see either 0 or x. 77 78 To make the above easier, the insert_only_hash_sets class provides 79 an interface to manipulate any number of hash sets. One shouldn't 80 create objects of that class, as it has no member data and its 81 methods are static. 82 83 So the recommended model is to have a single lock, a single 84 num_writers variable, and some number of sets. If lock contention 85 becomes a problem then the sets can be divided into k groups, each 86 of which has a lock and a num_writers variable; or each set can be 87 represented as a set of values that equal 0 mod m, a set of values 88 that equal 1 mod m, ..., plus a set of values that equal m-1 mod m. 89 90 However, we expect most or all uses of this code to call contains() 91 much more frequently than anything else, so lock contention is 92 likely to be low. */ 93 94#include <algorithm> 95 96#ifndef HASHTABLE_STATS 97#define HASHTABLE_STATS 0 98#endif 99 100#ifndef HASHTABLE_STATS_ATOMIC 101#define HASHTABLE_STATS_ATOMIC 0 102#endif 103 104#if HASHTABLE_STATS 105#if HASHTABLE_STATS_ATOMIC 106/* Stat counters, with atomics. */ 107#include <bits/atomic_word.h> 108 109typedef _Atomic_word _AtomicStatCounter; 110 111void 112inc_by (_AtomicStatCounter &stat, int amount) 113{ 114 __atomic_add_fetch (&stat, amount, __ATOMIC_ACQ_REL); 115} 116 117#else 118 119/* Stat counters, but without atomics. */ 120typedef int _AtomicStatCounter; 121 122void 123inc_by (_AtomicStatCounter& stat, int amount) 124{ 125 stat += amount; 126} 127 128#endif 129 130 131/* Number of calls to contains(), insert(), etc. */ 132extern _AtomicStatCounter stat_insert; 133extern _AtomicStatCounter stat_contains; 134extern _AtomicStatCounter stat_resize; 135extern _AtomicStatCounter stat_create; 136 137/* Sum of set size over all calls to contains(). */ 138extern _AtomicStatCounter stat_contains_sizes; 139 140/* contains() calls in a set whose capacity is more than 1. */ 141extern _AtomicStatCounter stat_contains_in_non_trivial_set; 142 143/* Probes in a set whose capacity is more than 1. Ideally, this will 144 be pretty close to stat_contains_in_non_trivial_set. That will 145 happen if our hash function is good and/or important keys were 146 inserted before unimportant keys. */ 147extern _AtomicStatCounter stat_probes_in_non_trivial_set; 148 149/* number of calls to contains() with size=0, 1, etc. */ 150extern _AtomicStatCounter stat_contains_size0; 151extern _AtomicStatCounter stat_contains_size1; 152extern _AtomicStatCounter stat_contains_size2; 153extern _AtomicStatCounter stat_contains_size3; 154extern _AtomicStatCounter stat_contains_size4; 155extern _AtomicStatCounter stat_contains_size5; 156extern _AtomicStatCounter stat_contains_size6; 157extern _AtomicStatCounter stat_contains_size7; 158extern _AtomicStatCounter stat_contains_size8; 159extern _AtomicStatCounter stat_contains_size9; 160extern _AtomicStatCounter stat_contains_size10; 161extern _AtomicStatCounter stat_contains_size11; 162extern _AtomicStatCounter stat_contains_size12; 163extern _AtomicStatCounter stat_contains_size13_or_more; 164extern _AtomicStatCounter stat_grow_from_size0_to_1; 165extern _AtomicStatCounter stat_grow_from_size1_to_2; 166extern _AtomicStatCounter stat_double_the_number_of_buckets; 167extern _AtomicStatCounter stat_insert_key_that_was_already_present; 168 169/* Hash collisions detected during insert_no_resize(). Only counts 170 hasher(k) == hasher(k'); hasher(k) % tablesize == hasher(k') % 171 tablesize is not sufficient. Will count collisions that are 172 detected during table resizes etc., so the same two keys may add to 173 this stat multiple times. */ 174extern _AtomicStatCounter stat_insert_found_hash_collision; 175 176#include <string> 177 178struct insert_only_hash_sets_logger 179{ 180 static char * 181 log (char c, char *buf) 182 { 183 *buf++ = c; 184 return buf; 185 } 186 187 static char * 188 log (const char *s, char *buf) 189 { return strcpy (buf, s) + strlen (s); } 190 191 static char * 192 log (_AtomicStatCounter i, char *buf) 193 { 194 if (i < 10) 195 return log ((char) ('0' + i), buf); 196 else 197 return log ((char) ('0' + i % 10), log (i / 10, buf)); 198 } 199 200 static char * 201 log (const char *label, _AtomicStatCounter i, char *buf) 202 { 203 buf = log (label, buf); 204 buf = log (": ", buf); 205 buf = log (i, buf); 206 return log ('\n', buf); 207 } 208}; 209 210// Write stats to the given buffer, which should be at least 4000 bytes. 211static inline void 212insert_only_hash_tables_stats (char *buf) 213{ 214 buf = insert_only_hash_sets_logger::log ("insert", stat_insert, buf); 215 buf = insert_only_hash_sets_logger::log ("contains", stat_contains, buf); 216 buf = insert_only_hash_sets_logger::log ("resize", stat_resize, buf); 217 buf = insert_only_hash_sets_logger::log ("create", stat_create, buf); 218 buf = insert_only_hash_sets_logger::log ("insert_key_that_was_already_" 219 "present", 220 stat_insert_key_that_was_already_present, 221 buf); 222 buf = insert_only_hash_sets_logger::log ("contains_sizes", 223 stat_contains_sizes, buf); 224 buf = insert_only_hash_sets_logger::log ("contains_in_non_trivial_set", 225 stat_contains_in_non_trivial_set, 226 buf); 227 buf = insert_only_hash_sets_logger::log ("probes_in_non_trivial_set", 228 stat_probes_in_non_trivial_set, 229 buf); 230 buf = insert_only_hash_sets_logger::log ("contains_size0", 231 stat_contains_size0, buf); 232 buf = insert_only_hash_sets_logger::log ("contains_size1", 233 stat_contains_size1, buf); 234 buf = insert_only_hash_sets_logger::log ("contains_size2", 235 stat_contains_size2, buf); 236 buf = insert_only_hash_sets_logger::log ("contains_size3", 237 stat_contains_size3, buf); 238 buf = insert_only_hash_sets_logger::log ("contains_size4", 239 stat_contains_size4, buf); 240 buf = insert_only_hash_sets_logger::log ("contains_size5", 241 stat_contains_size5, buf); 242 buf = insert_only_hash_sets_logger::log ("contains_size6", 243 stat_contains_size6, buf); 244 buf = insert_only_hash_sets_logger::log ("contains_size7", 245 stat_contains_size7, buf); 246 buf = insert_only_hash_sets_logger::log ("contains_size8", 247 stat_contains_size8, buf); 248 buf = insert_only_hash_sets_logger::log ("contains_size9", 249 stat_contains_size9, buf); 250 buf = insert_only_hash_sets_logger::log ("contains_size10", 251 stat_contains_size10, buf); 252 buf = insert_only_hash_sets_logger::log ("contains_size11", 253 stat_contains_size11, buf); 254 buf = insert_only_hash_sets_logger::log ("contains_size12", 255 stat_contains_size12, buf); 256 buf = insert_only_hash_sets_logger::log ("contains_size13_or_more", 257 stat_contains_size13_or_more, buf); 258 buf = insert_only_hash_sets_logger::log ("grow_from_size0_to_1", 259 stat_grow_from_size0_to_1, buf); 260 buf = insert_only_hash_sets_logger::log ("grow_from_size1_to_2", 261 stat_grow_from_size1_to_2, buf); 262 buf = insert_only_hash_sets_logger::log ("insert_found_hash_collision", 263 stat_insert_found_hash_collision, 264 buf); 265 buf = insert_only_hash_sets_logger::log ("double_the_number_of_buckets", 266 stat_double_the_number_of_buckets, 267 buf); 268 *buf = '\0'; 269} 270 271#else 272 273/* No stats. */ 274#define inc_by(statname, amount) do { } while (false && (amount)) 275 276#endif 277 278#define inc(statname) inc_by (statname, 1) 279 280template <typename Key, class HashFcn, class Alloc> 281class insert_only_hash_sets 282{ 283 public: 284 typedef Key key_type; 285 typedef size_t size_type; 286 typedef Alloc alloc_type; 287 enum { illegal_key = 1 }; 288 enum { min_capacity = 4 }; 289#if HASHTABLE_STATS 290 enum { stats = true }; 291#else 292 enum { stats = false }; 293#endif 294 295 /* Do not directly use insert_only_hash_set. Instead, use the 296 static methods below to create and manipulate objects of the 297 following class. 298 299 Implementation details: each set is represented by a pointer 300 plus, perhaps, out-of-line data, which would be an object of type 301 insert_only_hash_set. For a pointer, s, the interpretation is: s 302 == NULL means empty set, lsb(s) == 1 means a set with one 303 element, which is (uintptr_t)s - 1, and otherwise s is a pointer 304 of type insert_only_hash_set*. So, to increase the size of a set 305 we have to change s and/or *s. To check if a set contains some 306 key we have to examine s and possibly *s. */ 307 class insert_only_hash_set 308 { 309 public: 310 /* Insert a key. The key must not be a reserved key. */ 311 static inline insert_only_hash_set *insert (key_type key, 312 insert_only_hash_set *s); 313 314 315 /* Create an empty set. */ 316 static inline insert_only_hash_set *create (size_type capacity); 317 318 /* Return whether the given key is present. If key is illegal_key 319 then either true or false may be returned, but for all other 320 reserved keys false will be returned. */ 321 static bool 322 contains (key_type key, const insert_only_hash_set *s) 323 { 324 if (stats) 325 { 326 inc (stat_contains); 327 switch (size (s)) 328 { 329 case 0: inc (stat_contains_size0); break; 330 case 1: inc (stat_contains_size1); break; 331 case 2: inc (stat_contains_size2); break; 332 case 3: inc (stat_contains_size3); break; 333 case 4: inc (stat_contains_size4); break; 334 case 5: inc (stat_contains_size5); break; 335 case 6: inc (stat_contains_size6); break; 336 case 7: inc (stat_contains_size7); break; 337 case 8: inc (stat_contains_size8); break; 338 case 9: inc (stat_contains_size9); break; 339 case 10: inc (stat_contains_size10); break; 340 case 11: inc (stat_contains_size11); break; 341 case 12: inc (stat_contains_size12); break; 342 default: inc (stat_contains_size13_or_more); break; 343 } 344 inc_by (stat_contains_sizes, size (s)); 345 } 346 347 return (singleton (s) ? 348 singleton_key (key) == s : 349 ((s != NULL) && s->contains (key))); 350 } 351 352 /* Return a set's size. */ 353 static size_type 354 size (const insert_only_hash_set *s) 355 { return (s == NULL) ? 0 : (singleton (s) ? 1 : s->num_entries); } 356 357 static inline insert_only_hash_set *resize (size_type target_num_buckets, 358 insert_only_hash_set *s); 359 360 361 private: 362 /* Return whether a set has size 1. */ 363 static bool 364 singleton (const insert_only_hash_set *s) 365 { return (uintptr_t) s & 1; } 366 367 /* Return the representation of a singleton set containing the 368 given key. */ 369 static insert_only_hash_set * 370 singleton_key (key_type key) 371 { return (insert_only_hash_set *) ((uintptr_t) key + 1); } 372 373 /* Given a singleton set, what key does it contain? */ 374 static key_type 375 extract_singleton_key (const insert_only_hash_set *s) 376 { 377 VTV_DEBUG_ASSERT (singleton (s)); 378 return (key_type) ((uintptr_t) s - 1); 379 } 380 381 volatile key_type & 382 key_at_index (size_type index) 383 { return buckets[index]; } 384 385 key_type 386 key_at_index (size_type index) const 387 { return buckets[index]; } 388 389 size_type 390 next_index (size_type index, size_type indices_examined) const 391 { return (index + indices_examined) & (num_buckets - 1); } 392 393 inline void insert_no_resize (key_type key); 394 395 inline bool contains (key_type key) const; 396 397 inline insert_only_hash_set *resize_if_necessary (void); 398 399 size_type num_buckets; /* Must be a power of 2 not less than 400 min_capacity. */ 401 volatile size_type num_entries; 402 volatile key_type buckets[0]; /* Actual array size is num_buckets. */ 403 }; 404 405 /* Create an empty set with the given capacity. Requires that n be 406 0 or a power of 2. If 1 < n < min_capacity then treat n as 407 min_capacity. Sets *handle. Returns true unless the allocator 408 fails. Subsequent operations on this set should use the same 409 handle. */ 410 411 static inline bool create (size_type n, insert_only_hash_set **handle); 412 413 /* Force the capacity of a set to be n, unless it was more than n 414 already. Requires that n be 0 or a power of 2. Sets *handle 415 unless the current capacity is n or more. Returns true unless 416 the allocator fails. */ 417 418 static inline bool resize (size_type n, insert_only_hash_set **handle); 419 420 /* Insert a key. *handle is unmodified unless (1) a resize occurs, 421 or (2) the set was initially empty. Returns true unless the 422 allocator fails during a resize. If the allocator fails during a 423 resize then the set is reset to be the empty set. The key must 424 not be a reserved key. */ 425 426 static inline bool insert (key_type key, insert_only_hash_set **handle); 427 428 /* Check for the presence of a key. If key is illegal_key then 429 either true or false may be returned, but for all other reserved 430 keys false will be returned. */ 431 432 static inline bool 433 contains (key_type key, /* const */ insert_only_hash_set **handle) 434 { return insert_only_hash_set::contains (key, *handle); } 435 436 /* Return the size of the given set. */ 437 static size_type 438 size (const insert_only_hash_set **handle) 439 { return insert_only_hash_set::size (*handle); } 440 441 static bool 442 is_reserved_key (key_type key) 443 { return ((uintptr_t) key % 2) == 1; } 444}; 445 446template <typename Key, class HashFcn, class Alloc> 447typename insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set * 448insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set::resize 449 (size_type n, insert_only_hash_set *s) 450{ 451 if (s == NULL) 452 return create (n); 453 454 size_type capacity = singleton (s) ? 1 : s->num_buckets; 455 456 if (n <= capacity) 457 return s; 458 459 insert_only_hash_set *result = 460 create (std::max<size_type> (n, min_capacity)); 461 if (result != NULL) 462 { 463 if (singleton (s)) 464 { 465 result->insert_no_resize (extract_singleton_key (s)); 466 } 467 else 468 { 469 for (size_type i = 0; i < s->num_buckets; i++) 470 if (s->buckets[i] != (key_type) illegal_key) 471 result->insert_no_resize (s->buckets[i]); 472 } 473 VTV_DEBUG_ASSERT (size (result) == size (s)); 474 } 475 return result; 476} 477 478template <typename Key, class HashFcn, class Alloc> 479typename insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set * 480insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set::insert 481 (key_type key, insert_only_hash_set *s) 482{ 483 VTV_DEBUG_ASSERT (!is_reserved_key (key)); 484 485 inc_by (stat_grow_from_size0_to_1, s == NULL); 486 487 if (s == NULL) 488 return singleton_key (key); 489 490 if (singleton (s)) 491 { 492 const key_type old_key = extract_singleton_key (s); 493 if (old_key == key) 494 return s; 495 496 /* Grow from size 1 to size 2. */ 497 inc (stat_grow_from_size1_to_2); 498 s = create (2); 499 if (s == NULL) 500 return NULL; 501 502 s->insert_no_resize (old_key); 503 s->insert_no_resize (key); 504 VTV_DEBUG_ASSERT (size (s) == 2); 505 return s; 506 } 507 s = s->resize_if_necessary(); 508 if (s != NULL) 509 s->insert_no_resize (key); 510 return s; 511} 512 513template <typename Key, class HashFcn, class Alloc> 514typename insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set * 515insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set::create 516 (size_type capacity) 517{ 518 if (capacity <= 1) 519 return NULL; 520 521 VTV_DEBUG_ASSERT (capacity > 1 && (capacity & (capacity - 1)) == 0); 522 VTV_DEBUG_ASSERT (sizeof (insert_only_hash_set) == 2 * sizeof (size_type)); 523 capacity = std::max <size_type> (capacity, min_capacity); 524 const size_t num_bytes = sizeof (insert_only_hash_set) + 525 sizeof (key_type) * capacity; 526 alloc_type alloc; 527 insert_only_hash_set *result = (insert_only_hash_set *) alloc (num_bytes); 528 result->num_buckets = capacity; 529 result->num_entries = 0; 530 for (size_type i = 0; i < capacity; i++) 531 result->buckets[i] = (key_type) illegal_key; 532 return result; 533} 534 535template <typename Key, class HashFcn, class Alloc> 536void 537insert_only_hash_sets<Key, HashFcn, 538 Alloc>::insert_only_hash_set::insert_no_resize 539 (key_type key) 540{ 541 HashFcn hasher; 542 const size_type capacity = num_buckets; 543 VTV_DEBUG_ASSERT (capacity >= min_capacity); 544 VTV_DEBUG_ASSERT (!is_reserved_key (key)); 545 size_type index = hasher (key) & (capacity - 1); 546 key_type k = key_at_index (index); 547 size_type indices_examined = 0; 548 while (k != key) 549 { 550 ++indices_examined; 551 if (k == (key_type) illegal_key) 552 { 553 key_at_index (index) = key; 554 ++num_entries; 555 return; 556 } 557 else 558 { 559 inc_by (stat_insert_found_hash_collision, 560 hasher (k) == hasher (key)); 561 } 562 VTV_DEBUG_ASSERT (indices_examined < capacity); 563 index = next_index (index, indices_examined); 564 k = key_at_index (index); 565 } 566} 567 568template<typename Key, class HashFcn, class Alloc> 569bool 570insert_only_hash_sets<Key, HashFcn, Alloc>::insert_only_hash_set::contains 571 (key_type key) const 572{ 573 inc (stat_contains_in_non_trivial_set); 574 HashFcn hasher; 575 const size_type capacity = num_buckets; 576 size_type index = hasher (key) & (capacity - 1); 577 key_type k = key_at_index (index); 578 size_type indices_examined = 0; 579 inc (stat_probes_in_non_trivial_set); 580 while (k != key) 581 { 582 ++indices_examined; 583 if (/*UNLIKELY*/(k == (key_type) illegal_key 584 || indices_examined == capacity)) 585 return false; 586 587 index = next_index (index, indices_examined); 588 k = key_at_index (index); 589 inc (stat_probes_in_non_trivial_set); 590 } 591 return true; 592} 593 594template <typename Key, class HashFcn, class Alloc> 595typename insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set * 596 insert_only_hash_sets<Key, HashFcn, 597 Alloc>::insert_only_hash_set::resize_if_necessary (void) 598{ 599 VTV_DEBUG_ASSERT (num_buckets >= min_capacity); 600 size_type unused = num_buckets - num_entries; 601 if (unused < (num_buckets >> 2)) 602 { 603 inc (stat_double_the_number_of_buckets); 604 size_type new_num_buckets = num_buckets * 2; 605 insert_only_hash_set *s = create (new_num_buckets); 606 for (size_type i = 0; i < num_buckets; i++) 607 if (buckets[i] != (key_type) illegal_key) 608 s->insert_no_resize (buckets[i]); 609 VTV_DEBUG_ASSERT (size (this) == size (s)); 610 return s; 611 } 612 else 613 return this; 614} 615 616template<typename Key, class HashFcn, class Alloc> 617bool 618insert_only_hash_sets<Key, HashFcn, Alloc>::create (size_type n, 619 insert_only_hash_set **handle) 620 621{ 622 inc (stat_create); 623 *handle = insert_only_hash_set::create (n); 624 return (n <= 1) || (*handle != NULL); 625} 626 627template<typename Key, class HashFcn, class Alloc> 628bool 629insert_only_hash_sets<Key, HashFcn, Alloc>::resize (size_type n, 630 insert_only_hash_set **handle) 631{ 632 inc (stat_resize); 633 *handle = insert_only_hash_set::resize (n, *handle); 634 return (n <= 1) || (*handle != NULL); 635} 636 637template<typename Key, class HashFcn, class Alloc> 638bool 639insert_only_hash_sets<Key, HashFcn, Alloc>::insert (key_type key, 640 insert_only_hash_set **handle) 641{ 642 inc (stat_insert); 643 const size_type old_size = insert_only_hash_set::size (*handle); 644 *handle = insert_only_hash_set::insert (key, *handle); 645 if (*handle != NULL) 646 { 647 const size_type delta = insert_only_hash_set::size (*handle) - old_size; 648 inc_by (stat_insert_key_that_was_already_present, delta == 0); 649 } 650 return *handle != NULL; 651} 652 653#endif /* VTV_SET_H */ 654