1/** 2 * \file 3 * \brief header specifying the interface of libnuma 4 * 5 * This is derived from: 6 * 7 * Linux man pages "numa" 8 * libnuma from http://oss.sgi.com/projects/libnuma/ 9 * 10 */ 11 12/* 13 * Copyright (c) 2014, ETH Zurich. 14 * All rights reserved. 15 * 16 * This file is distributed under the terms in the attached LICENSE file. 17 * If you do not find this file, copies can be found by writing to: 18 * ETH Zurich D-INFK, CAB F.78, Universitaetstrasse 6, CH-8092 Zurich. 19 * Attn: Systems Group. 20 */ 21 22#ifndef __NUMA_H 23#define __NUMA_H 1 24 25#ifdef __cplusplus 26extern "C" { 27#endif 28 29///< the maximum number of nodes supported 30#define NUMA_MAX_NUMNODES 16 31 32#if NUMA_MAX_NUMNODES > MAX_NODEID 33#error maximum node bigger than maximum nodeid 34#endif 35 36///< specify the local node for allocation 37#define NUMA_NODE_LOCAL ((nodeid_t)-1) 38 39///< error value for the numa node size 40#define NUMA_NODE_INVALID ((uintptr_t)-1) 41 42///< error value for invalid cores 43#define NUMA_CORE_INVALID ((coreid_t)-1); 44 45 46typedef enum numa_policy { 47 NUMA_POLICY_DEFAULT, ///< default numa policy 48 NUMA_POLICY_STRICT, ///< strict numa policy 49 NUMA_POLICY_PREFERRED ///< preferred memory policy 50} numa_policy_t; 51 52///< typedef for the nodemask 53typedef struct bitmask nodemask_t; 54 55/** 56 * \brief checks if numa support is available 57 * 58 * \returns NUMA_ERR_NOT_AVAILABLE value all other functions are undefined 59 * SYS_ERR_OK: NUMA functionality is available 60 * 61 * this function must be called before any of the other functions of libnuma. 62 * during the call to numa_available the library also gets initialized 63 */ 64errval_t numa_available(void); 65 66/** 67 * \brief returns the highest node number available on the current system. 68 * 69 * \returns ID of the max NUMA node 70 */ 71nodeid_t numa_max_node(void); 72 73/** 74 * \brief returns the highest ID of the present cores 75 * 76 * \returns the maximum coreID in the system 77 */ 78coreid_t numa_max_core(void); 79 80/** 81 * \brief returns the current node the domain is running on 82 * 83 * \return ID of the current node 84 */ 85nodeid_t numa_current_node(void); 86 87/** 88 * \brief returns the size of the node mask 89 * 90 * \return size of the node mask 91 */ 92nodeid_t numa_num_possible_nodes(void); 93 94/** 95 * \brief Obtains the maximum number of nodes the system can handle 96 * 97 * \return maximum nodes supported 98 * 99 * returns the number of the highest possible node in a system. In other words, 100 * the size of a kernel type nodemask_t (in bits) minus 1 101 */ 102static inline nodeid_t numa_max_possible_node(void) 103{ 104 return numa_num_possible_nodes() - 1; 105} 106 107/** 108 * \brief Obtains the number of all memory nodes in the system 109 * 110 * \return number of memory nodes in the system 111 * 112 * returns the number of memory nodes in the system. This count includes any nodes 113 * that are currently disabled. 114 */ 115nodeid_t numa_num_configured_nodes(void); 116 117/** 118 * \brief obtains the nodes the domain is allowed to allocate memory from 119 * 120 * \returns bitmask representing the allowing nodes 121 * 122 * returns the mask of nodes from which the process is allowed to allocate memory 123 * in it's current cpuset context. 124 */ 125struct bitmap *numa_get_mems_allowed(void); 126 127/** 128 * \brief returns the total numberof CPUs in the system 129 * 130 * \returns total number of CPUs in the system 131 * 132 * returns the number of cpus in the system. This count includes any cpus that are 133 * currently disabled. 134 */ 135coreid_t numa_num_configured_cpus(void); 136 137/** 138 * \brief bitmask that is allocated by the library with bits representing all nodes 139 * on which the calling task may allocate memory. 140 */ 141extern struct bitmap *numa_all_nodes_ptr; 142 143/** 144 * \brief points to a bitmask that is allocated by the library and left all zeroes. 145 */ 146extern struct bitmap *numa_no_nodes_ptr; 147 148/** 149 * \brief points to a bitmask that is allocated by the library with bits 150 * representing all cpus on which the calling task may execute. 151 */ 152extern struct bitmap *numa_all_cpus_ptr; 153 154/** 155 * \brief returns the number of cpus that the calling domain is allowed to use. 156 * 157 * \returns number of CPUs the domain is allowed to use 158 */ 159coreid_t numa_num_task_cpus(void); 160 161/** 162 * \brief returns the number of nodes on which the calling domain is allowed to 163 * allocate memory 164 * 165 * \returns number of nodes the domain is allowed to use 166 */ 167nodeid_t numa_num_task_nodes(void); 168 169/** 170 * \brief parses line , which is a character string 171 * 172 * \param line character string to parse 173 * \param mask bitmap to store the result 174 * 175 * \returns SYS_ERR_OK on SUCCESS 176 * NUMA_ERR_BITMAP_PARSE on FAILURE 177 * 178 * The string contains the hexadecimal representation of a bit map. 179 * 180 * XXX according to the man pages this function is only used internally 181 */ 182errval_t numa_parse_bitmap(char *line, struct bitmap *mask); 183 184/** 185 * \brief parses a character string list of nodes into a bit mask. 186 * 187 * \param string character string to parse 188 * 189 * \returns NUMA bitmask on SUCCESS 190 * NULL if the string is invalid 191 * 192 * The string is a comma-separated list of node numbers or node ranges 193 * Examples: 1-5,7,10 !4-5 +0-3 194 * 195 * If the string length is zero, then the numa_no_nodes_ptr is returned 196 */ 197struct bitmap *numa_parse_nodestring(char *string); 198 199/** 200 * \brief parses a character string list of cpus into a bit mask. 201 * 202 * \param string character string to parse 203 * 204 * \returns NUMA bitmask on SUCCESS 205 * NULL if the string is invalid 206 * 207 * The string is a comma-separated list of cpu numbers or cpu ranges 208 * Examples: 1-5,7,10 !4-5 +0-3 209 */ 210struct bitmap *numa_parse_cpustring(char *string); 211 212/** 213 * \brief obtains the size of a node 214 * 215 * \param node ID of the NUMA node 216 * \param freep 217 * 218 * \returns size of the node in bytes 219 * 220 * returns the memory size of a node. If the argument freep is not NULL, it used 221 * to return the amount of free memory on the node. On error it returns 222 * NUMA_NODE_INVALID 223 */ 224size_t numa_node_size(nodeid_t node, uintptr_t *freep); 225 226///< alias for NUMA node size 64bit variants 227#define numa_node_size64(_node, _freep) numa_node_size(_node, _freep) 228 229/** 230 * \brief obtains the base address of the numa node 231 * 232 * \returns physical address of the start of the numa node 233 */ 234lpaddr_t numa_node_base(nodeid_t node); 235 236/** 237 * \brief returns the preferred node of the current task. 238 * 239 * \returns node ID where memory is preferably allocated 240 */ 241nodeid_t numa_preferred(void); 242 243/** 244 * \brief sets the preferred node for the current task to node 245 * 246 * \param node ID of the node to set preferred 247 * 248 * The system will attempt to allocate memory from the preferred node, but will 249 * fall back to other nodes if no memory is available on the the preferred node 250 * 251 * Passing a node of -1 argument specifies local allocation 252 */ 253void numa_set_preferred(nodeid_t node); 254 255/** 256 * \brief returns the current interleave mask 257 * 258 * \returns bitmask representing the current interleave state 259 * 260 * returns the current interleave mask if the task's memory allocation policy is 261 * page interleaved. Otherwise, this function returns an empty mask. 262 */ 263struct bitmap *numa_get_interleave_mask(void); 264 265/** 266 * \brief sets the memory interleave mask for the current task to nodemask 267 * 268 * \param nodemask bitmask representing the nodes 269 * 270 * All new memory allocations are page interleaved over all nodes in the interleave 271 * mask. Interleaving can be turned off again by passing an empty mask. 272 * 273 * This bitmask is considered to be a hint. Fallback to other nodes may be possible 274 */ 275void numa_set_interleave_mask(struct bitmap *nodemask); 276 277/** 278 * \brief binds the current task and its children to the nodes specified in nodemask. 279 * 280 * \param nodemask bitmap representing the nodes 281 */ 282void numa_bind(struct bitmap *nodemask); 283 284/** 285 * \brief sets the memory allocation policy for the calling task to local allocation. 286 */ 287void numa_set_localalloc(void); 288 289/** 290 * \brief sets the memory allocation mask. 291 * 292 * \param nodemask bitmap representing the nodes 293 * 294 * The task will only allocate memory from the nodes set in nodemask. 295 * 296 * an empty mask or not allowed nodes in the mask will result in an error 297 */ 298errval_t numa_set_membind(struct bitmap *nodemask); 299 300/** 301 * \brief returns the mask of nodes from which memory can currently be allocated. 302 * 303 * \return bitmap of nodes from which can be allocated 304 */ 305struct bitmap *numa_get_membind(void); 306 307/** 308 * \brief allocates memory on a specific node. 309 * 310 * \param size size of the region in bytes 311 * \param node ID of the node to allocate from 312 * \param pagesize page size to be used for the mapping 313 * 314 * \returns pointer to memory region 315 * 316 * The size argument will be rounded up to a multiple of the system page size. 317 * if the specified node is externally denied to this process, this call will fail. 318 * The memory must be freed with numa_free(). On errors NULL is returned. 319 */ 320void *numa_alloc_onnode(size_t size, nodeid_t node, size_t pagesize); 321 322/** 323 * \brief allocates size bytes of memory on the local node 324 * 325 * \param size size of the memory region in bytes 326 * \param pagesize page size to be used for the mapping 327 * 328 * \returns pointer to memory region 329 * 330 * The memory must be freed with numa_free(). On errors NULL is returned. 331 */ 332void *numa_alloc_local(size_t size, size_t pagesize); 333 334/** 335 * \brief allocates size bytes of memory page interleaved on all nodes. 336 * 337 * \param size size of the memory region in bytes 338 * \param pagesize page size to be used for the mapping 339 * 340 * \returns pointer to the mapped memory region 341 * 342 * should only be used for large areas consisting of multiple pages. 343 * The memory must be freed with numa_free(). On errors NULL is returned. 344 */ 345void *numa_alloc_interleaved(size_t size, size_t pagesize); 346 347/** 348 * \brief allocates size bytes of memory page interleaved the nodes specified in 349 * the nodemask. 350 * 351 * \param size size of the memory region in bytes 352 * \param nodemask subset of nodes to consider for allocation 353 * \param pagesize page size to be used for the mapping 354 * 355 * \returns pointer to the mapped memory region 356 * 357 * should only be used for large areas consisting of multiple pages. 358 * The memory must be freed with numa_free(). On errors NULL is returned. 359 */ 360void *numa_alloc_interleaved_subset(size_t size, size_t pagesize, 361 struct bitmap *nodemask); 362 363/** 364 * \brief allocates size bytes of memory with the current NUMA policy. 365 * 366 * \param size size of the memory region in bytes 367 * \param pagesize page size to be used for the mapping 368 * \returns pointer to the mapped memory region 369 * 370 * The memory must be freed with numa_free(). On errors NULL is returned. 371 */ 372void *numa_alloc(size_t size, size_t pagesize); 373 374/** 375 * \brief changes the size of the memory area. 376 * 377 * \param old_addr pointer ot the old memory region 378 * \param old_size size of the old memory region 379 * \param new_size new size to allocate 380 */ 381void *numa_realloc(void *old_addr, size_t old_size, size_t new_size); 382 383/** 384 * \brief frees size bytes of memory starting at start 385 * 386 * \param start start of the memory region 387 * \param size number of bytes to free 388 * 389 * the memory must be previously allocated by one of the numa_alloc* functions 390 */ 391void numa_free(void *start, size_t size); 392 393/** 394 * \brief allocates RAM on a specific node 395 * 396 * \param dest capref to store the RAM cap 397 * \param size size of the RAM region to allocated 398 * \param node node on which the frame should be allocated 399 * \param ret_size returned size of the frame capability 400 * 401 * \returns SYS_ERR_OK on SUCCESS 402 * errval on FAILURE 403 */ 404errval_t numa_ram_alloc_on_node(struct capref *dest, size_t size, 405 nodeid_t node, size_t *ret_size); 406 407/** 408 * \brief allocates a frame on a specific node 409 * 410 * \param dest capref to store the frame 411 * \param size size of the frame to allocated 412 * \param node node on which the frame should be allocated 413 * \param ret_size returned size of the frame capability 414 * 415 * \returns SYS_ERR_OK on SUCCESS 416 * errval on FAILURE 417 */ 418errval_t numa_frame_alloc_on_node(struct capref *dest, 419 size_t size, 420 nodeid_t node, 421 size_t *ret_size); 422 423/** 424 * \brief allocates a frame on the local node 425 * 426 * \param dest capref to store the frame 427 * \param size size of the frame to allocated 428 * \param ret_size returned size of the frame capability 429 * 430 * \returns SYS_ERR_OK on SUCCESS 431 * errval on FAILURE 432 */ 433static inline errval_t numa_frame_alloc_local(struct capref *dest, 434 size_t size, 435 size_t *ret_size) 436{ 437 return numa_frame_alloc_on_node(dest, size, numa_current_node(), ret_size); 438} 439 440/** 441 * \brief frees a previously allocated frame 442 * 443 * \param frame capability to free 444 */ 445errval_t numa_frame_free(struct capref frame); 446 447/** 448 * \brief runs the current domain on a specific node. 449 * 450 * \param node ID of the node to run the domain on 451 * 452 * \returns SYS_ERR_OK on SUCCESS 453 * errval on FAILURE 454 * 455 * Passing -1 permits the kernel to schedule on all nodes again 456 */ 457errval_t numa_run_on_node(nodeid_t node); 458 459/** 460 * \brief runs the current domain only on nodes specified in nodemask. 461 * 462 * \param nodemask bitmap representing the nodes to run the domain on 463 * 464 * \returns SYS_ERR_OK on SUCCESS 465 * errval on FAILURE 466 */ 467errval_t numa_run_on_node_mask(struct bitmap *nodemask); 468 469/** 470 * \brief returns a mask of CPUs on which the current task is allowed to run. 471 * 472 * \returns bitmap represening the coreids the domain is allowed to run 473 */ 474struct bitmap *numa_get_run_node_mask(void); 475 476/** 477 * \brief specify the memory bind policy 478 * 479 * \param strict numa policy to apply 480 * 481 * specifies whether calls that bind memory to a specific node should use the preferred policy or a strict policy. 482 */ 483void numa_set_bind_policy(numa_policy_t strict); 484 485/** 486 * \brief enable or disable the strict allocation policy 487 * 488 * \param strict numa policy to apply 489 * 490 * s a flag that says whether the functions allocating on specific nodes should 491 * use a strict policy. Strict means the allocation will fail if the memory cannot 492 * be allocated on the target node. 493 */ 494void numa_set_strict(numa_policy_t strict); 495 496/** 497 * \brief reports the distance in the machine topology between two nodes 498 * 499 * \param from source node to measure the distance 500 * \param to target node to measure the distance 501 * 502 * \returns distance between two nodes 503 * 0 iff cannot be deterimed 504 * 505 * The factors are a multiple of 10. A node has distance 10 to itself. 506 */ 507uint32_t numa_distance(nodeid_t from, nodeid_t to); 508 509/** 510 * \brief retrieves a bitmask of the cpus on which a domain may run 511 * 512 * \param did domain ID 513 * \param mask returned bitmask 514 * 515 * \returns SYS_ERR_OK on success 516 * errval on FAILURE 517 */ 518errval_t numa_sched_getaffinity(domainid_t did, struct bitmap *mask); 519 520/** 521 * \brief sets a domain's allowed cpu's to those cpu's specified in mask. 522 * 523 * \param did domain ID 524 * \param mask bitmap representing the CPUs 525 * 526 * \returns SYS_ERR_OK on success 527 * errval on FAILURE 528 */ 529errval_t numa_sched_setaffinity(domainid_t did, struct bitmap *mask); 530 531/** 532 * \brief returns the page size 533 * 534 * \returns the number of bytes in a page 535 */ 536size_t numa_pagesize(void); 537 538/** 539 * \brief converts a node number to a bitmask of CPUs 540 * 541 * \param node the ID of the node 542 * \param mask bitmap representing the CPUs of this node 543 * 544 * \return SYS_ERR_OK on SUCCESS 545 * NUMA_ERR_BITMAP_RANGE on FAILURE (too small bitmap) 546 * 547 * The user must pass a bitmask structure with a mask buffer long enough to 548 * represent all possible cpu's 549 */ 550errval_t numa_node_to_cpus(nodeid_t node, struct bitmap *mask); 551 552 553/** 554 * \brief gets the number of cores for the given numa node 555 * 556 * \param node NUMA node to get the number of cores 557 * 558 * \returns number of cores for the node 559 */ 560coreid_t numa_num_node_cpus(nodeid_t node); 561 562/** 563 * \brief gets the system's core ID for a node/local core id configuration 564 * 565 * \param 566 */ 567coreid_t numa_node_get_core(nodeid_t node, coreid_t local_core_id); 568 569 570/** 571 * \brief returns the node that a cpu belongs to 572 * 573 * \param cpu ID of the core 574 * 575 * \returns node ID on SUCCESS 576 * NUMA_NODE_INVALID on FAILURE 577 */ 578nodeid_t numa_node_of_cpu(coreid_t cpu); 579 580/** 581 * \brief allocates a bit mask to represent the cores in the system 582 * 583 * \returns pointer to a new bitmask 584 * NULL on failure 585 */ 586struct bitmap *numa_allocate_cpumask(void); 587 588/** 589 * \brief frees a previously allocated CPU bitmask 590 * 591 * \param cpumask pointer to a previously allocated CPU bitmask 592 */ 593void numa_free_cpumask(struct bitmap *cpumask); 594 595/** 596 * \brief allocates a bit mask to represent the nodes in the system 597 * 598 * \returns pointer to a new bitmask 599 * NULL on failure 600 */ 601struct bitmap *numa_allocate_nodemask(void); 602 603/** 604 * \brief frees a previously allocated node bitmask 605 * 606 * \param nodemask pointer to a previously allocated node bitmask 607 */ 608void numa_free_nodemask(struct bitmap *nodemask); 609 610/** 611 * \brief allocates a bitmask structure and its associated bit mask 612 * 613 * \param n the number of bits 614 * 615 * \returns pointer to the bitmask 616 * NULL on error 617 */ 618struct bitmap *numa_bitmask_alloc(unsigned int n); 619 620/** 621 * \brief sets all bits in the bit mask to 0. 622 * 623 * \param bmp pointer to the bitmap 624 * 625 * \returns pointer to the cleared bit map 626 */ 627struct bitmap *numa_bitmask_clearall(struct bitmap *bmp); 628 629/** 630 * \brief clears the n-th bit of a bitmask 631 * 632 * \param bmp the bitmask 633 * \param n the bit to clear 634 * 635 * \returns pointer to the bitmask 636 */ 637struct bitmap *numa_bitmask_clearbit(struct bitmap *bmp, unsigned int n); 638 639/** 640 * \brief checks if two bitmasks are equal 641 * 642 * \param bmp1 bitmask 1 643 * \param bmp2 bitmask 2 644 * 645 * \return TRUE if the bitmasks are equal 646 * FALSE if the are distinct 647 */ 648bool numa_bitmask_equal(const struct bitmap *bmp1, const struct bitmap *bmp2); 649 650/** 651 * \brief frees the memory of a bitmask 652 * 653 * \param bmp the bitmask to be freed 654 */ 655void numa_bitmask_free(struct bitmap *bmp); 656 657/** 658 * \brief checks if the n-th bit is set in the bitmask 659 * 660 * \param bmp the bitmap 661 * \param n which bit to check 662 * 663 * \returns TRUE if the n-th bit is set 664 * FALSE otherwise 665 */ 666bool numa_bitmask_isbitset(const struct bitmap *bmp, unsigned int n); 667 668/** 669 * \brief returns the size (in bytes) of the bit mask 670 * 671 * \param bmp the bitmask 672 * 673 * \returns the size of the memory in bytes rounded up to a multiple of wordsize 674 */ 675size_t numa_bitmask_nbytes(struct bitmap *bmp); 676 677/** 678 * \brief sets all bits of a bitmask to 1 679 * 680 * \param bmp the bitmask 681 * 682 * \returns the bitmask 683 */ 684struct bitmap *numa_bitmask_setall(struct bitmap *bmp); 685 686/** 687 * \brief sets the n-th bit of a bitmask to 1 688 * 689 * \param bmp the bitmask 690 * \param n which bit to activate 691 * 692 * \returns the bitmask 693 */ 694struct bitmap *numa_bitmask_setbit(struct bitmap *bmp, unsigned int n); 695 696/** 697 * \brief copies the bitmask to a nodemask 698 * 699 * \param bmp the bitmask to copy 700 * \param nodemask the destination nodemask 701 * 702 * If the two areas differ in size, the copy is truncated to the size of the 703 * receiving field or zero-filled. 704 */ 705void copy_bitmask_to_nodemask(struct bitmap *bmp, nodemask_t *nodemask); 706 707/** 708 * \brief copies the contents of a nodemask into the bitmask 709 * 710 * \param nodemask node mask to copy from 711 * \param bmp bitmap to copy into 712 * 713 * If the two areas differ in size, the copy is truncated to the size of the 714 * receiving field or zero-filled. 715 */ 716void copy_nodemask_to_bitmask(nodemask_t *nodemask, struct bitmap *bmp); 717 718/** 719 * \brief copies one bitmask into another 720 * 721 * \param bmpfrom the source bitmask 722 * \param bmpto the destination bitmask 723 * 724 * If the two areas differ in size, the copy is truncated to the size of the 725 * receiving field or zero-filled. 726 */ 727void copy_bitmask_to_bitmask(struct bitmap *bmpfrom, struct bitmap *bmpto); 728 729/** 730 * \brief returns a count of the bits that are set in the body of the bitmask 731 * 732 * \param bmp the bitmask to count the set bits 733 * 734 * \return number of set bits in this bitmask 735 */ 736uint32_t numa_bitmask_weight(const struct bitmap *bmp); 737 738/** 739 * \brief moves a list of pages in the address space of the current domain 740 * 741 * \param did the domain ID 742 * \param count number of pages to move 743 * \param pages list of pages 744 * \param nodes list of nodes to which the pages can be moved 745 * \param status returns the outcome for each page 746 * \param flags flags for moving the pages 747 * 748 * \returns SYS_ERR_OK on SUCCESS 749 */ 750errval_t numa_move_pages(domainid_t did, 751 size_t count, 752 void **pages, 753 const nodeid_t *nodes, 754 errval_t *status, 755 int flags); 756/** 757 * \brief migrate a domain from one set of nodes to another 758 * 759 * \param did the domain ID 760 * \param fromnodes bitmap representing the current nodes 761 * \param tonodes bitmap representing the 762 * 763 * \returns SYS_ERR_OK on SUCCESS 764 */ 765errval_t numa_migrate_pages(domainid_t did, 766 struct bitmap *fromnodes, 767 struct bitmap *tonodes); 768 769/** 770 * is a libnuma internal function that can be overridden by the user program. This 771 * function is called with a char * argument when a libnuma function fails. 772 * Overriding the library internal definition makes it possible to specify a 773 * different error handling strategy when a libnuma function fails. It does not 774 * affect numa_available(). The numa_error() function defined in libnuma prints an 775 * error on stderr and terminates the program if numa_exit_on_error is set to a 776 * non-zero value. The default value of numa_exit_on_error is zero. 777 * 778 * \param where 779 */ 780void numa_error(char *where); 781 782extern int numa_exit_on_error; 783extern int numa_exit_on_warn; 784 785/** 786 * is a libnuma internal function that can be also overridden by the user program. 787 * It is called to warn the user when a libnuma function encounters a non-fatal 788 * error. The default implementation prints a warning to stderr. The first argument 789 * is a unique number identifying each warning. After that there is a printf(3)-style 790 * format string and a variable number of arguments. numa_warn exits the program 791 * when numa_exit_on_warn is set to a non-zero value. The default value of 792 * numa_exit_on_warn is zero. 793 * 794 * \param number 795 * \param where 796 */ 797void numa_warn(int number, char *where, ...); 798 799#ifdef __cplusplus 800} 801#endif 802 803#endif /* __NUMA_H */ 804