1/**
2 * \file
3 * \brief header specifying the interface of libnuma
4 *
5 * This is derived from:
6 *
7 * Linux man pages "numa"
8 * libnuma from http://oss.sgi.com/projects/libnuma/
9 *
10 */
11
12/*
13 * Copyright (c) 2014, ETH Zurich.
14 * All rights reserved.
15 *
16 * This file is distributed under the terms in the attached LICENSE file.
17 * If you do not find this file, copies can be found by writing to:
18 * ETH Zurich D-INFK, CAB F.78, Universitaetstrasse 6, CH-8092 Zurich.
19 * Attn: Systems Group.
20 */
21
22#ifndef __NUMA_H
23#define __NUMA_H 1
24
25#ifdef __cplusplus
26extern "C" {
27#endif
28
/// the maximum number of nodes supported
30#define NUMA_MAX_NUMNODES 16
31
32#if NUMA_MAX_NUMNODES > MAX_NODEID
33#error maximum node bigger than maximum nodeid
34#endif
35
/// specify the local node for allocation
37#define NUMA_NODE_LOCAL ((nodeid_t)-1)
38
/// error value for the numa node size
40#define NUMA_NODE_INVALID ((uintptr_t)-1)
41
/// error value for invalid cores: -1 converted to coreid_t.
/// Deliberately defined without a trailing semicolon so that the macro can be
/// used inside expressions, comparisons and argument lists.
#define NUMA_CORE_INVALID ((coreid_t)-1)
44
45
/**
 * \brief NUMA policies as accepted by numa_set_bind_policy() and
 *        numa_set_strict()
 */
typedef enum numa_policy {
    NUMA_POLICY_DEFAULT   = 0,  ///< default numa policy
    NUMA_POLICY_STRICT    = 1,  ///< strict numa policy
    NUMA_POLICY_PREFERRED = 2   ///< preferred memory policy
} numa_policy_t;
51
/// typedef for the nodemask
53typedef struct bitmask nodemask_t;
54
55/**
56 * \brief checks if numa support is available
57 *
 * \returns NUMA_ERR_NOT_AVAILABLE: NUMA is not available, the behavior of all
 *                                  other functions is undefined
 *          SYS_ERR_OK:             NUMA functionality is available
60 *
61 * this function must be called before any of the other functions of libnuma.
62 * during the call to numa_available the library also gets initialized
63 */
64errval_t numa_available(void);
65
66/**
67 * \brief returns the highest node number available on the current system.
68 *
69 * \returns ID of the max NUMA node
70 */
71nodeid_t numa_max_node(void);
72
73/**
74 * \brief returns the highest ID of the present cores
75 *
76 * \returns the maximum coreID in the system
77 */
78coreid_t numa_max_core(void);
79
80/**
81 * \brief returns the current node the domain is running on
82 *
83 * \return ID of the current node
84 */
85nodeid_t numa_current_node(void);
86
87/**
88 * \brief returns the size of the node mask
89 *
90 * \return size of the node mask
91 */
92nodeid_t numa_num_possible_nodes(void);
93
94/**
95 * \brief Obtains the maximum number of nodes the system can handle
96 *
97 * \return maximum nodes supported
98 *
99 * returns the number of the highest possible node in a system. In other words,
100 * the size of a kernel type nodemask_t (in bits) minus 1
101 */
102static inline nodeid_t numa_max_possible_node(void)
103{
104    return numa_num_possible_nodes() - 1;
105}
106
107/**
108 * \brief Obtains the number of all memory nodes in the system
109 *
110 * \return number of memory nodes in the system
111 *
112 * returns the number of memory nodes in the system. This count includes any nodes
113 * that are currently disabled.
114 */
115nodeid_t numa_num_configured_nodes(void);
116
117/**
118 * \brief obtains the nodes the domain is allowed to allocate memory from
119 *
 * \returns bitmask representing the allowed nodes
 *
 * returns the mask of nodes from which the process is allowed to allocate memory
 * in its current cpuset context.
124 */
125struct bitmap *numa_get_mems_allowed(void);
126
127/**
 * \brief returns the total number of CPUs in the system
129 *
130 * \returns total number of CPUs in the system
131 *
132 * returns the number of cpus in the system. This count includes any cpus that are
133 * currently disabled.
134 */
135coreid_t numa_num_configured_cpus(void);
136
137/**
138 * \brief bitmask that is allocated by the library with bits representing all nodes
139 *        on which the calling task may allocate memory.
140 */
141extern struct bitmap *numa_all_nodes_ptr;
142
143/**
144 * \brief points to a bitmask that is allocated by the library and left all zeroes.
145 */
146extern struct bitmap *numa_no_nodes_ptr;
147
148/**
149 * \brief points to a bitmask that is allocated by the library with bits
150 *        representing all cpus on which the calling task may execute.
151 */
152extern struct bitmap *numa_all_cpus_ptr;
153
154/**
155 * \brief returns the number of cpus that the calling domain is allowed to use.
156 *
157 * \returns number of CPUs the domain is allowed to use
158 */
159coreid_t numa_num_task_cpus(void);
160
161/**
162 * \brief returns the number of nodes on which the calling domain is allowed to
163 *        allocate memory
164 *
165 * \returns number of nodes the domain is allowed to use
166 */
167nodeid_t numa_num_task_nodes(void);
168
169/**
 * \brief parses line, which is a character string
171 *
172 * \param line  character string to parse
173 * \param mask  bitmap to store the result
174 *
175 * \returns SYS_ERR_OK            on SUCCESS
176 *          NUMA_ERR_BITMAP_PARSE on FAILURE
177 *
178 * The string contains the hexadecimal representation of a bit map.
179 *
180 * XXX according to the man pages this function is only used internally
181 */
182errval_t numa_parse_bitmap(char *line, struct bitmap *mask);
183
184/**
185 * \brief parses a character string list of nodes into a bit mask.
186 *
187 * \param string character string to parse
188 *
189 * \returns NUMA bitmask on SUCCESS
190 *          NULL if the string is invalid
191 *
192 * The string is a comma-separated list of node numbers or node ranges
193 * Examples: 1-5,7,10 !4-5 +0-3
194 *
195 * If the string length is zero, then the numa_no_nodes_ptr is returned
196 */
197struct bitmap *numa_parse_nodestring(char *string);
198
199/**
200 * \brief parses a character string list of cpus into a bit mask.
201 *
202 * \param string character string to parse
203 *
204 * \returns NUMA bitmask on SUCCESS
205 *          NULL if the string is invalid
206 *
207 * The string is a comma-separated list of cpu numbers or cpu ranges
208 * Examples: 1-5,7,10 !4-5 +0-3
209 */
210struct bitmap *numa_parse_cpustring(char *string);
211
212/**
213 * \brief obtains the size of a node
214 *
215 * \param node  ID of the NUMA node
 * \param freep if not NULL, returns the amount of free memory on the node
 *
 * \returns size of the node in bytes
 *
 * returns the memory size of a node. If the argument freep is not NULL, it is
 * used to return the amount of free memory on the node. On error it returns
 * NUMA_NODE_INVALID
223 */
224size_t numa_node_size(nodeid_t node, uintptr_t *freep);
225
/// alias for NUMA node size 64bit variants
227#define numa_node_size64(_node, _freep) numa_node_size(_node, _freep)
228
229/**
230 * \brief obtains the base address of the numa node
231 *
232 * \returns physical address of the start of the numa node
233 */
234lpaddr_t numa_node_base(nodeid_t node);
235
236/**
237 * \brief returns the preferred node of the current task.
238 *
239 * \returns node ID where memory is preferably allocated
240 */
241nodeid_t numa_preferred(void);
242
243/**
244 * \brief  sets the preferred node for the current task to node
245 *
246 * \param node  ID of the node to set preferred
247 *
248 * The system will attempt to allocate memory from the preferred node, but will
 * fall back to other nodes if no memory is available on the preferred node
250 *
251 * Passing a node of -1 argument specifies local allocation
252 */
253void numa_set_preferred(nodeid_t node);
254
255/**
256 * \brief   returns the current interleave mask
257 *
258 * \returns bitmask representing the current interleave state
259 *
260 * returns the current interleave mask if the task's memory allocation policy is
261 * page interleaved. Otherwise, this function returns an empty mask.
262 */
263struct bitmap *numa_get_interleave_mask(void);
264
265/**
266 * \brief sets the memory interleave mask for the current task to nodemask
267 *
268 * \param nodemask bitmask representing the nodes
269 *
270 * All new memory allocations are page interleaved over all nodes in the interleave
271 * mask. Interleaving can be turned off again by passing an empty mask.
272 *
273 * This bitmask is considered to be a hint. Fallback to other nodes may be possible
274 */
275void numa_set_interleave_mask(struct bitmap *nodemask);
276
277/**
278 * \brief binds the current task and its children to the nodes specified in nodemask.
279 *
280 * \param nodemask  bitmap representing the nodes
281 */
282void numa_bind(struct bitmap *nodemask);
283
284/**
285 * \brief sets the memory allocation policy for the calling task to local allocation.
286 */
287void numa_set_localalloc(void);
288
289/**
290 * \brief sets the memory allocation mask.
291 *
292 * \param nodemask  bitmap representing the nodes
293 *
294 * The task will only allocate memory from the nodes set in nodemask.
295 *
296 * an empty mask or not allowed nodes in the mask will result in an error
297 */
298errval_t numa_set_membind(struct bitmap *nodemask);
299
300/**
301 * \brief returns the mask of nodes from which memory can currently be allocated.
302 *
303 * \return bitmap of nodes from which can be allocated
304 */
305struct bitmap *numa_get_membind(void);
306
307/**
308 * \brief allocates memory on a specific node.
309 *
310 * \param size      size of the region in bytes
311 * \param node      ID of the node to allocate from
312 * \param pagesize  page size to be used for the mapping
313 *
314 * \returns pointer to memory region
315 *
316 * The size argument will be rounded up to a multiple of the system page size.
317 * if the specified node is externally denied to this process, this call will fail.
318 * The memory must be freed with numa_free(). On errors NULL is returned.
319 */
320void *numa_alloc_onnode(size_t size, nodeid_t node, size_t pagesize);
321
322/**
323 * \brief allocates size bytes of memory on the local node
324 *
325 * \param size  size of the memory region in bytes
326 * \param pagesize  page size to be used for the mapping
327 *
328 * \returns pointer to memory region
329 *
330 * The memory must be freed with numa_free(). On errors NULL is returned.
331 */
332void *numa_alloc_local(size_t size, size_t pagesize);
333
334/**
335 * \brief allocates size bytes of memory page interleaved on all nodes.
336 *
337 * \param size      size of the memory region in bytes
338 * \param pagesize  page size to be used for the mapping
339 *
340 * \returns pointer to the mapped memory region
341 *
342 * should only be used for large areas consisting of multiple pages.
343 * The memory must be freed with numa_free(). On errors NULL is returned.
344 */
345void *numa_alloc_interleaved(size_t size, size_t pagesize);
346
347/**
 * \brief allocates size bytes of memory page interleaved on the nodes specified
 *        in the nodemask.
 *
 * \param size      size of the memory region in bytes
 * \param pagesize  page size to be used for the mapping
 * \param nodemask  subset of nodes to consider for allocation
354 *
355 * \returns pointer to the mapped memory region
356 *
357 * should only be used for large areas consisting of multiple pages.
358 * The memory must be freed with numa_free(). On errors NULL is returned.
359 */
360void *numa_alloc_interleaved_subset(size_t size, size_t pagesize,
361                                    struct bitmap *nodemask);
362
363/**
364 * \brief allocates size bytes of memory with the current NUMA policy.
365 *
366 * \param size      size of the memory region in bytes
367 * \param pagesize  page size to be used for the mapping
368 * \returns pointer to the mapped memory region
369 *
370 * The memory must be freed with numa_free(). On errors NULL is returned.
371 */
372void *numa_alloc(size_t size, size_t pagesize);
373
374/**
375 * \brief changes the size of the memory area.
376 *
 * \param old_addr  pointer to the old memory region
378 * \param old_size  size of the old memory region
379 * \param new_size  new size to allocate
380 */
381void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
382
383/**
384 * \brief frees size bytes of memory starting at start
385 *
386 * \param start start of the memory region
387 * \param size  number of bytes to free
388 *
389 * the memory must be previously allocated by one of the numa_alloc* functions
390 */
391void numa_free(void *start, size_t size);
392
393/**
394 * \brief allocates RAM on a specific node
395 *
396 * \param dest      capref to store the RAM cap
 * \param size      size of the RAM region to allocate
398 * \param node      node on which the frame should be allocated
399 * \param ret_size  returned size of the frame capability
400 *
401 * \returns SYS_ERR_OK on SUCCESS
402 *          errval on FAILURE
403 */
404errval_t numa_ram_alloc_on_node(struct capref *dest, size_t size,
405                                nodeid_t node, size_t *ret_size);
406
407/**
408 * \brief allocates a frame on a specific node
409 *
410 * \param dest      capref to store the frame
 * \param size      size of the frame to allocate
412 * \param node      node on which the frame should be allocated
413 * \param ret_size  returned size of the frame capability
414 *
415 * \returns SYS_ERR_OK on SUCCESS
416 *          errval on FAILURE
417 */
418errval_t numa_frame_alloc_on_node(struct capref *dest,
419                                  size_t size,
420                                  nodeid_t node,
421                                  size_t *ret_size);
422
423/**
424 * \brief allocates a frame on the local node
425 *
426 * \param dest      capref to store the frame
427 * \param size      size of the frame to allocated
428 * \param ret_size  returned size of the frame capability
429 *
430 * \returns SYS_ERR_OK on SUCCESS
431 *          errval on FAILURE
432 */
433static inline errval_t numa_frame_alloc_local(struct capref *dest,
434                                              size_t size,
435                                              size_t *ret_size)
436{
437    return numa_frame_alloc_on_node(dest, size, numa_current_node(), ret_size);
438}
439
440/**
441 * \brief frees a previously allocated frame
442 *
443 * \param frame capability to free
444 */
445errval_t numa_frame_free(struct capref frame);
446
447/**
448 * \brief runs the current domain on a specific node.
449 *
450 * \param node  ID of the node to run the domain on
451 *
452 * \returns SYS_ERR_OK on SUCCESS
453 *          errval on FAILURE
454 *
455 * Passing -1 permits the kernel to schedule on all nodes again
456 */
457errval_t numa_run_on_node(nodeid_t node);
458
459/**
460 * \brief runs the current domain only on nodes specified in nodemask.
461 *
462 * \param nodemask bitmap representing the nodes to run the domain on
463 *
464 * \returns SYS_ERR_OK on SUCCESS
465 *          errval on FAILURE
466 */
467errval_t numa_run_on_node_mask(struct bitmap *nodemask);
468
469/**
470 * \brief returns a mask of CPUs on which the current task is allowed to run.
471 *
 * \returns bitmap representing the coreids the domain is allowed to run on
473 */
474struct bitmap *numa_get_run_node_mask(void);
475
476/**
477 * \brief specify the memory bind policy
478 *
479 * \param strict numa policy to apply
480 *
 * specifies whether calls that bind memory to a specific node should use the
 * preferred policy or a strict policy.
482 */
483void numa_set_bind_policy(numa_policy_t strict);
484
485/**
486 * \brief enable or disable the strict allocation policy
487 *
488 * \param strict numa policy to apply
489 *
 * sets a flag that says whether the functions allocating on specific nodes
 * should use a strict policy. Strict means the allocation will fail if the
 * memory cannot be allocated on the target node.
493 */
494void numa_set_strict(numa_policy_t strict);
495
496/**
497 * \brief reports the distance in the machine topology between two nodes
498 *
499 * \param from source node to measure the distance
500 * \param to   target node to measure the distance
501 *
502 * \returns distance between two nodes
 *          0 if the distance cannot be determined
504 *
505 * The factors are a multiple of 10.  A node has distance 10 to itself.
506 */
507uint32_t numa_distance(nodeid_t from, nodeid_t to);
508
509/**
510 * \brief retrieves a bitmask of the cpus on which a domain may run
511 *
512 * \param did   domain ID
513 * \param mask  returned bitmask
514 *
515 * \returns SYS_ERR_OK on success
516 *          errval on FAILURE
517 */
518errval_t numa_sched_getaffinity(domainid_t did, struct bitmap *mask);
519
520/**
521 * \brief sets a domain's allowed cpu's to those cpu's specified in mask.
522 *
523 * \param did   domain ID
524 * \param mask  bitmap representing the CPUs
525 *
526 * \returns SYS_ERR_OK on success
527 *          errval on FAILURE
528 */
529errval_t numa_sched_setaffinity(domainid_t did, struct bitmap *mask);
530
531/**
532 * \brief returns the page size
533 *
534 * \returns the number of bytes in a page
535 */
536size_t numa_pagesize(void);
537
538/**
539 * \brief converts a node number to a bitmask of CPUs
540 *
541 * \param node  the ID of the node
542 * \param mask  bitmap representing the CPUs of this node
543 *
544 * \return  SYS_ERR_OK on SUCCESS
545 *          NUMA_ERR_BITMAP_RANGE on FAILURE (too small bitmap)
546 *
547 * The user must pass a bitmask structure with a mask buffer long enough to
548 * represent all possible cpu's
549 */
550errval_t numa_node_to_cpus(nodeid_t node, struct bitmap *mask);
551
552
553/**
554 * \brief gets the number of cores for the given numa node
555 *
556 * \param node NUMA node to get the number of cores
557 *
558 * \returns number of cores for the node
559 */
560coreid_t numa_num_node_cpus(nodeid_t node);
561
562/**
563 * \brief gets the system's core ID for a node/local core id configuration
564 *
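 * \param node          ID of the NUMA node
 * \param local_core_id node-local ID of the core
 *
 * \returns system-wide core ID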
566 */
567coreid_t numa_node_get_core(nodeid_t node, coreid_t local_core_id);
568
569
570/**
571 * \brief returns the node that a cpu belongs to
572 *
573 * \param cpu   ID of the core
574 *
575 * \returns node ID on SUCCESS
576 *          NUMA_NODE_INVALID on FAILURE
577 */
578nodeid_t numa_node_of_cpu(coreid_t cpu);
579
580/**
581 * \brief allocates a bit mask to represent the cores in the system
582 *
583 * \returns pointer to a new bitmask
584 *          NULL on failure
585 */
586struct bitmap *numa_allocate_cpumask(void);
587
588/**
589 * \brief frees a previously allocated CPU bitmask
590 *
591 * \param cpumask pointer to a previously allocated CPU bitmask
592 */
593void numa_free_cpumask(struct bitmap *cpumask);
594
595/**
596 * \brief allocates a bit mask to represent the nodes in the system
597 *
598 * \returns pointer to a new bitmask
599 *          NULL on failure
600 */
601struct bitmap *numa_allocate_nodemask(void);
602
603/**
604 * \brief frees a previously allocated node bitmask
605 *
606 * \param nodemask pointer to a previously allocated node bitmask
607 */
608void numa_free_nodemask(struct bitmap *nodemask);
609
610/**
611 * \brief allocates a bitmask structure and its associated bit mask
612 *
613 * \param n the number of bits
614 *
615 * \returns pointer to the bitmask
616 *          NULL on error
617 */
618struct bitmap *numa_bitmask_alloc(unsigned int n);
619
620/**
621 * \brief sets all bits in the bit mask to 0.
622 *
623 * \param bmp   pointer to the bitmap
624 *
625 * \returns pointer to the cleared bit map
626 */
627struct bitmap *numa_bitmask_clearall(struct bitmap *bmp);
628
629/**
630 * \brief clears the n-th bit of a bitmask
631 *
632 * \param bmp   the bitmask
633 * \param n     the bit to clear
634 *
635 * \returns pointer to the bitmask
636 */
637struct bitmap *numa_bitmask_clearbit(struct bitmap *bmp, unsigned int n);
638
639/**
640 * \brief checks if two bitmasks are equal
641 *
642 * \param bmp1  bitmask 1
643 * \param bmp2  bitmask 2
644 *
645 * \return TRUE if the bitmasks are equal
 *         FALSE if they are distinct
647 */
648bool numa_bitmask_equal(const struct bitmap *bmp1, const struct bitmap *bmp2);
649
650/**
651 * \brief frees the memory of a bitmask
652 *
653 * \param bmp the bitmask to be freed
654 */
655void numa_bitmask_free(struct bitmap *bmp);
656
657/**
658 * \brief checks if the n-th bit is set in the bitmask
659 *
660 * \param bmp   the bitmap
661 * \param n     which bit to check
662 *
663 * \returns TRUE if the n-th bit is set
664 *          FALSE otherwise
665 */
666bool numa_bitmask_isbitset(const struct bitmap *bmp, unsigned int n);
667
668/**
669 * \brief returns the size (in bytes) of the bit mask
670 *
671 * \param bmp   the bitmask
672 *
673 * \returns the size of the memory in bytes rounded up to a multiple of wordsize
674 */
675size_t numa_bitmask_nbytes(struct bitmap *bmp);
676
677/**
678 * \brief sets all bits of a bitmask to 1
679 *
680 * \param bmp the bitmask
681 *
682 * \returns the bitmask
683 */
684struct bitmap *numa_bitmask_setall(struct bitmap *bmp);
685
686/**
687 * \brief sets the n-th bit of a bitmask to 1
688 *
689 * \param bmp   the bitmask
690 * \param n     which bit to activate
691 *
692 * \returns the bitmask
693 */
694struct bitmap *numa_bitmask_setbit(struct bitmap *bmp, unsigned int n);
695
696/**
697 * \brief copies the bitmask to a nodemask
698 *
699 * \param bmp       the bitmask to copy
700 * \param nodemask  the destination nodemask
701 *
702 * If the two areas differ in size, the copy is truncated to the size of the
703 * receiving field or zero-filled.
704 */
705void copy_bitmask_to_nodemask(struct bitmap *bmp, nodemask_t *nodemask);
706
707/**
708 * \brief copies the contents of a nodemask into the bitmask
709 *
710 * \param nodemask  node mask to copy from
711 * \param bmp       bitmap to copy into
712 *
713 * If the two areas differ in size, the copy is truncated to the size of the
714 * receiving field or zero-filled.
715 */
716void copy_nodemask_to_bitmask(nodemask_t *nodemask, struct bitmap *bmp);
717
718/**
719 * \brief copies one bitmask into another
720 *
721 * \param bmpfrom   the source bitmask
722 * \param bmpto     the destination bitmask
723 *
724 * If the two areas differ in size, the copy is truncated to the size of the
725 * receiving field or zero-filled.
726 */
727void copy_bitmask_to_bitmask(struct bitmap *bmpfrom, struct bitmap *bmpto);
728
729/**
730 * \brief returns a count of the bits that are set in the body of the bitmask
731 *
732 * \param bmp   the bitmask to count the set bits
733 *
734 * \return number of set bits in this bitmask
735 */
736uint32_t numa_bitmask_weight(const struct bitmap *bmp);
737
738/**
739 * \brief  moves a list of pages in the address space of the current domain
740 *
741 * \param did    the domain ID
742 * \param count  number of pages to move
743 * \param pages  list of pages
744 * \param nodes  list of nodes to which the pages can be moved
745 * \param status returns the outcome for each page
746 * \param flags  flags for moving the pages
747 *
748 * \returns SYS_ERR_OK on SUCCESS
749 */
750errval_t numa_move_pages(domainid_t did,
751                         size_t count,
752                         void **pages,
753                         const nodeid_t *nodes,
754                         errval_t *status,
755                         int flags);
756/**
757 * \brief migrate a domain from one set of nodes to another
758 *
759 * \param did        the domain ID
760 * \param fromnodes  bitmap representing the current nodes
 * \param tonodes    bitmap representing the destination nodes
762 *
763 * \returns SYS_ERR_OK on SUCCESS
764 */
765errval_t numa_migrate_pages(domainid_t did,
766                            struct bitmap *fromnodes,
767                            struct bitmap *tonodes);
768
769/**
770 * is a libnuma internal function that can be overridden by the user program. This
771 * function is called with a char * argument when a libnuma function fails.
772 * Overriding the library internal definition makes it possible to specify a
773 * different error handling strategy when a libnuma function fails. It does not
774 * affect numa_available(). The numa_error() function defined in libnuma prints an
775 * error on stderr and terminates the program if numa_exit_on_error is set to a
776 * non-zero value. The default value of numa_exit_on_error is zero.
777 *
778 * \param where
779 */
780void numa_error(char *where);
781
782extern int numa_exit_on_error;
783extern int numa_exit_on_warn;
784
785/**
786 * is a libnuma internal function that can be also overridden by the user program.
787 * It is called to warn the user when a libnuma function encounters a non-fatal
788 * error. The default implementation prints a warning to stderr. The first argument
789 * is a unique number identifying each warning. After that there is a printf(3)-style
790 * format string and a variable number of arguments. numa_warn exits the program
791 * when numa_exit_on_warn is set to a non-zero value. The default value of
792 * numa_exit_on_warn is zero.
793 *
794 * \param number
795 * \param where
796 */
797void numa_warn(int number, char *where, ...);
798
799#ifdef __cplusplus
800}
801#endif
802
803#endif /* __NUMA_H */
804