/*-
 * SPDX-License-Identifier: Beerware
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 */

#ifndef _SYS_SMP_H_
#define	_SYS_SMP_H_

#ifdef _KERNEL

#ifndef LOCORE

#include <sys/cpuset.h>
#include <sys/queue.h>

/*
 * Types of nodes in the topological tree.
 */
typedef enum {
	/* No node has this type; can be used in topo API calls. */
	TOPO_TYPE_DUMMY,
	/* Processing unit aka computing unit aka logical CPU. */
	TOPO_TYPE_PU,
	/* Physical subdivision of a package. */
	TOPO_TYPE_CORE,
	/* CPU L1/L2/L3 cache. */
	TOPO_TYPE_CACHE,
	/* Package aka chip, equivalent to socket. */
	TOPO_TYPE_PKG,
	/* NUMA node. */
	TOPO_TYPE_NODE,
	/* Other logical or physical grouping of PUs. */
	/* E.g. PUs on the same die, or PUs sharing an FPU. */
	TOPO_TYPE_GROUP,
	/* The whole system. */
	TOPO_TYPE_SYSTEM
} topo_node_type;

/* Hardware identifier of a topology component. */
typedef	unsigned int hwid_t;
/* Logical CPU identifier. */
typedef	int cpuid_t;

/*
 * A node in the topology.
 *
 * Nodes form a tree: each node points at its parent, heads a tail queue of
 * its children and is itself linked into its parent's queue via 'siblings'.
 */
struct topo_node {
	struct topo_node			*parent;
	TAILQ_HEAD(topo_children, topo_node)	children;
	TAILQ_ENTRY(topo_node)			siblings;
	cpuset_t				cpuset;
	topo_node_type				type;
	uintptr_t				subtype;
	hwid_t					hwid;
	cpuid_t					id;
	int					nchildren;
	int					cpu_count;
};

/*
 * Scheduling topology of a NUMA or SMP system.
 *
 * The top level topology is an array of pointers to groups.  Each group
 * contains a bitmask of cpus in its group or subgroups.  It may also
 * contain a pointer to an array of child groups.
 *
 * The bitmasks at non leaf groups may be used by consumers who support
 * a smaller depth than the hardware provides.
 *
 * The topology may be omitted by systems where all CPUs are equal.
 */

struct cpu_group {
	struct cpu_group *cg_parent;	/* Our parent group. */
	struct cpu_group *cg_child;	/* Optional children groups. */
	cpuset_t	cg_mask;	/* Mask of cpus in this group. */
	int32_t		cg_count;	/* Count of cpus in this group. */
	int32_t		cg_first;	/* First cpu in this group. */
	int32_t		cg_last;	/* Last cpu in this group. */
	int16_t		cg_children;	/* Number of children groups. */
	int8_t		cg_level;	/* Shared cache level. */
	int8_t		cg_flags;	/* Traversal modifiers. */
};

typedef struct cpu_group *cpu_group_t;

/*
 * Defines common resources for CPUs in the group.  The highest level
 * resource should be used when multiple are shared.
 */
#define	CG_SHARE_NONE	0
#define	CG_SHARE_L1	1
#define	CG_SHARE_L2	2
#define	CG_SHARE_L3	3

#define	MAX_CACHE_LEVELS	CG_SHARE_L3

/*
 * Behavior modifiers for load balancing and affinity.
 */
#define	CG_FLAG_HTT	0x01		/* Schedule the alternate core last. */
#define	CG_FLAG_SMT	0x02		/* New age htt, less crippled. */
#define	CG_FLAG_THREAD	(CG_FLAG_HTT | CG_FLAG_SMT)	/* Any threading. */
#define	CG_FLAG_NODE	0x04		/* NUMA node. */

/*
 * Convenience routines for building and traversing topologies.
 */
#ifdef SMP
void topo_init_node(struct topo_node *node);
void topo_init_root(struct topo_node *root);
struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
void topo_promote_child(struct topo_node *child);
struct topo_node * topo_next_node(struct topo_node *top,
    struct topo_node *node);
struct topo_node * topo_next_nonchild_node(struct topo_node *top,
    struct topo_node *node);
void topo_set_pu_id(struct topo_node *node, cpuid_t id);

enum topo_level {
	TOPO_LEVEL_PKG = 0,
	/*
	 * Some systems have useful sub-package core organizations.  On these,
	 * a package has one or more subgroups.  Each subgroup contains one or
	 * more cache groups (cores that share a last level cache).
	 */
	TOPO_LEVEL_GROUP,
	TOPO_LEVEL_CACHEGROUP,
	TOPO_LEVEL_CORE,
	TOPO_LEVEL_THREAD,
	TOPO_LEVEL_COUNT	/* Must be last */
};
/* Per-level entity counts filled in by topo_analyze(). */
struct topo_analysis {
	int entities[TOPO_LEVEL_COUNT];
};
int topo_analyze(struct topo_node *topo_root, int all,
    struct topo_analysis *results);

/*
 * Walk the topology starting at 'root', advancing with topo_next_node()
 * until it returns NULL.
 */
#define	TOPO_FOREACH(i, root)	\
	for (i = root; i != NULL; i = topo_next_node(root, i))

struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
    int l1count, int l1flags);
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);

extern void (*cpustop_restartfunc)(void);
/* The suspend/resume cpusets are x86 only, but minimize ifdefs. */
extern volatile cpuset_t resuming_cpus;	/* woken up cpus in suspend pen */
extern volatile cpuset_t started_cpus;	/* cpus to let out of stop pen */
extern volatile cpuset_t stopped_cpus;	/* cpus in stop pen */
extern volatile cpuset_t suspended_cpus; /* cpus [near] sleeping in susp pen */
extern volatile cpuset_t toresume_cpus;	/* cpus to let out of suspend pen */
extern cpuset_t hlt_cpus_mask;		/* XXX 'mask' is detail in old impl */
extern cpuset_t logical_cpus_mask;
#endif /* SMP */

extern u_int mp_maxid;
extern int mp_maxcpus;
extern int mp_ncores;
extern int mp_ncpus;
extern int smp_cpus;
extern volatile int smp_started;
extern int smp_threads_per_core;

extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM];	/* CPUs in each NUMA domain. */

struct pcb;
extern struct pcb *stoppcbs;

/*
 * Macro allowing us to determine whether a CPU is absent at any given
 * time, thus permitting us to configure sparse maps of cpuid-dependent
 * (per-CPU) structures.
 */
#define	CPU_ABSENT(x_cpu)	(!CPU_ISSET(x_cpu, &all_cpus))

/*
 * Macros to iterate over non-absent CPUs.  CPU_FOREACH() takes an
 * integer iterator and iterates over the available set of CPUs.
 * CPU_FIRST() returns the id of the first non-absent CPU.  CPU_NEXT()
 * returns the id of the next non-absent CPU.  It will wrap back to
 * CPU_FIRST() once the end of the list is reached.  The iterators are
 * currently implemented via inline functions.
 *
 * CPU_FOREACH() is spelled as "if (absent) { } else <body>" rather than
 * "if (!absent) <body>" so that the macro's own 'if' already owns an
 * 'else'.  An 'else' written by the caller after the loop body therefore
 * pairs with the caller's enclosing 'if' instead of being captured by the
 * macro.
 */
#define	CPU_FOREACH(i)							\
	for ((i) = 0; (i) <= mp_maxid; (i)++)				\
		if (CPU_ABSENT((i))) {					\
			/* No CPU behind this id; skip it. */		\
		} else

/*
 * Return the lowest id, counting up from 0, that is not absent.
 *
 * The scan has no upper bound: it only terminates because some bit is set
 * in all_cpus, so it must not be used before all_cpus is populated.
 */
static __inline int
cpu_first(void)
{
	int i;

	for (i = 0;; i++)
		if (!CPU_ABSENT(i))
			return (i);
}

/*
 * Return the id of the next non-absent CPU after 'i'.
 *
 * The candidate id wraps back to 0 once it exceeds mp_maxid, so the result
 * may be lower than 'i', or 'i' itself when it is the only CPU present.
 * Like cpu_first(), the loop exits only when a present CPU is found.
 */
static __inline int
cpu_next(int i)
{

	for (;;) {
		i++;
		if ((u_int)i > mp_maxid)
			i = 0;
		if (!CPU_ABSENT(i))
			return (i);
	}
}

#define	CPU_FIRST()	cpu_first()
#define	CPU_NEXT(i)	cpu_next((i))

#ifdef SMP
/*
 * Machine dependent functions used to initialize MP support.
 *
 * The cpu_mp_probe() should check to see if MP support is present and return
 * zero if it is not or non-zero if it is.  If MP support is present, then
 * cpu_mp_start() will be called so that MP can be enabled.  This function
 * should do things such as startup secondary processors.  It should also
 * setup mp_ncpus, all_cpus, and smp_cpus.  It should also ensure that
 * smp_started is initialized at the appropriate time.
 * Once cpu_mp_start() returns, machine independent MP startup code will be
 * executed and a simple message will be output to the console.  Finally,
 * cpu_mp_announce() will be called so that machine dependent messages about
 * the MP support may be output to the console if desired.
 *
 * The cpu_mp_setmaxid() function is called very early during the boot process
 * so that the MD code may set mp_maxid to provide an upper bound on CPU IDs
 * that other subsystems may use.  If a platform is not able to determine
 * the exact maximum ID that early, then it may set mp_maxid to MAXCPU - 1.
 */
struct thread;

struct cpu_group *cpu_topo(void);
void	cpu_mp_announce(void);
int	cpu_mp_probe(void);
void	cpu_mp_setmaxid(void);
void	cpu_mp_start(void);

void	forward_signal(struct thread *);
int	restart_cpus(cpuset_t);
int	stop_cpus(cpuset_t);
int	stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int	suspend_cpus(cpuset_t);
int	resume_cpus(cpuset_t);
#endif

void	smp_rendezvous_action(void);
extern	struct mtx smp_ipi_mtx;

#endif /* SMP */

int	quiesce_all_cpus(const char *, int);
int	quiesce_cpus(cpuset_t, const char *, int);
void	quiesce_all_critical(void);
void	cpus_fence_seq_cst(void);
void	smp_no_rendezvous_barrier(void *);
void	smp_rendezvous(void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);
void	smp_rendezvous_cpus(cpuset_t,
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);

/* Argument block passed to the smp_rendezvous_cpus_retry() family. */
struct smp_rendezvous_cpus_retry_arg {
	cpuset_t cpus;
};
void	smp_rendezvous_cpus_retry(cpuset_t,
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *, int),
		       struct smp_rendezvous_cpus_retry_arg *);

void	smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *);

#endif /* !LOCORE */
#endif /* _KERNEL */
#endif /* _SYS_SMP_H_ */