/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * 28 */ 29 30#include <sys/cdefs.h> 31#include "opt_vm.h" 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/bitset.h> 36#include <sys/domainset.h> 37#include <sys/proc.h> 38#include <sys/lock.h> 39#include <sys/mutex.h> 40#include <sys/malloc.h> 41#include <sys/rwlock.h> 42#include <sys/vmmeter.h> 43 44#include <vm/vm.h> 45#include <vm/vm_param.h> 46#include <vm/vm_domainset.h> 47#include <vm/vm_object.h> 48#include <vm/vm_page.h> 49#include <vm/vm_phys.h> 50 51#ifdef NUMA 52/* 53 * Iterators are written such that the first nowait pass has as short a 54 * codepath as possible to eliminate bloat from the allocator. It is 55 * assumed that most allocations are successful. 56 */ 57 58static int vm_domainset_default_stride = 64; 59 60/* 61 * Determine which policy is to be used for this allocation. 62 */ 63static void 64vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds, 65 int *iter, struct vm_object *obj, vm_pindex_t pindex) 66{ 67 68 di->di_domain = ds; 69 di->di_iter = iter; 70 di->di_policy = ds->ds_policy; 71 DOMAINSET_COPY(&ds->ds_mask, &di->di_valid_mask); 72 if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) { 73#if VM_NRESERVLEVEL > 0 74 if (vm_object_reserv(obj)) { 75 /* 76 * Color the pindex so we end up on the correct 77 * reservation boundary. 78 */ 79 pindex += obj->pg_color; 80 pindex >>= VM_LEVEL_0_ORDER; 81 } else 82#endif 83 pindex /= vm_domainset_default_stride; 84 /* 85 * Offset pindex so the first page of each object does 86 * not end up in domain 0. 87 */ 88 if (obj != NULL) 89 pindex += (((uintptr_t)obj) / sizeof(*obj)); 90 di->di_offset = pindex; 91 } 92 /* Skip domains below min on the first pass. 
*/ 93 di->di_minskip = true; 94} 95 96static void 97vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain) 98{ 99 100 *domain = di->di_domain->ds_order[ 101 ++(*di->di_iter) % di->di_domain->ds_cnt]; 102} 103 104static void 105vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain) 106{ 107 int d; 108 109 do { 110 d = di->di_domain->ds_order[ 111 ++(*di->di_iter) % di->di_domain->ds_cnt]; 112 } while (d == di->di_domain->ds_prefer); 113 *domain = d; 114} 115 116static void 117vm_domainset_iter_interleave(struct vm_domainset_iter *di, int *domain) 118{ 119 int d; 120 121 d = di->di_offset % di->di_domain->ds_cnt; 122 *di->di_iter = d; 123 *domain = di->di_domain->ds_order[d]; 124} 125 126static void 127vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) 128{ 129 130 KASSERT(di->di_n > 0, 131 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 132 switch (di->di_policy) { 133 case DOMAINSET_POLICY_FIRSTTOUCH: 134 /* 135 * To prevent impossible allocations we convert an invalid 136 * first-touch to round-robin. 137 */ 138 /* FALLTHROUGH */ 139 case DOMAINSET_POLICY_INTERLEAVE: 140 /* FALLTHROUGH */ 141 case DOMAINSET_POLICY_ROUNDROBIN: 142 vm_domainset_iter_rr(di, domain); 143 break; 144 case DOMAINSET_POLICY_PREFER: 145 vm_domainset_iter_prefer(di, domain); 146 break; 147 default: 148 panic("vm_domainset_iter_first: Unknown policy %d", 149 di->di_policy); 150 } 151 KASSERT(*domain < vm_ndomains, 152 ("vm_domainset_iter_next: Invalid domain %d", *domain)); 153} 154 155static void 156vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) 157{ 158 159 switch (di->di_policy) { 160 case DOMAINSET_POLICY_FIRSTTOUCH: 161 *domain = PCPU_GET(domain); 162 if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) { 163 /* 164 * Add an extra iteration because we will visit the 165 * current domain a second time in the rr iterator. 
166 */ 167 di->di_n = di->di_domain->ds_cnt + 1; 168 break; 169 } 170 /* 171 * To prevent impossible allocations we convert an invalid 172 * first-touch to round-robin. 173 */ 174 /* FALLTHROUGH */ 175 case DOMAINSET_POLICY_ROUNDROBIN: 176 di->di_n = di->di_domain->ds_cnt; 177 vm_domainset_iter_rr(di, domain); 178 break; 179 case DOMAINSET_POLICY_PREFER: 180 *domain = di->di_domain->ds_prefer; 181 di->di_n = di->di_domain->ds_cnt; 182 break; 183 case DOMAINSET_POLICY_INTERLEAVE: 184 vm_domainset_iter_interleave(di, domain); 185 di->di_n = di->di_domain->ds_cnt; 186 break; 187 default: 188 panic("vm_domainset_iter_first: Unknown policy %d", 189 di->di_policy); 190 } 191 KASSERT(di->di_n > 0, 192 ("vm_domainset_iter_first: Invalid n %d", di->di_n)); 193 KASSERT(*domain < vm_ndomains, 194 ("vm_domainset_iter_first: Invalid domain %d", *domain)); 195} 196 197void 198vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 199 vm_pindex_t pindex, int *domain, int *req) 200{ 201 struct domainset_ref *dr; 202 203 /* 204 * Object policy takes precedence over thread policy. The policies 205 * are immutable and unsynchronized. Updates can race but pointer 206 * loads are assumed to be atomic. 207 */ 208 if (obj != NULL && obj->domain.dr_policy != NULL) 209 dr = &obj->domain; 210 else 211 dr = &curthread->td_domain; 212 vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex); 213 di->di_flags = *req; 214 *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | 215 VM_ALLOC_NOWAIT; 216 vm_domainset_iter_first(di, domain); 217 if (vm_page_count_min_domain(*domain)) 218 vm_domainset_iter_page(di, obj, domain); 219} 220 221int 222vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, 223 int *domain) 224{ 225 if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask))) 226 return (ENOMEM); 227 228 /* If there are more domains to visit we run the iterator. 
*/ 229 while (--di->di_n != 0) { 230 vm_domainset_iter_next(di, domain); 231 if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) && 232 (!di->di_minskip || !vm_page_count_min_domain(*domain))) 233 return (0); 234 } 235 236 /* If we skipped domains below min restart the search. */ 237 if (di->di_minskip) { 238 di->di_minskip = false; 239 vm_domainset_iter_first(di, domain); 240 return (0); 241 } 242 243 /* If we visited all domains and this was a NOWAIT we return error. */ 244 if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) 245 return (ENOMEM); 246 247 /* Wait for one of the domains to accumulate some free pages. */ 248 if (obj != NULL) 249 VM_OBJECT_WUNLOCK(obj); 250 vm_wait_doms(&di->di_valid_mask, 0); 251 if (obj != NULL) 252 VM_OBJECT_WLOCK(obj); 253 if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0) 254 return (ENOMEM); 255 256 /* Restart the search. */ 257 vm_domainset_iter_first(di, domain); 258 259 return (0); 260} 261 262static void 263_vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain, 264 int *flags) 265{ 266 267 di->di_flags = *flags; 268 *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; 269 vm_domainset_iter_first(di, domain); 270 if (vm_page_count_min_domain(*domain)) 271 vm_domainset_iter_policy(di, domain); 272} 273 274void 275vm_domainset_iter_policy_init(struct vm_domainset_iter *di, 276 struct domainset *ds, int *domain, int *flags) 277{ 278 279 vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0); 280 _vm_domainset_iter_policy_init(di, domain, flags); 281} 282 283void 284vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, 285 struct domainset_ref *dr, int *domain, int *flags) 286{ 287 288 vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0); 289 _vm_domainset_iter_policy_init(di, domain, flags); 290} 291 292int 293vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) 294{ 295 if (DOMAINSET_EMPTY(&di->di_valid_mask)) 296 return (ENOMEM); 297 298 /* If there are 
more domains to visit we run the iterator. */ 299 while (--di->di_n != 0) { 300 vm_domainset_iter_next(di, domain); 301 if (DOMAINSET_ISSET(*domain, &di->di_valid_mask) && 302 (!di->di_minskip || !vm_page_count_min_domain(*domain))) 303 return (0); 304 } 305 306 /* If we skipped domains below min restart the search. */ 307 if (di->di_minskip) { 308 di->di_minskip = false; 309 vm_domainset_iter_first(di, domain); 310 return (0); 311 } 312 313 /* If we visited all domains and this was a NOWAIT we return error. */ 314 if ((di->di_flags & M_WAITOK) == 0) 315 return (ENOMEM); 316 317 /* Wait for one of the domains to accumulate some free pages. */ 318 vm_wait_doms(&di->di_valid_mask, 0); 319 320 /* Restart the search. */ 321 vm_domainset_iter_first(di, domain); 322 323 return (0); 324} 325 326void 327vm_domainset_iter_ignore(struct vm_domainset_iter *di, int domain) 328{ 329 KASSERT(DOMAINSET_ISSET(domain, &di->di_valid_mask), 330 ("%s: domain %d not present in di_valid_mask for di %p", 331 __func__, domain, di)); 332 DOMAINSET_CLR(domain, &di->di_valid_mask); 333} 334 335#else /* !NUMA */ 336 337int 338vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, 339 int *domain) 340{ 341 342 return (EJUSTRETURN); 343} 344 345void 346vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, 347 vm_pindex_t pindex, int *domain, int *flags) 348{ 349 350 *domain = 0; 351} 352 353int 354vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) 355{ 356 357 return (EJUSTRETURN); 358} 359 360void 361vm_domainset_iter_policy_init(struct vm_domainset_iter *di, 362 struct domainset *ds, int *domain, int *flags) 363{ 364 365 *domain = 0; 366} 367 368void 369vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, 370 struct domainset_ref *dr, int *domain, int *flags) 371{ 372 373 *domain = 0; 374} 375 376void 377vm_domainset_iter_ignore(struct vm_domainset_iter *di __unused, 378 int domain __unused) 379{ 380} 381 382#endif /* 
NUMA */ 383