/* intel_utils.c revision 279470 */
1/*- 2 * Copyright (c) 2013 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Konstantin Belousov <kib@FreeBSD.org> 6 * under sponsorship from the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: stable/10/sys/x86/iommu/intel_utils.c 279470 2015-03-01 04:22:06Z rstone $"); 32 33#include <sys/param.h> 34#include <sys/bus.h> 35#include <sys/kernel.h> 36#include <sys/lock.h> 37#include <sys/malloc.h> 38#include <sys/memdesc.h> 39#include <sys/mutex.h> 40#include <sys/proc.h> 41#include <sys/queue.h> 42#include <sys/rman.h> 43#include <sys/rwlock.h> 44#include <sys/sched.h> 45#include <sys/sf_buf.h> 46#include <sys/sysctl.h> 47#include <sys/systm.h> 48#include <sys/taskqueue.h> 49#include <sys/tree.h> 50#include <dev/pci/pcivar.h> 51#include <vm/vm.h> 52#include <vm/vm_extern.h> 53#include <vm/vm_kern.h> 54#include <vm/vm_object.h> 55#include <vm/vm_page.h> 56#include <vm/vm_map.h> 57#include <vm/vm_pageout.h> 58#include <machine/bus.h> 59#include <machine/cpu.h> 60#include <x86/include/busdma_impl.h> 61#include <x86/iommu/intel_reg.h> 62#include <x86/iommu/busdma_dmar.h> 63#include <x86/iommu/intel_dmar.h> 64 65u_int 66dmar_nd2mask(u_int nd) 67{ 68 static const u_int masks[] = { 69 0x000f, /* nd == 0 */ 70 0x002f, /* nd == 1 */ 71 0x00ff, /* nd == 2 */ 72 0x02ff, /* nd == 3 */ 73 0x0fff, /* nd == 4 */ 74 0x2fff, /* nd == 5 */ 75 0xffff, /* nd == 6 */ 76 0x0000, /* nd == 7 reserved */ 77 }; 78 79 KASSERT(nd <= 6, ("number of domains %d", nd)); 80 return (masks[nd]); 81} 82 83static const struct sagaw_bits_tag { 84 int agaw; 85 int cap; 86 int awlvl; 87 int pglvl; 88} sagaw_bits[] = { 89 {.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL, 90 .pglvl = 2}, 91 {.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL, 92 .pglvl = 3}, 93 {.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL, 94 .pglvl = 4}, 95 {.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL, 96 .pglvl = 5}, 97 {.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL, 98 .pglvl = 6} 99}; 100#define SIZEOF_SAGAW_BITS (sizeof(sagaw_bits) / sizeof(sagaw_bits[0])) 101 102bool 
103dmar_pglvl_supported(struct dmar_unit *unit, int pglvl) 104{ 105 int i; 106 107 for (i = 0; i < SIZEOF_SAGAW_BITS; i++) { 108 if (sagaw_bits[i].pglvl != pglvl) 109 continue; 110 if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0) 111 return (true); 112 } 113 return (false); 114} 115 116int 117ctx_set_agaw(struct dmar_ctx *ctx, int mgaw) 118{ 119 int sagaw, i; 120 121 ctx->mgaw = mgaw; 122 sagaw = DMAR_CAP_SAGAW(ctx->dmar->hw_cap); 123 for (i = 0; i < SIZEOF_SAGAW_BITS; i++) { 124 if (sagaw_bits[i].agaw >= mgaw) { 125 ctx->agaw = sagaw_bits[i].agaw; 126 ctx->pglvl = sagaw_bits[i].pglvl; 127 ctx->awlvl = sagaw_bits[i].awlvl; 128 return (0); 129 } 130 } 131 device_printf(ctx->dmar->dev, 132 "context request mgaw %d for pci%d:%d:%d:%d, " 133 "no agaw found, sagaw %x\n", mgaw, ctx->dmar->segment, 134 pci_get_bus(ctx->ctx_tag.owner), 135 pci_get_slot(ctx->ctx_tag.owner), 136 pci_get_function(ctx->ctx_tag.owner), sagaw); 137 return (EINVAL); 138} 139 140/* 141 * Find a best fit mgaw for the given maxaddr: 142 * - if allow_less is false, must find sagaw which maps all requested 143 * addresses (used by identity mappings); 144 * - if allow_less is true, and no supported sagaw can map all requested 145 * address space, accept the biggest sagaw, whatever is it. 
146 */ 147int 148dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less) 149{ 150 int i; 151 152 for (i = 0; i < SIZEOF_SAGAW_BITS; i++) { 153 if ((1ULL << sagaw_bits[i].agaw) >= maxaddr && 154 (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0) 155 break; 156 } 157 if (allow_less && i == SIZEOF_SAGAW_BITS) { 158 do { 159 i--; 160 } while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) 161 == 0); 162 } 163 if (i < SIZEOF_SAGAW_BITS) 164 return (sagaw_bits[i].agaw); 165 KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d", 166 (uintmax_t) maxaddr, allow_less)); 167 return (-1); 168} 169 170/* 171 * Calculate the total amount of page table pages needed to map the 172 * whole bus address space on the context with the selected agaw. 173 */ 174vm_pindex_t 175pglvl_max_pages(int pglvl) 176{ 177 vm_pindex_t res; 178 int i; 179 180 for (res = 0, i = pglvl; i > 0; i--) { 181 res *= DMAR_NPTEPG; 182 res++; 183 } 184 return (res); 185} 186 187/* 188 * Return true if the page table level lvl supports the superpage for 189 * the context ctx. 
190 */ 191int 192ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl) 193{ 194 int alvl, cap_sps; 195 static const int sagaw_sp[] = { 196 DMAR_CAP_SPS_2M, 197 DMAR_CAP_SPS_1G, 198 DMAR_CAP_SPS_512G, 199 DMAR_CAP_SPS_1T 200 }; 201 202 alvl = ctx->pglvl - lvl - 1; 203 cap_sps = DMAR_CAP_SPS(ctx->dmar->hw_cap); 204 return (alvl < sizeof(sagaw_sp) / sizeof(sagaw_sp[0]) && 205 (sagaw_sp[alvl] & cap_sps) != 0); 206} 207 208dmar_gaddr_t 209pglvl_page_size(int total_pglvl, int lvl) 210{ 211 int rlvl; 212 static const dmar_gaddr_t pg_sz[] = { 213 (dmar_gaddr_t)DMAR_PAGE_SIZE, 214 (dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT, 215 (dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT), 216 (dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT), 217 (dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT), 218 (dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT) 219 }; 220 221 KASSERT(lvl >= 0 && lvl < total_pglvl, 222 ("total %d lvl %d", total_pglvl, lvl)); 223 rlvl = total_pglvl - lvl - 1; 224 KASSERT(rlvl < sizeof(pg_sz) / sizeof(pg_sz[0]), 225 ("sizeof pg_sz lvl %d", lvl)); 226 return (pg_sz[rlvl]); 227} 228 229dmar_gaddr_t 230ctx_page_size(struct dmar_ctx *ctx, int lvl) 231{ 232 233 return (pglvl_page_size(ctx->pglvl, lvl)); 234} 235 236int 237calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size, 238 dmar_gaddr_t *isizep) 239{ 240 dmar_gaddr_t isize; 241 int am; 242 243 for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) { 244 isize = 1ULL << (am + DMAR_PAGE_SHIFT); 245 if ((base & (isize - 1)) == 0 && size >= isize) 246 break; 247 if (am == 0) 248 break; 249 } 250 *isizep = isize; 251 return (am); 252} 253 254dmar_haddr_t dmar_high; 255int haw; 256int dmar_tbl_pagecnt; 257 258vm_page_t 259dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags) 260{ 261 vm_page_t m; 262 int zeroed; 263 264 zeroed = (flags & DMAR_PGF_ZERO) != 0 ? 
VM_ALLOC_ZERO : 0; 265 for (;;) { 266 if ((flags & DMAR_PGF_OBJL) == 0) 267 VM_OBJECT_WLOCK(obj); 268 m = vm_page_lookup(obj, idx); 269 if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) { 270 if ((flags & DMAR_PGF_OBJL) == 0) 271 VM_OBJECT_WUNLOCK(obj); 272 break; 273 } 274 m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY | 275 VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0, 276 dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); 277 if ((flags & DMAR_PGF_OBJL) == 0) 278 VM_OBJECT_WUNLOCK(obj); 279 if (m != NULL) { 280 if (zeroed && (m->flags & PG_ZERO) == 0) 281 pmap_zero_page(m); 282 atomic_add_int(&dmar_tbl_pagecnt, 1); 283 break; 284 } 285 if ((flags & DMAR_PGF_WAITOK) == 0) 286 break; 287 if ((flags & DMAR_PGF_OBJL) != 0) 288 VM_OBJECT_WUNLOCK(obj); 289 VM_WAIT; 290 if ((flags & DMAR_PGF_OBJL) != 0) 291 VM_OBJECT_WLOCK(obj); 292 } 293 return (m); 294} 295 296void 297dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags) 298{ 299 vm_page_t m; 300 301 if ((flags & DMAR_PGF_OBJL) == 0) 302 VM_OBJECT_WLOCK(obj); 303 m = vm_page_lookup(obj, idx); 304 if (m != NULL) { 305 vm_page_free(m); 306 atomic_subtract_int(&dmar_tbl_pagecnt, 1); 307 } 308 if ((flags & DMAR_PGF_OBJL) == 0) 309 VM_OBJECT_WUNLOCK(obj); 310} 311 312void * 313dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags, 314 struct sf_buf **sf) 315{ 316 vm_page_t m; 317 bool allocated; 318 319 if ((flags & DMAR_PGF_OBJL) == 0) 320 VM_OBJECT_WLOCK(obj); 321 m = vm_page_lookup(obj, idx); 322 if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) { 323 m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL); 324 allocated = true; 325 } else 326 allocated = false; 327 if (m == NULL) { 328 if ((flags & DMAR_PGF_OBJL) == 0) 329 VM_OBJECT_WUNLOCK(obj); 330 return (NULL); 331 } 332 /* Sleepable allocations cannot fail. */ 333 if ((flags & DMAR_PGF_WAITOK) != 0) 334 VM_OBJECT_WUNLOCK(obj); 335 sched_pin(); 336 *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK) 337 == 0 ? 
SFB_NOWAIT : 0)); 338 if (*sf == NULL) { 339 sched_unpin(); 340 if (allocated) { 341 VM_OBJECT_ASSERT_WLOCKED(obj); 342 dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL); 343 } 344 if ((flags & DMAR_PGF_OBJL) == 0) 345 VM_OBJECT_WUNLOCK(obj); 346 return (NULL); 347 } 348 if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 349 (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) 350 VM_OBJECT_WLOCK(obj); 351 else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0) 352 VM_OBJECT_WUNLOCK(obj); 353 return ((void *)sf_buf_kva(*sf)); 354} 355 356void 357dmar_unmap_pgtbl(struct sf_buf *sf) 358{ 359 360 sf_buf_free(sf); 361 sched_unpin(); 362} 363 364static void 365dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz) 366{ 367 368 if (DMAR_IS_COHERENT(unit)) 369 return; 370 /* 371 * If DMAR does not snoop paging structures accesses, flush 372 * CPU cache to memory. 373 */ 374 pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz, 375 TRUE); 376} 377 378void 379dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst) 380{ 381 382 dmar_flush_transl_to_ram(unit, dst, sizeof(*dst)); 383} 384 385void 386dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst) 387{ 388 389 dmar_flush_transl_to_ram(unit, dst, sizeof(*dst)); 390} 391 392void 393dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst) 394{ 395 396 dmar_flush_transl_to_ram(unit, dst, sizeof(*dst)); 397} 398 399/* 400 * Load the root entry pointer into the hardware, busily waiting for 401 * the completion. 402 */ 403int 404dmar_load_root_entry_ptr(struct dmar_unit *unit) 405{ 406 vm_page_t root_entry; 407 408 /* 409 * Access to the GCMD register must be serialized while the 410 * command is submitted. 
411 */ 412 DMAR_ASSERT_LOCKED(unit); 413 414 VM_OBJECT_RLOCK(unit->ctx_obj); 415 root_entry = vm_page_lookup(unit->ctx_obj, 0); 416 VM_OBJECT_RUNLOCK(unit->ctx_obj); 417 dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry)); 418 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP); 419 /* XXXKIB should have a timeout */ 420 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) == 0) 421 cpu_spinwait(); 422 return (0); 423} 424 425/* 426 * Globally invalidate the context entries cache, busily waiting for 427 * the completion. 428 */ 429int 430dmar_inv_ctx_glob(struct dmar_unit *unit) 431{ 432 433 /* 434 * Access to the CCMD register must be serialized while the 435 * command is submitted. 436 */ 437 DMAR_ASSERT_LOCKED(unit); 438 KASSERT(!unit->qi_enabled, ("QI enabled")); 439 440 /* 441 * The DMAR_CCMD_ICC bit in the upper dword should be written 442 * after the low dword write is completed. Amd64 443 * dmar_write8() does not have this issue, i386 dmar_write8() 444 * writes the upper dword last. 445 */ 446 dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB); 447 /* XXXKIB should have a timeout */ 448 while ((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) != 0) 449 cpu_spinwait(); 450 return (0); 451} 452 453/* 454 * Globally invalidate the IOTLB, busily waiting for the completion. 455 */ 456int 457dmar_inv_iotlb_glob(struct dmar_unit *unit) 458{ 459 int reg; 460 461 DMAR_ASSERT_LOCKED(unit); 462 KASSERT(!unit->qi_enabled, ("QI enabled")); 463 464 reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap); 465 /* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */ 466 dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT | 467 DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW); 468 /* XXXKIB should have a timeout */ 469 while ((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) & 470 DMAR_IOTLB_IVT32) != 0) 471 cpu_spinwait(); 472 return (0); 473} 474 475/* 476 * Flush the chipset write buffers. 
See 11.1 "Write Buffer Flushing" 477 * in the architecture specification. 478 */ 479int 480dmar_flush_write_bufs(struct dmar_unit *unit) 481{ 482 483 DMAR_ASSERT_LOCKED(unit); 484 485 /* 486 * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported. 487 */ 488 KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0, 489 ("dmar%d: no RWBF", unit->unit)); 490 491 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF); 492 /* XXXKIB should have a timeout */ 493 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) == 0) 494 cpu_spinwait(); 495 return (0); 496} 497 498int 499dmar_enable_translation(struct dmar_unit *unit) 500{ 501 502 DMAR_ASSERT_LOCKED(unit); 503 unit->hw_gcmd |= DMAR_GCMD_TE; 504 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); 505 /* XXXKIB should have a timeout */ 506 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0) 507 cpu_spinwait(); 508 return (0); 509} 510 511int 512dmar_disable_translation(struct dmar_unit *unit) 513{ 514 515 DMAR_ASSERT_LOCKED(unit); 516 unit->hw_gcmd &= ~DMAR_GCMD_TE; 517 dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd); 518 /* XXXKIB should have a timeout */ 519 while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0) 520 cpu_spinwait(); 521 return (0); 522} 523 524#define BARRIER_F \ 525 u_int f_done, f_inproc, f_wakeup; \ 526 \ 527 f_done = 1 << (barrier_id * 3); \ 528 f_inproc = 1 << (barrier_id * 3 + 1); \ 529 f_wakeup = 1 << (barrier_id * 3 + 2) 530 531bool 532dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id) 533{ 534 BARRIER_F; 535 536 DMAR_LOCK(dmar); 537 if ((dmar->barrier_flags & f_done) != 0) { 538 DMAR_UNLOCK(dmar); 539 return (false); 540 } 541 542 if ((dmar->barrier_flags & f_inproc) != 0) { 543 while ((dmar->barrier_flags & f_inproc) != 0) { 544 dmar->barrier_flags |= f_wakeup; 545 msleep(&dmar->barrier_flags, &dmar->lock, 0, 546 "dmarb", 0); 547 } 548 KASSERT((dmar->barrier_flags & f_done) != 0, 549 ("dmar%d barrier %d missing done", dmar->unit, barrier_id)); 550 
DMAR_UNLOCK(dmar); 551 return (false); 552 } 553 554 dmar->barrier_flags |= f_inproc; 555 DMAR_UNLOCK(dmar); 556 return (true); 557} 558 559void 560dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id) 561{ 562 BARRIER_F; 563 564 DMAR_ASSERT_LOCKED(dmar); 565 KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc, 566 ("dmar%d barrier %d missed entry", dmar->unit, barrier_id)); 567 dmar->barrier_flags |= f_done; 568 if ((dmar->barrier_flags & f_wakeup) != 0) 569 wakeup(&dmar->barrier_flags); 570 dmar->barrier_flags &= ~(f_inproc | f_wakeup); 571 DMAR_UNLOCK(dmar); 572} 573 574int dmar_match_verbose; 575 576static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL, 577 ""); 578SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD | CTLFLAG_TUN, 579 &dmar_tbl_pagecnt, 0, 580 "Count of pages used for DMAR pagetables"); 581SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RW | CTLFLAG_TUN, 582 &dmar_match_verbose, 0, 583 "Verbose matching of the PCI devices to DMAR paths"); 584#ifdef INVARIANTS 585int dmar_check_free; 586SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RW | CTLFLAG_TUN, 587 &dmar_check_free, 0, 588 "Check the GPA RBtree for free_down and free_after validity"); 589#endif 590 591