/*	$NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>

#define round_line16(x)         (((x) + 15) & ~15)
#define trunc_line16(x)         ((x) & ~15)

#define round_line32(x)         (((x) + 31) & ~31)
#define trunc_line32(x)         ((x) & ~31)

#if defined(CPU_NLM)
static __inline void
xlp_sync(void)
{
        __asm __volatile (
            ".set push          \n"
            ".set noreorder     \n"
            ".set mips64        \n"
            "dla    $8, 1f      \n"
            "/* jr.hb $8 */     \n"
            ".word 0x1000408    \n"
            "nop                \n"
            "1: nop             \n"
            ".set pop           \n"
            : : : "$8");
}
#endif

#if defined(SB1250_PASS1)
#define SYNC    __asm volatile("sync; sync")
#elif defined(CPU_NLM)
#define SYNC    xlp_sync()
#else
#define SYNC    __asm volatile("sync")
#endif

#if defined(CPU_CNMIPS)
#define SYNCI   mips_sync_icache();
#elif defined(CPU_NLM)
#define SYNCI   xlp_sync()
#else
#define SYNCI
#endif

/*
 * Exported variables for consumers such as the busdma code.
 */
int mips_picache_linesize;
int mips_pdcache_linesize;

static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;

void
mipsNN_cache_init(struct mips_cpuinfo *cpuinfo)
{
        int flush_multiple_lines_per_way;

        /* A way (nsets * linesize) larger than a page needs several passes. */
        flush_multiple_lines_per_way =
            cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize > PAGE_SIZE;
        if (cpuinfo->icache_virtual) {
                /*
                 * With a virtual Icache we don't need to flush
                 * multiples of the page size with index ops; we just
                 * need to flush one page's worth.
                 */
                flush_multiple_lines_per_way = 0;
        }

        if (flush_multiple_lines_per_way) {
                picache_stride = PAGE_SIZE;
                picache_loopcount = (cpuinfo->l1.ic_nsets *
                    cpuinfo->l1.ic_linesize / PAGE_SIZE) *
                    cpuinfo->l1.ic_nways;
        } else {
                picache_stride = cpuinfo->l1.ic_nsets *
                    cpuinfo->l1.ic_linesize;
                picache_loopcount = cpuinfo->l1.ic_nways;
        }

        if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
                pdcache_stride = cpuinfo->l1.dc_nsets *
                    cpuinfo->l1.dc_linesize;
                pdcache_loopcount = cpuinfo->l1.dc_nways;
        } else {
                pdcache_stride = PAGE_SIZE;
                pdcache_loopcount = (cpuinfo->l1.dc_nsets *
                    cpuinfo->l1.dc_linesize / PAGE_SIZE) *
                    cpuinfo->l1.dc_nways;
        }

        mips_picache_linesize = cpuinfo->l1.ic_linesize;
        mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

        picache_size = cpuinfo->l1.ic_size;
        /* The way mask selects the index bits within a single way. */
        picache_way_mask = picache_size / cpuinfo->l1.ic_nways - 1;
        pdcache_size = cpuinfo->l1.dc_size;
        pdcache_way_mask = pdcache_size / cpuinfo->l1.dc_nways - 1;

#define CACHE_DEBUG
#ifdef CACHE_DEBUG
        printf("Cache info:\n");
        if (cpuinfo->icache_virtual)
                printf("  icache is virtual\n");
        printf("  picache_stride    = %d\n", picache_stride);
        printf("  picache_loopcount = %d\n", picache_loopcount);
        printf("  pdcache_stride    = %d\n", pdcache_stride);
        printf("  pdcache_loopcount = %d\n", pdcache_loopcount);
#endif
}
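
/*
 * Worked example of the values computed above (illustrative geometry,
 * not from any particular CPU): with 4KB pages, a 4-way 16KB Icache
 * with 32-byte lines has a way size of ic_nsets * ic_linesize =
 * 128 * 32 = 4096 bytes.  One way fits in a page, so picache_stride is
 * 4096 and picache_loopcount is 4: one index pass per way.  A 2-way
 * 16KB cache has 8192-byte ways, so the stride drops to PAGE_SIZE and
 * picache_loopcount becomes (8192 / 4096) * 2 = 4 passes of one page
 * each.
 */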
void
mipsNN_icache_sync_all_16(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 16);
        }

        SYNC;
}

void
mipsNN_icache_sync_all_32(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + picache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        mips_intern_dcache_wbinv_all();

        while (va < eva) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += (32 * 32);
        }

        SYNC;
}

void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 16;
        }

        SYNC;
}

void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        mips_intern_dcache_wb_range(va, (eva - va));

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
                va += 32;
        }

        SYNC;
}

void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect not to be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line16(va + size);
        va = trunc_line16(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (8 * 16)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_16(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 8 * 16;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 16;
        }
}
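
/*
 * A note on the two loop shapes used throughout this file (for
 * illustration; the constants are the ones the helpers already encode):
 * "hit" operations take a virtual address and act on a line only if
 * that address is currently cached, so the plain range variants can
 * walk [va, eva) directly.  "Index" operations address the cache array
 * itself, which is why the index variants fold the caller's address
 * into an in-way index via the way mask, rebuild a KSEG0 address, and
 * then repeat each step loopcount times at stride-byte offsets so that
 * every way is covered.  In both cases the cache_r4k_op_{8,32}lines_*
 * helpers simply unroll the CACHE instruction to cut loop overhead,
 * and the trailing while loop finishes the remainder one line at a
 * time.
 */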
void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect not to be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

        eva = round_line32(va + size);
        va = trunc_line32(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = picache_stride;
        loopcount = picache_loopcount;

        mips_intern_dcache_wbinv_range_index(va, (eva - va));

        while ((eva - va) >= (8 * 32)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_32(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 8 * 32;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
                va += 32;
        }
}

void
mipsNN_pdcache_wbinv_all_16(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_16(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 16);
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_all_32(void)
{
        vm_offset_t va, eva;

        va = MIPS_PHYS_TO_KSEG0(0);
        eva = va + pdcache_size;

        /*
         * Since we're hitting the whole thing, we don't have to
         * worry about the N different "ways".
         */

        while (va < eva) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += (32 * 32);
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 16;
        }

        SYNC;
}

void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va,
                    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
                va += 32;
        }

        SYNC;
}
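
/*
 * Usage sketch for the Dcache entry points above and below (hedged:
 * the mips_dcache_* wrapper names come from <machine/cache.h> and
 * should be checked there, not here).  A bus_dma-style consumer
 * typically does:
 *
 *      mips_dcache_wb_range(va, len);     write back before device reads
 *      mips_dcache_inv_range(va, len);    invalidate before CPU reads
 *      mips_dcache_wbinv_range(va, len);  both, the conservative default
 *
 * The wbinv variants are the safe choice when in doubt, since plain
 * invalidation of a dirty line would discard unwritten data.
 */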
void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect not to be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line16(va + size);
        va = trunc_line16(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (8 * 16)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_16(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 8 * 16;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 16;
        }
}

void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva, tmpva;
        int i, stride, loopcount;

        /*
         * Since we're doing Index ops, we expect not to be able
         * to access the address we've been given.  So, get the
         * bits that determine the cache index, and make a KSEG0
         * address out of them.
         */
        va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

        eva = round_line32(va + size);
        va = trunc_line32(va);

        /*
         * GCC generates better code in the loops if we reference local
         * copies of these global variables.
         */
        stride = pdcache_stride;
        loopcount = pdcache_loopcount;

        while ((eva - va) >= (8 * 32)) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_r4k_op_8lines_32(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 8 * 32;
        }

        while (va < eva) {
                tmpva = va;
                for (i = 0; i < loopcount; i++, tmpva += stride)
                        cache_op_r4k_line(tmpva,
                            CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
                va += 32;
        }
}

void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 16;
        }

        SYNC;
}

void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
                va += 32;
        }

        SYNC;
}

void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line16(va + size);
        va = trunc_line16(va);

        while ((eva - va) >= (32 * 16)) {
                cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 16);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 16;
        }

        SYNC;
}

void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
        vm_offset_t eva;

        eva = round_line32(va + size);
        va = trunc_line32(va);

        while ((eva - va) >= (32 * 32)) {
                cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += (32 * 32);
        }

        while (va < eva) {
                cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
                va += 32;
        }

        SYNC;
}
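
/*
 * The 128-byte-line variants below exist for CPU_CNMIPS (Cavium
 * Octeon).  On those cores the L1 Dcache is write-through and kept
 * coherent by hardware, so the Dcache routines need at most an
 * ordering SYNC rather than per-line CACHE ops, and whole-Icache
 * synchronization is delegated to mips_sync_icache() through the
 * SYNCI macro defined above.
 */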
#ifdef CPU_CNMIPS

void
mipsNN_icache_sync_all_128(void)
{
        SYNCI
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
        SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wbinv_all_128(void)
{
}

void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
        SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
        SYNC;
}

#endif
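
/*
 * Hookup sketch (hedged: the mips_cache_ops field names follow the
 * convention in <machine/cache.h> and should be verified against the
 * headers in use).  Platform cache setup code selects the variant
 * matching the probed line size, roughly:
 *
 *      mips_cache_ops.mco_icache_sync_all   = mipsNN_icache_sync_all_32;
 *      mips_cache_ops.mco_icache_sync_range = mipsNN_icache_sync_range_32;
 *      mips_cache_ops.mco_pdcache_wbinv_all = mipsNN_pdcache_wbinv_all_32;
 *
 * after which kernel code calls the generic mips_*cache_* wrappers
 * rather than these functions directly.
 */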