1/*- 2 * Copyright (c) 2003 Jake Burkholder. 3 * Copyright (c) 2005 - 2011 Marius Strobl <marius@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/asi.h>
#include <machine/cache.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/dcr.h>
#include <machine/lsu.h>
#include <machine/smp.h>
#include <machine/tlb.h>
#include <machine/ver.h>
#include <machine/vmparam.h>

/* Byte offset of the lower I$ tag within an ASI_ICACHE_TAG address. */
#define	CHEETAH_ICACHE_TAG_LOWER	0x30
/* Sizes of the fully-associative T16 and the large 2-way D/I-TLBs. */
#define	CHEETAH_T16_ENTRIES		16
#define	CHEETAH_DT512_ENTRIES		512
#define	CHEETAH_IT128_ENTRIES		128
#define	CHEETAH_IT512_ENTRIES		512

/*
 * CPU-specific initialization for Sun Cheetah and later CPUs
 */
void
cheetah_init(u_int cpu_impl)
{
	u_long val;

	/* Ensure the TSB Extension Registers hold 0 as TSB_Base. */

	stxa(AA_DMMU_TSB_PEXT_REG, ASI_DMMU, 0);
	stxa(AA_IMMU_TSB_PEXT_REG, ASI_IMMU, 0);
	membar(Sync);

	stxa(AA_DMMU_TSB_SEXT_REG, ASI_DMMU, 0);
	/*
	 * NB: the secondary context was removed from the iMMU.
	 */
	membar(Sync);

	stxa(AA_DMMU_TSB_NEXT_REG, ASI_DMMU, 0);
	stxa(AA_IMMU_TSB_NEXT_REG, ASI_IMMU, 0);
	membar(Sync);

	/*
	 * Configure the first large dTLB to hold 4MB pages (e.g. for direct
	 * mappings) for all three contexts and ensure the second one is set
	 * up to hold 8k pages for them.  Note that this is constrained by
	 * US-IV+, whose large dTLBs can only hold entries of certain page
	 * sizes each.
	 * For US-IV+, additionally ensure that the large iTLB is set up to
	 * hold 8k pages for nucleus and primary context (still no secondary
	 * iMMU context).
	 * NB: according to documentation, changing the page size of the same
	 * context requires a context demap before changing the corresponding
	 * page size, but we hardly can flush our locked pages here, so we use
	 * a demap all instead.
	 */
	stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0);
	membar(Sync);
	val = (TS_4M << TLB_PCXR_N_PGSZ0_SHIFT) |
	    (TS_8K << TLB_PCXR_N_PGSZ1_SHIFT) |
	    (TS_4M << TLB_PCXR_P_PGSZ0_SHIFT) |
	    (TS_8K << TLB_PCXR_P_PGSZ1_SHIFT);
	if (cpu_impl == CPU_IMPL_ULTRASPARCIVp)
		val |= (TS_8K << TLB_PCXR_N_PGSZ_I_SHIFT) |
		    (TS_8K << TLB_PCXR_P_PGSZ_I_SHIFT);
	stxa(AA_DMMU_PCXR, ASI_DMMU, val);
	val = (TS_4M << TLB_SCXR_S_PGSZ0_SHIFT) |
	    (TS_8K << TLB_SCXR_S_PGSZ1_SHIFT);
	stxa(AA_DMMU_SCXR, ASI_DMMU, val);
	/* Flush the instruction pipe so the new MMU state takes effect. */
	flush(KERNBASE);

	/*
	 * Ensure DCR_IFPOE is disabled as long as we haven't implemented
	 * support for it (if ever) as most if not all firmware versions
	 * apparently turn it on.  Not making use of DCR_IFPOE should also
	 * avoid Cheetah erratum #109.
	 */
	val = rd(asr18) & ~DCR_IFPOE;
	if (cpu_impl == CPU_IMPL_ULTRASPARCIVp) {
		/*
		 * Ensure the branch prediction mode is set to PC indexing
		 * in order to work around US-IV+ erratum #2.
		 */
		val = (val & ~DCR_BPM_MASK) | DCR_BPM_PC;
		/*
		 * XXX disable dTLB parity error reporting as otherwise we
		 * get seemingly false positives when copying in the user
		 * window by simulating a fill trap on return to usermode in
		 * case single issue is disabled, which thus appears to be
		 * a CPU bug.
		 */
		val &= ~DCR_DTPE;
	}
	wr(asr18, val, 0);
}

/*
 * Enable level 1 caches.
 */
void
cheetah_cache_enable(u_int cpu_impl)
{
	u_long lsu;

	lsu = ldxa(0, ASI_LSU_CTL_REG);
	if (cpu_impl == CPU_IMPL_ULTRASPARCIII) {
		/* Disable P$ due to US-III erratum #18. */
		lsu &= ~LSU_PE;
	}
	/* Turn on the instruction and data caches. */
	stxa(0, ASI_LSU_CTL_REG, lsu | LSU_IC | LSU_DC);
	flush(KERNBASE);
}

/*
 * Flush all lines from the level 1 caches.
 */
void
cheetah_cache_flush(void)
{
	u_long addr, lsu;
	register_t s;

	/*
	 * Run with interrupts disabled so nothing repopulates the caches
	 * behind our back while they are being invalidated via the
	 * diagnostic ASIs.
	 */
	s = intr_disable();
	/* Invalidate every D$ line by storing to its tag. */
	for (addr = 0; addr < PCPU_GET(cache.dc_size);
	    addr += PCPU_GET(cache.dc_linesize))
		/*
		 * Note that US-IV+ additionally require a membar #Sync before
		 * a load or store to ASI_DCACHE_TAG.
		 */
		__asm __volatile(
		    "membar #Sync;"
		    "stxa %%g0, [%0] %1;"
		    "membar #Sync"
		    : : "r" (addr), "n" (ASI_DCACHE_TAG));

	/* The I$ must be disabled when flushing it so ensure it's off. */
	lsu = ldxa(0, ASI_LSU_CTL_REG);
	stxa(0, ASI_LSU_CTL_REG, lsu & ~(LSU_IC));
	flush(KERNBASE);
	/*
	 * Invalidate every I$ line; the I$ diagnostic address space is
	 * strided by two line sizes, hence the doubled bound and step.
	 */
	for (addr = CHEETAH_ICACHE_TAG_LOWER;
	    addr < PCPU_GET(cache.ic_size) * 2;
	    addr += PCPU_GET(cache.ic_linesize) * 2)
		__asm __volatile(
		    "stxa %%g0, [%0] %1;"
		    "membar #Sync"
		    : : "r" (addr), "n" (ASI_ICACHE_TAG));
	/* Restore the previous I$ enable state. */
	stxa(0, ASI_LSU_CTL_REG, lsu);
	flush(KERNBASE);
	intr_restore(s);
}

/*
 * Flush a physical page from the data cache.
 */
void
cheetah_dcache_page_inval(vm_paddr_t spa)
{
	vm_paddr_t pa;
	void *cookie;

	KASSERT((spa & PAGE_MASK) == 0,
	    ("%s: pa not page aligned", __func__));
	/* Kick off the same invalidation on the other CPUs via IPI. */
	cookie = ipi_dcache_page_inval(tl_ipi_cheetah_dcache_page_inval, spa);
	for (pa = spa; pa < spa + PAGE_SIZE;
	    pa += PCPU_GET(cache.dc_linesize))
		stxa_sync(pa, ASI_DCACHE_INVALIDATE, 0);
	/* Wait until the remote CPUs have completed their flushes. */
	ipi_wait(cookie);
}

/*
 * Flush a physical page from the instruction cache.  Instruction cache
 * consistency is maintained by hardware.
 */
void
cheetah_icache_page_inval(vm_paddr_t pa __unused)
{

}

/*
 * Flush all non-locked mappings from the TLBs.
 */
void
cheetah_tlb_flush_nonlocked(void)
{

	stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0);
	stxa(TLB_DEMAP_ALL, ASI_IMMU_DEMAP, 0);
	flush(KERNBASE);
}

/*
 * Flush all user mappings from the TLBs.
232 */ 233void 234cheetah_tlb_flush_user(void) 235{ 236 u_long data, tag; 237 register_t s; 238 u_int i, slot; 239 240 /* 241 * We read ASI_{D,I}TLB_DATA_ACCESS_REG twice back-to-back in order 242 * to work around errata of USIII and beyond. 243 */ 244 for (i = 0; i < CHEETAH_T16_ENTRIES; i++) { 245 slot = TLB_DAR_SLOT(TLB_DAR_T16, i); 246 s = intr_disable(); 247 (void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG); 248 data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG); 249 intr_restore(s); 250 tag = ldxa(slot, ASI_DTLB_TAG_READ_REG); 251 if ((data & TD_V) != 0 && (data & TD_L) == 0 && 252 TLB_TAR_CTX(tag) != TLB_CTX_KERNEL) 253 stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0); 254 s = intr_disable(); 255 (void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG); 256 data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG); 257 intr_restore(s); 258 tag = ldxa(slot, ASI_ITLB_TAG_READ_REG); 259 if ((data & TD_V) != 0 && (data & TD_L) == 0 && 260 TLB_TAR_CTX(tag) != TLB_CTX_KERNEL) 261 stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0); 262 } 263 for (i = 0; i < CHEETAH_DT512_ENTRIES; i++) { 264 slot = TLB_DAR_SLOT(TLB_DAR_DT512_0, i); 265 s = intr_disable(); 266 (void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG); 267 data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG); 268 intr_restore(s); 269 tag = ldxa(slot, ASI_DTLB_TAG_READ_REG); 270 if ((data & TD_V) != 0 && TLB_TAR_CTX(tag) != TLB_CTX_KERNEL) 271 stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0); 272 slot = TLB_DAR_SLOT(TLB_DAR_DT512_1, i); 273 s = intr_disable(); 274 (void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG); 275 data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG); 276 intr_restore(s); 277 tag = ldxa(slot, ASI_DTLB_TAG_READ_REG); 278 if ((data & TD_V) != 0 && TLB_TAR_CTX(tag) != TLB_CTX_KERNEL) 279 stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0); 280 } 281 if (PCPU_GET(impl) == CPU_IMPL_ULTRASPARCIVp) { 282 for (i = 0; i < CHEETAH_IT512_ENTRIES; i++) { 283 slot = TLB_DAR_SLOT(TLB_DAR_IT512, i); 284 s = intr_disable(); 285 (void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG); 286 data = ldxa(slot, 
ASI_ITLB_DATA_ACCESS_REG); 287 intr_restore(s); 288 tag = ldxa(slot, ASI_ITLB_TAG_READ_REG); 289 if ((data & TD_V) != 0 && 290 TLB_TAR_CTX(tag) != TLB_CTX_KERNEL) 291 stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0); 292 } 293 } else { 294 for (i = 0; i < CHEETAH_IT128_ENTRIES; i++) { 295 slot = TLB_DAR_SLOT(TLB_DAR_IT128, i); 296 s = intr_disable(); 297 (void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG); 298 data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG); 299 tag = ldxa(slot, ASI_ITLB_TAG_READ_REG); 300 intr_restore(s); 301 if ((data & TD_V) != 0 && 302 TLB_TAR_CTX(tag) != TLB_CTX_KERNEL) 303 stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0); 304 } 305 } 306} 307