1/*-
2 * Copyright (c) 2003 Jake Burkholder.
3 * Copyright (c) 2005 - 2011 Marius Strobl <marius@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/lock.h>
34#include <sys/mutex.h>
35#include <sys/smp.h>
36
37#include <vm/vm.h>
38#include <vm/pmap.h>
39
40#include <machine/asi.h>
41#include <machine/cache.h>
42#include <machine/cpu.h>
43#include <machine/cpufunc.h>
44#include <machine/dcr.h>
45#include <machine/lsu.h>
46#include <machine/smp.h>
47#include <machine/tlb.h>
48#include <machine/ver.h>
49#include <machine/vmparam.h>
50
51#define	CHEETAH_ICACHE_TAG_LOWER	0x30
52#define	CHEETAH_T16_ENTRIES		16
53#define	CHEETAH_DT512_ENTRIES		512
54#define	CHEETAH_IT128_ENTRIES		128
55#define	CHEETAH_IT512_ENTRIES		512
56
/*
 * CPU-specific initialization for Sun Cheetah and later CPUs: clear the
 * TSB Extension Registers, configure the large TLB page sizes and
 * sanitize the Dispatch Control Register.
 */
void
cheetah_init(u_int cpu_impl)
{
	u_long val;

	/* Ensure the TSB Extension Registers hold 0 as TSB_Base. */

	stxa(AA_DMMU_TSB_PEXT_REG, ASI_DMMU, 0);
	stxa(AA_IMMU_TSB_PEXT_REG, ASI_IMMU, 0);
	membar(Sync);

	stxa(AA_DMMU_TSB_SEXT_REG, ASI_DMMU, 0);
	/*
	 * NB: the secondary context was removed from the iMMU, so there
	 * is no iMMU counterpart of the secondary extension register to
	 * clear here.
	 */
	membar(Sync);

	stxa(AA_DMMU_TSB_NEXT_REG, ASI_DMMU, 0);
	stxa(AA_IMMU_TSB_NEXT_REG, ASI_IMMU, 0);
	membar(Sync);

	/*
	 * Configure the first large dTLB to hold 4MB pages (e.g. for direct
	 * mappings) for all three contexts and ensure the second one is set
	 * up to hold 8k pages for them.  Note that this is constrained by
	 * US-IV+, whose large dTLBs can only hold entries of certain page
	 * sizes each.
	 * For US-IV+, additionally ensure that the large iTLB is set up to
	 * hold 8k pages for nucleus and primary context (still no secondary
	 * iMMU context).
	 * NB: according to documentation, changing the page size of the same
	 * context requires a context demap before changing the corresponding
	 * page size, but we hardly can flush our locked pages here, so we use
	 * a demap all instead.
	 */
	stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0);
	membar(Sync);
	/* Nucleus and primary context page sizes (4M first, 8k second). */
	val = (TS_4M << TLB_PCXR_N_PGSZ0_SHIFT) |
	    (TS_8K << TLB_PCXR_N_PGSZ1_SHIFT) |
	    (TS_4M << TLB_PCXR_P_PGSZ0_SHIFT) |
	    (TS_8K << TLB_PCXR_P_PGSZ1_SHIFT);
	if (cpu_impl == CPU_IMPL_ULTRASPARCIVp)
		val |= (TS_8K << TLB_PCXR_N_PGSZ_I_SHIFT) |
		    (TS_8K << TLB_PCXR_P_PGSZ_I_SHIFT);
	stxa(AA_DMMU_PCXR, ASI_DMMU, val);
	/* Secondary context page sizes (dMMU only, see above). */
	val = (TS_4M << TLB_SCXR_S_PGSZ0_SHIFT) |
	    (TS_8K << TLB_SCXR_S_PGSZ1_SHIFT);
	stxa(AA_DMMU_SCXR, ASI_DMMU, val);
	flush(KERNBASE);

	/*
	 * Ensure DCR_IFPOE is disabled as long as we haven't implemented
	 * support for it (if ever) as most if not all firmware versions
	 * apparently turn it on.  Not making use of DCR_IFPOE should also
	 * avoid Cheetah erratum #109.
	 */
	val = rd(asr18) & ~DCR_IFPOE;
	if (cpu_impl == CPU_IMPL_ULTRASPARCIVp) {
		/*
		 * Ensure the branch prediction mode is set to PC indexing
		 * in order to work around US-IV+ erratum #2.
		 */
		val = (val & ~DCR_BPM_MASK) | DCR_BPM_PC;
		/*
		 * XXX disable dTLB parity error reporting as otherwise we
		 * get seemingly false positives when copying in the user
		 * window by simulating a fill trap on return to usermode in
		 * case single issue is disabled, which thus appears to be
		 * a CPU bug.
		 */
		val &= ~DCR_DTPE;
	}
	wr(asr18, val, 0);
}
134
135/*
136 * Enable level 1 caches.
137 */
138void
139cheetah_cache_enable(u_int cpu_impl)
140{
141	u_long lsu;
142
143	lsu = ldxa(0, ASI_LSU_CTL_REG);
144	if (cpu_impl == CPU_IMPL_ULTRASPARCIII) {
145		/* Disable P$ due to US-III erratum #18. */
146		lsu &= ~LSU_PE;
147	}
148	stxa(0, ASI_LSU_CTL_REG, lsu | LSU_IC | LSU_DC);
149	flush(KERNBASE);
150}
151
/*
 * Flush all lines from the level 1 caches.
 */
void
cheetah_cache_flush(void)
{
	u_long addr, lsu;
	register_t s;

	s = intr_disable();
	/* Invalidate the D$ by writing zero to every tag. */
	for (addr = 0; addr < PCPU_GET(cache.dc_size);
	    addr += PCPU_GET(cache.dc_linesize))
		/*
		 * Note that US-IV+ additionally require a membar #Sync before
		 * a load or store to ASI_DCACHE_TAG.
		 */
		__asm __volatile(
		    "membar #Sync;"
		    "stxa %%g0, [%0] %1;"
		    "membar #Sync"
		    : : "r" (addr), "n" (ASI_DCACHE_TAG));

	/* The I$ must be disabled when flushing it so ensure it's off. */
	lsu = ldxa(0, ASI_LSU_CTL_REG);
	stxa(0, ASI_LSU_CTL_REG, lsu & ~(LSU_IC));
	flush(KERNBASE);
	/*
	 * Zero the I$ tags.  NOTE(review): address and stride are scaled
	 * by 2 relative to the I$ size/linesize, matching the tag
	 * addressing scheme of these CPUs — confirm against the US-III
	 * manual before changing.
	 */
	for (addr = CHEETAH_ICACHE_TAG_LOWER;
	    addr < PCPU_GET(cache.ic_size) * 2;
	    addr += PCPU_GET(cache.ic_linesize) * 2)
		__asm __volatile(
		    "stxa %%g0, [%0] %1;"
		    "membar #Sync"
		    : : "r" (addr), "n" (ASI_ICACHE_TAG));
	/* Restore the previous I$ enable state. */
	stxa(0, ASI_LSU_CTL_REG, lsu);
	flush(KERNBASE);
	intr_restore(s);
}
189
190/*
191 * Flush a physical page from the data cache.
192 */
193void
194cheetah_dcache_page_inval(vm_paddr_t spa)
195{
196	vm_paddr_t pa;
197	void *cookie;
198
199	KASSERT((spa & PAGE_MASK) == 0,
200	    ("%s: pa not page aligned", __func__));
201	cookie = ipi_dcache_page_inval(tl_ipi_cheetah_dcache_page_inval, spa);
202	for (pa = spa; pa < spa + PAGE_SIZE;
203	    pa += PCPU_GET(cache.dc_linesize))
204		stxa_sync(pa, ASI_DCACHE_INVALIDATE, 0);
205	ipi_wait(cookie);
206}
207
208/*
209 * Flush a physical page from the intsruction cache.  Instruction cache
210 * consistency is maintained by hardware.
211 */
212void
213cheetah_icache_page_inval(vm_paddr_t pa __unused)
214{
215
216}
217
/*
 * Flush all non-locked mappings from the TLBs.
 */
void
cheetah_tlb_flush_nonlocked(void)
{

	/* A demap-all operation covers both the dMMU and iMMU TLBs. */
	stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0);
	stxa(TLB_DEMAP_ALL, ASI_IMMU_DEMAP, 0);
	flush(KERNBASE);
}
229
230/*
231 * Flush all user mappings from the TLBs.
232 */
233void
234cheetah_tlb_flush_user(void)
235{
236	u_long data, tag;
237	register_t s;
238	u_int i, slot;
239
240	/*
241	 * We read ASI_{D,I}TLB_DATA_ACCESS_REG twice back-to-back in order
242	 * to work around errata of USIII and beyond.
243	 */
244	for (i = 0; i < CHEETAH_T16_ENTRIES; i++) {
245		slot = TLB_DAR_SLOT(TLB_DAR_T16, i);
246		s = intr_disable();
247		(void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
248		data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
249		intr_restore(s);
250		tag = ldxa(slot, ASI_DTLB_TAG_READ_REG);
251		if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
252		    TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
253			stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
254		s = intr_disable();
255		(void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
256		data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
257		intr_restore(s);
258		tag = ldxa(slot, ASI_ITLB_TAG_READ_REG);
259		if ((data & TD_V) != 0 && (data & TD_L) == 0 &&
260		    TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
261			stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
262	}
263	for (i = 0; i < CHEETAH_DT512_ENTRIES; i++) {
264		slot = TLB_DAR_SLOT(TLB_DAR_DT512_0, i);
265		s = intr_disable();
266		(void)ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
267		data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
268		intr_restore(s);
269		tag = ldxa(slot, ASI_DTLB_TAG_READ_REG);
270		if ((data & TD_V) != 0 && TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
271			stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
272		slot = TLB_DAR_SLOT(TLB_DAR_DT512_1, i);
273		s = intr_disable();
274		(void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
275		data = ldxa(slot, ASI_DTLB_DATA_ACCESS_REG);
276		intr_restore(s);
277		tag = ldxa(slot, ASI_DTLB_TAG_READ_REG);
278		if ((data & TD_V) != 0 && TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
279			stxa_sync(slot, ASI_DTLB_DATA_ACCESS_REG, 0);
280	}
281	if (PCPU_GET(impl) == CPU_IMPL_ULTRASPARCIVp) {
282		for (i = 0; i < CHEETAH_IT512_ENTRIES; i++) {
283			slot = TLB_DAR_SLOT(TLB_DAR_IT512, i);
284			s = intr_disable();
285			(void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
286			data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
287			intr_restore(s);
288			tag = ldxa(slot, ASI_ITLB_TAG_READ_REG);
289			if ((data & TD_V) != 0 &&
290			    TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
291				stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
292		}
293	} else {
294		for (i = 0; i < CHEETAH_IT128_ENTRIES; i++) {
295			slot = TLB_DAR_SLOT(TLB_DAR_IT128, i);
296			s = intr_disable();
297			(void)ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
298			data = ldxa(slot, ASI_ITLB_DATA_ACCESS_REG);
299			tag = ldxa(slot, ASI_ITLB_TAG_READ_REG);
300			intr_restore(s);
301			if ((data & TD_V) != 0 &&
302			    TLB_TAR_CTX(tag) != TLB_CTX_KERNEL)
303				stxa_sync(slot, ASI_ITLB_DATA_ACCESS_REG, 0);
304		}
305	}
306}
307