/*	$NetBSD: cache_mipsNN.c,v 1.10 2005/12/24 20:07:19 perry Exp $	*/

/*
 * Copyright 2001 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe and Simon Burge for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>

#include <machine/cache.h>
#include <machine/cache_r4k.h>
#include <machine/cpuinfo.h>

#define	round_line16(x)		(((x) + 15) & ~15)
#define	trunc_line16(x)		((x) & ~15)

#define	round_line32(x)		(((x) + 31) & ~31)
#define	trunc_line32(x)		((x) & ~31)
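
/*
 * Example (illustrative address): with 32-byte lines, trunc_line32(0x80000021)
 * is 0x80000020 and round_line32(0x80000021) is 0x80000040, so a requested
 * [va, va + size) range is widened outward to whole cache lines before the
 * per-line loops below walk it.
 */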

#if defined(CPU_NLM)
/*
 * Instruction/execution hazard barrier: load the address of label 1f into
 * $8, then jr.hb $8 (encoded with .word) jumps there, clearing hazards.
 */
static __inline void
xlp_sync(void)
{
	__asm __volatile (
	    ".set push              \n"
	    ".set noreorder         \n"
	    ".set mips64            \n"
	    "dla    $8, 1f          \n"
	    "/* jr.hb $8 */         \n"
	    ".word 0x1000408        \n"
	    "nop                    \n"
	 "1: nop                    \n"
	    ".set pop               \n"
	    : : : "$8");
}
#endif

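/*
 * SYNC is the barrier issued after the cache loops below: SB-1 pass 1 parts
 * issue it twice, XLP uses the jr.hb-based hazard barrier above, and
 * everything else uses a single SYNC instruction.  SYNCI is the matching
 * instruction-stream synchronization; it expands to nothing on CPUs that are
 * not handled specially here.
 */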
#if defined(SB1250_PASS1)
#define	SYNC	__asm volatile("sync; sync")
#elif defined(CPU_NLM)
#define	SYNC	xlp_sync()
#else
#define	SYNC	__asm volatile("sync")
#endif

#if defined(CPU_CNMIPS)
#define	SYNCI	mips_sync_icache();
#elif defined(CPU_NLM)
#define	SYNCI	xlp_sync()
#else
#define	SYNCI
#endif

/*
 * Exported variables for consumers like bus_dma code
 */
int mips_picache_linesize;
int mips_pdcache_linesize;

static int picache_size;
static int picache_stride;
static int picache_loopcount;
static int picache_way_mask;
static int pdcache_size;
static int pdcache_stride;
static int pdcache_loopcount;
static int pdcache_way_mask;

void
mipsNN_cache_init(struct mips_cpuinfo *cpuinfo)
{
	int flush_multiple_lines_per_way;

	flush_multiple_lines_per_way =
	    cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize > PAGE_SIZE;
	if (cpuinfo->icache_virtual) {
		/*
		 * With a virtual Icache we don't need to flush
		 * multiples of the page size with index ops; we just
		 * need to flush one page's worth.
		 */
		flush_multiple_lines_per_way = 0;
	}

	if (flush_multiple_lines_per_way) {
		picache_stride = PAGE_SIZE;
		picache_loopcount = (cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize / PAGE_SIZE) *
		    cpuinfo->l1.ic_nways;
	} else {
		picache_stride = cpuinfo->l1.ic_nsets * cpuinfo->l1.ic_linesize;
		picache_loopcount = cpuinfo->l1.ic_nways;
	}
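
	/*
	 * Worked example (assumed geometry, for illustration only): a 4-way,
	 * 32 KB I-cache with 32-byte lines has a way size of 256 sets * 32
	 * bytes = 8 KB.  With 4 KB pages that exceeds PAGE_SIZE, so the
	 * index loops use picache_stride = 4096 and picache_loopcount =
	 * (8192 / 4096) * 4 = 8 passes to reach every index in every way.
	 */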

	if (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize < PAGE_SIZE) {
		pdcache_stride = cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize;
		pdcache_loopcount = cpuinfo->l1.dc_nways;
	} else {
		pdcache_stride = PAGE_SIZE;
		pdcache_loopcount = (cpuinfo->l1.dc_nsets * cpuinfo->l1.dc_linesize / PAGE_SIZE) *
		    cpuinfo->l1.dc_nways;
	}

	mips_picache_linesize = cpuinfo->l1.ic_linesize;
	mips_pdcache_linesize = cpuinfo->l1.dc_linesize;

	picache_size = cpuinfo->l1.ic_size;
	picache_way_mask = picache_size / cpuinfo->l1.ic_nways - 1;
	pdcache_size = cpuinfo->l1.dc_size;
	pdcache_way_mask = pdcache_size / cpuinfo->l1.dc_nways - 1;

#define	CACHE_DEBUG
#ifdef CACHE_DEBUG
	printf("Cache info:\n");
	if (cpuinfo->icache_virtual)
		printf("  icache is virtual\n");
	printf("  picache_stride    = %d\n", picache_stride);
	printf("  picache_loopcount = %d\n", picache_loopcount);
	printf("  pdcache_stride    = %d\n", pdcache_stride);
	printf("  pdcache_loopcount = %d\n", pdcache_loopcount);
#endif
}

void
mipsNN_icache_sync_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

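	/* Each macro call below index-invalidates 32 consecutive 16-byte lines (512 bytes). */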
	while (va < eva) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_icache_sync_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + picache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	mips_intern_dcache_wbinv_all();

	while (va < eva) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_icache_sync_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	mips_intern_dcache_wb_range(va, (eva - va));

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_icache_sync_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

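	/*
	 * The outer loops walk cache indices 8 (or 1) lines at a time; the
	 * inner loops repeat each index op loopcount times at stride spacing,
	 * as set up in mipsNN_cache_init(), so that every way (and every
	 * page-sized chunk of a way when a way is larger than a page) is
	 * invalidated at each index.
	 */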
	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 16;
	}
}

void
mipsNN_icache_sync_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & picache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = picache_stride;
	loopcount = picache_loopcount;

	mips_intern_dcache_wbinv_range_index(va, (eva - va));

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
		va += 32;
	}
}

void
mipsNN_pdcache_wbinv_all_16(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 16);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_all_32(void)
{
	vm_offset_t va, eva;

	va = MIPS_PHYS_TO_KSEG0(0);
	eva = va + pdcache_size;

	/*
	 * Since we're hitting the whole thing, we don't have to
	 * worry about the N different "ways".
	 */

	while (va < eva) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += (32 * 32);
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va,
		    CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line16(va + size);
	va = trunc_line16(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 16)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_16(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 16;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 16;
	}
}

void
mipsNN_pdcache_wbinv_range_index_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva, tmpva;
	int i, stride, loopcount;

	/*
	 * Since we're doing Index ops, we expect to not be able
	 * to access the address we've been given.  So, get the
	 * bits that determine the cache index, and make a KSEG0
	 * address out of them.
	 */
	va = MIPS_PHYS_TO_KSEG0(va & pdcache_way_mask);

	eva = round_line32(va + size);
	va = trunc_line32(va);

	/*
	 * GCC generates better code in the loops if we reference local
	 * copies of these global variables.
	 */
	stride = pdcache_stride;
	loopcount = pdcache_loopcount;

	while ((eva - va) >= (8 * 32)) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_r4k_op_8lines_32(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 8 * 32;
	}

	while (va < eva) {
		tmpva = va;
		for (i = 0; i < loopcount; i++, tmpva += stride)
			cache_op_r4k_line(tmpva,
			    CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
		va += 32;
	}
}

void
mipsNN_pdcache_inv_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_inv_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
		va += 32;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_16(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line16(va + size);
	va = trunc_line16(va);

	while ((eva - va) >= (32 * 16)) {
		cache_r4k_op_32lines_16(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 16);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 16;
	}

	SYNC;
}

void
mipsNN_pdcache_wb_range_32(vm_offset_t va, vm_size_t size)
{
	vm_offset_t eva;

	eva = round_line32(va + size);
	va = trunc_line32(va);

	while ((eva - va) >= (32 * 32)) {
		cache_r4k_op_32lines_32(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += (32 * 32);
	}

	while (va < eva) {
		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB);
		va += 32;
	}

	SYNC;
}


#ifdef CPU_CNMIPS

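/*
 * cnMIPS (Octeon) cores use 128-byte cache lines and keep the L1 data cache
 * write-through and coherent with DMA, so the 128-byte variants below need
 * at most an icache synchronization or an ordering barrier; the rest are
 * intentionally empty.
 */
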
void
mipsNN_icache_sync_all_128(void)
{
	SYNCI
}

void
mipsNN_icache_sync_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_icache_sync_range_index_128(vm_offset_t va, vm_size_t size)
{
}


void
mipsNN_pdcache_wbinv_all_128(void)
{
}


void
mipsNN_pdcache_wbinv_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

void
mipsNN_pdcache_wbinv_range_index_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_inv_range_128(vm_offset_t va, vm_size_t size)
{
}

void
mipsNN_pdcache_wb_range_128(vm_offset_t va, vm_size_t size)
{
	SYNC;
}

#endif
