1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License.  See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
7 */
8#include <linux/init.h>
9#include <linux/kernel.h>
10#include <linux/sched.h>
11#include <linux/mm.h>
12#include <linux/module.h>
13#include <linux/proc_fs.h>
14
15#include <asm/cacheops.h>
16#include <asm/inst.h>
17#include <asm/io.h>
18#include <asm/page.h>
19#include <asm/pgtable.h>
20#include <asm/prefetch.h>
21#include <asm/system.h>
22#include <asm/bootinfo.h>
23#include <asm/mipsregs.h>
24#include <asm/mmu_context.h>
25#include <asm/cpu.h>
26#include <asm/war.h>
27
/* Half of the secondary cache line size reported by cpu_scache_line_size(). */
#define half_scache_line_size()	(cpu_scache_line_size() >> 1)
/* True when the c0 PRId value, with its low four bits masked off, is 0x00002010. */
#define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
/* True when the c0 PRId value, with its low four bits masked off, is 0x00002020. */
#define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)
31
32
33/*
34 * Maximum sizes:
35 *
36 * R4000 128 bytes S-cache:		0x58 bytes
37 * R4600 v1.7:				0x5c bytes
38 * R4600 v2.0:				0x60 bytes
39 * With prefetching, 16 byte strides	0xa0 bytes
40 */
41
/* Instruction buffer that build_clear_page() fills with generated code. */
static unsigned int clear_page_array[0x130 / 4];

/*
 * clear_page is declared as an alias of the buffer itself, so a call to
 * clear_page() executes whatever instructions were generated into it.
 */
void clear_page(void * page) __attribute__((alias("clear_page_array")));

EXPORT_SYMBOL(clear_page);
47
48/*
49 * Maximum sizes:
50 *
51 * R4000 128 bytes S-cache:		0x11c bytes
52 * R4600 v1.7:				0x080 bytes
53 * R4600 v2.0:				0x07c bytes
54 * With prefetching, 16 byte strides	0x0b8 bytes
55 */
/* Instruction buffer that build_copy_page() fills with generated code. */
static unsigned int copy_page_array[0x148 / 4];

/*
 * copy_page is declared as an alias of the buffer itself, so a call to
 * copy_page() executes whatever instructions were generated into it.
 */
void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));

EXPORT_SYMBOL(copy_page);
61
/*
 * This is suboptimal for 32-bit kernels; we assume that R10000 is only used
 * with 64-bit kernels.  The prefetch offsets have been experimentally tuned
 * on an Origin 200.
 */
/*
 * Byte distance added to the current offset when a prefetch is generated
 * for the clear and copy routines respectively.  A value of zero disables
 * prefetch generation; build_clear_page() zeroes both for CPU_TX49XX.
 */
static int pref_offset_clear __initdata = 512;
static int pref_offset_copy  __initdata = 256;

/* Hint values placed in the rt field of generated source/destination prefs. */
static unsigned int pref_src_mode __initdata;
static unsigned int pref_dst_mode __initdata;

/*
 * Immediate offsets (relative to $a1 and $a0) that the next generated load
 * and store will use; advanced by each access and rebased by the addiu
 * builders.
 */
static int load_offset __initdata;
static int store_offset __initdata;

/* dest: branch target of the loop being built; epc: next free output slot. */
static unsigned int __initdata *dest, *epc;

/*
 * One-entry queue holding the most recently emitted instruction so that a
 * following branch or jump can be written ahead of it, leaving it in the
 * delay slot.
 */
static unsigned int instruction_pending;
static union mips_instruction delayed_mi;
80
81static void __init emit_instruction(union mips_instruction mi)
82{
83	if (instruction_pending)
84		*epc++ = delayed_mi.word;
85
86	instruction_pending = 1;
87	delayed_mi = mi;
88}
89
90static inline void flush_delay_slot_or_nop(void)
91{
92	if (instruction_pending) {
93		*epc++ = delayed_mi.word;
94		instruction_pending = 0;
95		return;
96	}
97
98	*epc++ = 0;
99}
100
101static inline unsigned int *label(void)
102{
103	if (instruction_pending) {
104		*epc++ = delayed_mi.word;
105		instruction_pending = 0;
106	}
107
108	return epc;
109}
110
111static inline void build_insn_word(unsigned int word)
112{
113	union mips_instruction mi;
114
115	mi.word		 = word;
116
117	emit_instruction(mi);
118}
119
120static inline void build_nop(void)
121{
122	build_insn_word(0);			/* nop */
123}
124
125static inline void build_src_pref(int advance)
126{
127	if (!(load_offset & (cpu_dcache_line_size() - 1)) && advance) {
128		union mips_instruction mi;
129
130		mi.i_format.opcode     = pref_op;
131		mi.i_format.rs         = 5;		/* $a1 */
132		mi.i_format.rt         = pref_src_mode;
133		mi.i_format.simmediate = load_offset + advance;
134
135		emit_instruction(mi);
136	}
137}
138
139static inline void __build_load_reg(int reg)
140{
141	union mips_instruction mi;
142	unsigned int width;
143
144	if (cpu_has_64bit_gp_regs) {
145		mi.i_format.opcode     = ld_op;
146		width = 8;
147	} else {
148		mi.i_format.opcode     = lw_op;
149		width = 4;
150	}
151	mi.i_format.rs         = 5;		/* $a1 */
152	mi.i_format.rt         = reg;		/* $reg */
153	mi.i_format.simmediate = load_offset;
154
155	load_offset += width;
156	emit_instruction(mi);
157}
158
159static inline void build_load_reg(int reg)
160{
161	if (cpu_has_prefetch)
162		build_src_pref(pref_offset_copy);
163
164	__build_load_reg(reg);
165}
166
167static inline void build_dst_pref(int advance)
168{
169	if (!(store_offset & (cpu_dcache_line_size() - 1)) && advance) {
170		union mips_instruction mi;
171
172		mi.i_format.opcode     = pref_op;
173		mi.i_format.rs         = 4;		/* $a0 */
174		mi.i_format.rt         = pref_dst_mode;
175		mi.i_format.simmediate = store_offset + advance;
176
177		emit_instruction(mi);
178	}
179}
180
181static inline void build_cdex_s(void)
182{
183	union mips_instruction mi;
184
185	if ((store_offset & (cpu_scache_line_size() - 1)))
186		return;
187
188	mi.c_format.opcode     = cache_op;
189	mi.c_format.rs         = 4;		/* $a0 */
190	mi.c_format.c_op       = 3;		/* Create Dirty Exclusive */
191	mi.c_format.cache      = 3;		/* Secondary Data Cache */
192	mi.c_format.simmediate = store_offset;
193
194	emit_instruction(mi);
195}
196
197static inline void build_cdex_p(void)
198{
199	union mips_instruction mi;
200
201	if (store_offset & (cpu_dcache_line_size() - 1))
202		return;
203
204	if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
205		build_nop();
206		build_nop();
207		build_nop();
208		build_nop();
209	}
210
211	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
212		build_insn_word(0x3c01a000);	/* lui     $at, 0xa000  */
213
214	mi.c_format.opcode     = cache_op;
215	mi.c_format.rs         = 4;		/* $a0 */
216	mi.c_format.c_op       = 3;		/* Create Dirty Exclusive */
217	mi.c_format.cache      = 1;		/* Data Cache */
218	mi.c_format.simmediate = store_offset;
219
220	emit_instruction(mi);
221}
222
223static void __init __build_store_reg(int reg)
224{
225	union mips_instruction mi;
226	unsigned int width;
227
228	if (cpu_has_64bit_gp_regs ||
229	    (cpu_has_64bit_zero_reg && reg == 0)) {
230		mi.i_format.opcode     = sd_op;
231		width = 8;
232	} else {
233		mi.i_format.opcode     = sw_op;
234		width = 4;
235	}
236	mi.i_format.rs         = 4;		/* $a0 */
237	mi.i_format.rt         = reg;		/* $reg */
238	mi.i_format.simmediate = store_offset;
239
240	store_offset += width;
241	emit_instruction(mi);
242}
243
244static inline void build_store_reg(int reg)
245{
246	int pref_off = cpu_has_prefetch ?
247		(reg ? pref_offset_copy : pref_offset_clear) : 0;
248	if (pref_off)
249		build_dst_pref(pref_off);
250	else if (cpu_has_cache_cdex_s)
251		build_cdex_s();
252	else if (cpu_has_cache_cdex_p)
253		build_cdex_p();
254
255	__build_store_reg(reg);
256}
257
258static inline void build_addiu_a2_a0(unsigned long offset)
259{
260	union mips_instruction mi;
261
262	BUG_ON(offset > 0x7fff);
263
264	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
265	mi.i_format.rs         = 4;		/* $a0 */
266	mi.i_format.rt         = 6;		/* $a2 */
267	mi.i_format.simmediate = offset;
268
269	emit_instruction(mi);
270}
271
272static inline void build_addiu_a2(unsigned long offset)
273{
274	union mips_instruction mi;
275
276	BUG_ON(offset > 0x7fff);
277
278	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
279	mi.i_format.rs         = 6;		/* $a2 */
280	mi.i_format.rt         = 6;		/* $a2 */
281	mi.i_format.simmediate = offset;
282
283	emit_instruction(mi);
284}
285
286static inline void build_addiu_a1(unsigned long offset)
287{
288	union mips_instruction mi;
289
290	BUG_ON(offset > 0x7fff);
291
292	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
293	mi.i_format.rs         = 5;		/* $a1 */
294	mi.i_format.rt         = 5;		/* $a1 */
295	mi.i_format.simmediate = offset;
296
297	load_offset -= offset;
298
299	emit_instruction(mi);
300}
301
302static inline void build_addiu_a0(unsigned long offset)
303{
304	union mips_instruction mi;
305
306	BUG_ON(offset > 0x7fff);
307
308	mi.i_format.opcode     = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
309	mi.i_format.rs         = 4;		/* $a0 */
310	mi.i_format.rt         = 4;		/* $a0 */
311	mi.i_format.simmediate = offset;
312
313	store_offset -= offset;
314
315	emit_instruction(mi);
316}
317
318static inline void build_bne(unsigned int *dest)
319{
320	union mips_instruction mi;
321
322	mi.i_format.opcode = bne_op;
323	mi.i_format.rs     = 6;			/* $a2 */
324	mi.i_format.rt     = 4;			/* $a0 */
325	mi.i_format.simmediate = dest - epc - 1;
326
327	*epc++ = mi.word;
328	flush_delay_slot_or_nop();
329}
330
331static inline void build_jr_ra(void)
332{
333	union mips_instruction mi;
334
335	mi.r_format.opcode = spec_op;
336	mi.r_format.rs     = 31;
337	mi.r_format.rt     = 0;
338	mi.r_format.rd     = 0;
339	mi.r_format.re     = 0;
340	mi.r_format.func   = jr_op;
341
342	*epc++ = mi.word;
343	flush_delay_slot_or_nop();
344}
345
/*
 * Generate the machine code for clear_page() into clear_page_array.
 *
 * The generated routine zeroes memory starting at $a0 until $a0 reaches
 * the end marker kept in $a2.  When prefetching is in use, the main loop
 * stops pref_offset_clear bytes short of the page end and a second loop
 * without prefetches finishes the remainder.  This function also selects
 * the prefetch hints (pref_src_mode / pref_dst_mode) for the current CPU
 * type.
 */
void __init build_clear_page(void)
{
	unsigned int loop_start;
	unsigned long off;

	/* Start emitting at the beginning of the buffer with a clean state. */
	epc = (unsigned int *) &clear_page_array;
	instruction_pending = 0;
	store_offset = 0;

	if (cpu_has_prefetch) {
		switch (current_cpu_data.cputype) {
		case CPU_TX49XX:
			/* TX49 supports only Pref_Load */
			pref_offset_clear = 0;
			pref_offset_copy = 0;
			break;

		case CPU_RM9000:
			/* No statements: shares the hints selected just below. */
		case CPU_R10000:
		case CPU_R12000:
		case CPU_R14000:
			pref_src_mode = Pref_LoadStreamed;
			pref_dst_mode = Pref_StoreStreamed;
			break;

		default:
			pref_src_mode = Pref_LoadStreamed;
			pref_dst_mode = Pref_PrepareForStore;
			break;
		}
	}

	/*
	 * $a2 = $a0 + off marks where the main loop stops.  The add builders
	 * reject immediates above 0x7fff, so a larger distance is added in
	 * two halves.
	 */
        off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0);
	if (off > 0x7fff) {
		build_addiu_a2_a0(off >> 1);
		build_addiu_a2(off >> 1);
	} else
		build_addiu_a2_a0(off);

	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
		build_insn_word(0x3c01a000);	/* lui     $at, 0xa000  */

	/* Main loop: first batch of stores uses non-negative offsets from $a0. */
dest = label();
	do {
		build_store_reg(0);
		build_store_reg(0);
		build_store_reg(0);
		build_store_reg(0);
	} while (store_offset < half_scache_line_size());
	/*
	 * Advance $a0 by twice the amount stored so far; store_offset turns
	 * negative, so the second batch addresses the bytes just below the
	 * new $a0.
	 */
	build_addiu_a0(2 * store_offset);
	loop_start = store_offset;
	do {
		build_store_reg(0);
		build_store_reg(0);
		build_store_reg(0);
		build_store_reg(0);
	} while ((store_offset - loop_start) < half_scache_line_size());
	/* Loop while $a0 != $a2; the last store lands in the delay slot. */
	build_bne(dest);

	/*
	 * Tail loop for the bytes the main loop left out: same shape, but
	 * built from plain stores without prefetch or cache ops.
	 */
	if (cpu_has_prefetch && pref_offset_clear) {
		build_addiu_a2_a0(pref_offset_clear);
	dest = label();
		loop_start = store_offset;
		do {
			__build_store_reg(0);
			__build_store_reg(0);
			__build_store_reg(0);
			__build_store_reg(0);
		} while ((store_offset - loop_start) < half_scache_line_size());
		build_addiu_a0(2 * store_offset);
		loop_start = store_offset;
		do {
			__build_store_reg(0);
			__build_store_reg(0);
			__build_store_reg(0);
			__build_store_reg(0);
		} while ((store_offset - loop_start) < half_scache_line_size());
		build_bne(dest);
	}

	build_jr_ra();

	/* Detect, after the fact, generated code that overran the buffer. */
	BUG_ON(epc > clear_page_array + ARRAY_SIZE(clear_page_array));
}
431
/*
 * Generate the machine code for copy_page() into copy_page_array.
 *
 * The generated routine copies from $a1 to $a0 through registers 8-11
 * until $a0 reaches the end marker kept in $a2.  When prefetching is in
 * use, the main loop stops pref_offset_copy bytes short of the page end
 * and a second loop without prefetches finishes the remainder.
 *
 * NOTE(review): pref_src_mode, pref_dst_mode and the TX49XX zeroing of
 * pref_offset_copy are only set up in build_clear_page() in this file, so
 * this presumably expects build_clear_page() to have run first - confirm
 * against the caller.
 */
void __init build_copy_page(void)
{
	unsigned int loop_start;
	unsigned long off;

	/* Start emitting at the beginning of the buffer with a clean state. */
	epc = (unsigned int *) &copy_page_array;
	store_offset = load_offset = 0;
	instruction_pending = 0;

	/*
	 * $a2 = $a0 + off marks where the main loop stops.  The add builders
	 * reject immediates above 0x7fff, so a larger distance is added in
	 * two halves.
	 */
	off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0);
	if (off > 0x7fff) {
		build_addiu_a2_a0(off >> 1);
		build_addiu_a2(off >> 1);
	} else
		build_addiu_a2_a0(off);

	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
		build_insn_word(0x3c01a000);	/* lui     $at, 0xa000  */

	/* Main loop: four loads followed by four stores per group. */
dest = label();
	loop_start = store_offset;
	do {
		build_load_reg( 8);
		build_load_reg( 9);
		build_load_reg(10);
		build_load_reg(11);
		build_store_reg( 8);
		build_store_reg( 9);
		build_store_reg(10);
		build_store_reg(11);
	} while ((store_offset - loop_start) < half_scache_line_size());
	/*
	 * Advance both pointers by twice the amount copied so far; the
	 * running offsets turn negative, so the second batch addresses the
	 * bytes just below the new pointer values.
	 */
	build_addiu_a0(2 * store_offset);
	build_addiu_a1(2 * load_offset);
	loop_start = store_offset;
	do {
		build_load_reg( 8);
		build_load_reg( 9);
		build_load_reg(10);
		build_load_reg(11);
		build_store_reg( 8);
		build_store_reg( 9);
		build_store_reg(10);
		build_store_reg(11);
	} while ((store_offset - loop_start) < half_scache_line_size());
	/* Loop while $a0 != $a2; the last store lands in the delay slot. */
	build_bne(dest);

	/*
	 * Tail loop for the bytes the main loop left out: same shape, but
	 * built from plain loads and stores without prefetch or cache ops.
	 */
	if (cpu_has_prefetch && pref_offset_copy) {
		build_addiu_a2_a0(pref_offset_copy);
	dest = label();
		loop_start = store_offset;
		do {
			__build_load_reg( 8);
			__build_load_reg( 9);
			__build_load_reg(10);
			__build_load_reg(11);
			__build_store_reg( 8);
			__build_store_reg( 9);
			__build_store_reg(10);
			__build_store_reg(11);
		} while ((store_offset - loop_start) < half_scache_line_size());
		build_addiu_a0(2 * store_offset);
		build_addiu_a1(2 * load_offset);
		loop_start = store_offset;
		do {
			__build_load_reg( 8);
			__build_load_reg( 9);
			__build_load_reg(10);
			__build_load_reg(11);
			__build_store_reg( 8);
			__build_store_reg( 9);
			__build_store_reg(10);
			__build_store_reg(11);
		} while ((store_offset - loop_start) < half_scache_line_size());
		build_bne(dest);
	}

	build_jr_ra();

	/* Detect, after the fact, generated code that overran the buffer. */
	BUG_ON(epc > copy_page_array + ARRAY_SIZE(copy_page_array));
}
512