/*-
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Since we are compiled outside of the normal kernel build process, we
 * need to include opt_global.h manually.
 */
#include "opt_global.h"
#include "opt_kernname.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <machine/asm.h>
#include <sys/param.h>
#include <sys/elf32.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte.h>
#include <machine/cpufunc.h>
#include <machine/armreg.h>

extern char kernel_start[];
extern char kernel_end[];

extern void *_end;

void _start(void);
void __start(void);
void _startC(void);

extern unsigned int cpufunc_id(void);
extern void armv6_idcache_wbinv_all(void);
extern void armv7_idcache_wbinv_all(void);
extern void do_call(void *, void *, void *, int);

#define GZ_HEAD	0xa

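/*
 * Pick the CPU-specific cache maintenance routines at compile time, based on
 * the CPU_* options pulled in from opt_global.h.
 */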
#if defined(CPU_ARM9)
#define cpu_idcache_wbinv_all	arm9_idcache_wbinv_all
extern void arm9_idcache_wbinv_all(void);
#elif defined(CPU_FA526) || defined(CPU_FA626TE)
#define cpu_idcache_wbinv_all	fa526_idcache_wbinv_all
extern void fa526_idcache_wbinv_all(void);
#elif defined(CPU_ARM9E)
#define cpu_idcache_wbinv_all	armv5_ec_idcache_wbinv_all
extern void armv5_ec_idcache_wbinv_all(void);
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all	arm10_idcache_wbinv_all
extern void arm10_idcache_wbinv_all(void);
#elif defined(CPU_ARM1136) || defined(CPU_ARM1176)
#define cpu_idcache_wbinv_all	armv6_idcache_wbinv_all
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
  defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) ||	\
  defined(CPU_XSCALE_80219)
#define cpu_idcache_wbinv_all	xscale_cache_purgeID
extern void xscale_cache_purgeID(void);
#elif defined(CPU_XSCALE_81342)
#define cpu_idcache_wbinv_all	xscalec3_cache_purgeID
extern void xscalec3_cache_purgeID(void);
#elif defined(CPU_MV_PJ4B)
#if !defined(SOC_MV_ARMADAXP)
#define cpu_idcache_wbinv_all	armv6_idcache_wbinv_all
extern void armv6_idcache_wbinv_all(void);
#else
#define cpu_idcache_wbinv_all()	armadaxp_idcache_wbinv_all()
#endif
#endif /* CPU_MV_PJ4B */
#ifdef CPU_XSCALE_81342
#define cpu_l2cache_wbinv_all	xscalec3_l2cache_purge
extern void xscalec3_l2cache_purge(void);
#elif defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY)
#define cpu_l2cache_wbinv_all	sheeva_l2cache_wbinv_all
extern void sheeva_l2cache_wbinv_all(void);
#elif defined(CPU_CORTEXA) || defined(CPU_KRAIT)
#define cpu_idcache_wbinv_all	armv7_idcache_wbinv_all
#define cpu_l2cache_wbinv_all()
#else
#define cpu_l2cache_wbinv_all()
#endif

static void armadaxp_idcache_wbinv_all(void);

int     arm_picache_size;
int     arm_picache_line_size;
int     arm_picache_ways;

int     arm_pdcache_size;       /* and unified */
int     arm_pdcache_line_size = 32;
int     arm_pdcache_ways;

int     arm_pcache_type;
int     arm_pcache_unified;

int     arm_dcache_align;
int     arm_dcache_align_mask;

u_int	arm_cache_level;
u_int	arm_cache_type[14];
u_int	arm_cache_loc;

/*
 * Additional cache information local to this file.  Log2 of some of the
 * above numbers.
 */
static int      arm_dcache_l2_nsets;
static int      arm_dcache_l2_assoc;
static int      arm_dcache_l2_linesize;

int block_userspace_access = 0;
extern int arm9_dcache_sets_inc;
extern int arm9_dcache_sets_max;
extern int arm9_dcache_index_max;
extern int arm9_dcache_index_inc;

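/*
 * Minimal freestanding memcpy(); the trampoline cannot call into libc or the
 * kernel proper.  Note that the word-at-a-time path below is disabled by the
 * "0 &&" in its condition, so the copy is always done bytewise.
 */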
static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

	while (len) {
		if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
		    !((vm_offset_t)s & 3)) {
			*(uint32_t *)d = *(uint32_t *)s;
			s += 4;
			d += 4;
			len -= 4;
		} else {
			*d++ = *s++;
			len--;
		}
	}
	return (dst);
}

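/* Minimal freestanding bzero(), zeroing a word at a time when aligned. */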
static __inline void
bzero(void *addr, int count)
{
	char *tmp = (char *)addr;

	while (count > 0) {
		if (count >= 4 && !((vm_offset_t)tmp & 3)) {
			*(uint32_t *)tmp = 0;
			tmp += 4;
			count -= 4;
		} else {
			*tmp = 0;
			tmp++;
			count--;
		}
	}
}

static void arm9_setup(void);

void
_startC(void)
{
	int tmp1;
	unsigned int sp = ((unsigned int)&_end & ~3) + 4;
	unsigned int pc, kernphysaddr;

	/*
	 * Figure out the physical address the kernel was loaded at.  This
	 * assumes the entry point (this code right here) is in the first page,
	 * which will always be the case for this trampoline code.
	 */
	__asm __volatile("mov %0, pc\n"
	    : "=r" (pc));
	kernphysaddr = pc & ~PAGE_MASK;

#if defined(FLASHADDR) && defined(PHYSADDR) && defined(LOADERRAMADDR)
	if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
	    (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
		/*
		 * We're running from flash, so just copy the whole thing
		 * from flash to memory.
		 * This is far from optimal, we could do the relocation or
		 * the unzipping directly from flash to memory to avoid this
		 * needless copy, but it would require to know the flash
		 * physical address.
		 */
		unsigned int target_addr;
		unsigned int tmp_sp;
		uint32_t src_addr = (uint32_t)&_start - PHYSADDR + FLASHADDR
		    + (pc - FLASHADDR - ((uint32_t)&_startC - PHYSADDR)) & 0xfffff000;

		target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
		tmp_sp = target_addr + 0x100000 +
		    (unsigned int)&_end - (unsigned int)&_start;
		memcpy((char *)target_addr, (char *)src_addr,
		    (unsigned int)&_end - (unsigned int)&_start);
		/* Temporarily set the sp and jump to the new location. */
		__asm __volatile(
		    "mov sp, %1\n"
		    "mov pc, %0\n"
		    : : "r" (target_addr), "r" (tmp_sp));

	}
#endif
#ifdef KZIP
	sp += KERNSIZE + 0x100;
	sp &= ~(L1_TABLE_SIZE - 1);
	sp += 2 * L1_TABLE_SIZE;
#endif
	sp += 1024 * 1024; /* Should be enough for a stack */

	__asm __volatile("adr %0, 2f\n"
			 "bic %0, %0, #0xff000000\n"
			 "and %1, %1, #0xff000000\n"
			 "orr %0, %0, %1\n"
			 "mrc p15, 0, %1, c1, c0, 0\n"
			 "bic %1, %1, #1\n" /* Disable MMU */
			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
						     WBUF enable */
			 "orr %1, %1, #0x1000\n" /* Add IC enable */
			 "orr %1, %1, #(0x800)\n" /* BPRD enable */

			 "mcr p15, 0, %1, c1, c0, 0\n"
			 "nop\n"
			 "nop\n"
			 "nop\n"
			 "mov pc, %0\n"
			 "2: nop\n"
			 "mov sp, %2\n"
			 : "=r" (tmp1), "+r" (kernphysaddr), "+r" (sp));
#ifndef KZIP
#ifdef CPU_ARM9
	/* So that idcache_wbinv works; */
	if ((cpufunc_id() & 0x0000f000) == 0x00009000)
		arm9_setup();
#endif
#endif
	__start();
}

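/*
 * Read the cache geometry from CP15.  On ARMv7 this walks CLIDR/CCSIDR and
 * fills arm_cache_type[]; on older CPUs it decodes the cache type register
 * into the arm_picache_* and arm_pdcache_* globals used by the flush
 * routines.
 */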
static void
get_cachetype_cp15(void)
{
	u_int ctype, isize, dsize, cpuid;
	u_int clevel, csize, i, sel;
	u_int multiplier;
	u_char type;

	__asm __volatile("mrc p15, 0, %0, c0, c0, 1"
		: "=r" (ctype));

	cpuid = cpufunc_id();
	/*
	 * ...and thus spake the ARM ARM:
	 *
	 * If an <opcode2> value corresponding to an unimplemented or
	 * reserved ID register is encountered, the System Control
	 * processor returns the value of the main ID register.
	 */
	if (ctype == cpuid)
		goto out;

	if (CPU_CT_FORMAT(ctype) == CPU_CT_ARMV7) {
		__asm __volatile("mrc p15, 1, %0, c0, c0, 1"
		    : "=r" (clevel));
		arm_cache_level = clevel;
		arm_cache_loc = CPU_CLIDR_LOC(arm_cache_level) + 1;
		i = 0;
		while ((type = (clevel & 0x7)) && i < 7) {
			if (type == CACHE_DCACHE || type == CACHE_UNI_CACHE ||
			    type == CACHE_SEP_CACHE) {
				sel = i << 1;
				__asm __volatile("mcr p15, 2, %0, c0, c0, 0"
				    : : "r" (sel));
				__asm __volatile("mrc p15, 1, %0, c0, c0, 0"
				    : "=r" (csize));
				arm_cache_type[sel] = csize;
			}
			if (type == CACHE_ICACHE || type == CACHE_SEP_CACHE) {
				sel = (i << 1) | 1;
				__asm __volatile("mcr p15, 2, %0, c0, c0, 0"
				    : : "r" (sel));
				__asm __volatile("mrc p15, 1, %0, c0, c0, 0"
				    : "=r" (csize));
				arm_cache_type[sel] = csize;
			}
			i++;
			clevel >>= 3;
		}
	} else {
		if ((ctype & CPU_CT_S) == 0)
			arm_pcache_unified = 1;

		/*
		 * If you want to know how this code works, go read the ARM ARM.
		 */

		arm_pcache_type = CPU_CT_CTYPE(ctype);

		if (arm_pcache_unified == 0) {
			isize = CPU_CT_ISIZE(ctype);
			multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
			arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
			if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
				if (isize & CPU_CT_xSIZE_M)
					arm_picache_line_size = 0; /* not present */
				else
					arm_picache_ways = 1;
			} else {
				arm_picache_ways = multiplier <<
				    (CPU_CT_xSIZE_ASSOC(isize) - 1);
			}
			arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
		}

		dsize = CPU_CT_DSIZE(ctype);
		multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
		arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
		if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
			if (dsize & CPU_CT_xSIZE_M)
				arm_pdcache_line_size = 0; /* not present */
			else
				arm_pdcache_ways = 1;
		} else {
			arm_pdcache_ways = multiplier <<
			    (CPU_CT_xSIZE_ASSOC(dsize) - 1);
		}
		arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);

		arm_dcache_align = arm_pdcache_line_size;

		arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
		arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
		arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
		    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);

	out:
		arm_dcache_align_mask = arm_dcache_align - 1;
	}
}

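/*
 * Derive the dcache set/index iteration parameters used by
 * arm9_idcache_wbinv_all() from the geometry read out of CP15.
 */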
static void
arm9_setup(void)
{

	get_cachetype_cp15();
	arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
	arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
	    arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
	arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
	arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
}

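/*
 * On Armada XP, choose between the ARMv6 and ARMv7 flush routines based on
 * the ThumbEE field of ID_PFR0 (CP15 c0, c1, 0).
 */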
static void
armadaxp_idcache_wbinv_all(void)
{
	uint32_t feat;

	__asm __volatile("mrc p15, 0, %0, c0, c1, 0" : "=r" (feat));
	if (feat & ARM_PFR0_THUMBEE_MASK)
		armv7_idcache_wbinv_all();
	else
		armv6_idcache_wbinv_all();
}
#ifdef KZIP
static unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;		/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */
/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000

/*
 * Allocate memory block.
 */
unsigned char *
kzipmalloc(int size)
{
	void *ptr;
	static u_char mem[MEMSIZ];

	if (memtot + size > MEMSIZ)
		return NULL;
	ptr = mem + memtot;
	memtot += size;
	memcnt++;
	return ptr;
}

/*
 * Free allocated memory block.
 */
void
kzipfree(void *ptr)
{
	memcnt--;
	if (!memcnt)
		memtot = 0;
}

void
putstr(char *dummy)
{
}

static int
input(void *dummy)
{
	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
		return (GZ_EOF);
	}
	return *i_input++;
}

static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

	memcpy(i_output, ptr, len);
	i_output += len;
	return (0);
}

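/*
 * Decompress the gzip'd kernel at 'kernel' to 'startaddr'.  Skip the gzip
 * header (plus the NUL-terminated name/comment field when the FNAME/FCOMMENT
 * flag bits are set) and feed the rest to inflate().  Returns a word-aligned
 * address just past the decompressed image.
 */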
static void *
inflate_kernel(void *kernel, void *startaddr)
{
	struct inflate infl;
	unsigned char slide[GZ_WSIZE];

	orig_input = kernel;
	memcnt = memtot = 0;
	i_input = (unsigned char *)kernel + GZ_HEAD;
	if (((char *)kernel)[3] & 0x18) {
		while (*i_input)
			i_input++;
		i_input++;
	}
	i_output = startaddr;
	bzero(&infl, sizeof(infl));
	infl.gz_input = input;
	infl.gz_output = output;
	infl.gz_slide = slide;
	inflate(&infl);
	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
}

#endif

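/*
 * Load an ELF kernel image located at kstart.  When d == 0, only compute and
 * return the address just past where the relocated image and its symbol
 * tables will end.  When d != 0, copy the PT_LOAD segments to their run-time
 * location, append the symbol and string tables, store MAGIC_TRAMP_NUMBER
 * and the ssym/esym virtual addresses at curaddr, turn the MMU off and jump
 * to the kernel entry point (this call does not return).
 */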
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
	Elf32_Ehdr *eh;
	Elf32_Phdr phdr[64] /* XXX */, *php;
	Elf32_Shdr shdr[64] /* XXX */;
	int i, j;
	void *entry_point;
	int symtabindex = -1;
	int symstrindex = -1;
	vm_offset_t lastaddr = 0;
	Elf_Addr ssym = 0;
	Elf_Dyn *dp;

	eh = (Elf32_Ehdr *)kstart;
	ssym = 0;
	entry_point = (void *)eh->e_entry;
	memcpy(phdr, (void *)(kstart + eh->e_phoff),
	    eh->e_phnum * sizeof(phdr[0]));

	/* Determine lastaddr. */
	for (i = 0; i < eh->e_phnum; i++) {
		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
		    + phdr[i].p_memsz))
			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_memsz;
	}

	/* Save the symbol tables, as they're about to be scratched. */
	memcpy(shdr, (void *)(kstart + eh->e_shoff),
	    sizeof(*shdr) * eh->e_shnum);
	if (eh->e_shnum * eh->e_shentsize != 0 &&
	    eh->e_shoff != 0) {
		for (i = 0; i < eh->e_shnum; i++) {
			if (shdr[i].sh_type == SHT_SYMTAB) {
				for (j = 0; j < eh->e_phnum; j++) {
					if (phdr[j].p_type == PT_LOAD &&
					    shdr[i].sh_offset >=
					    phdr[j].p_offset &&
					    (shdr[i].sh_offset +
					     shdr[i].sh_size <=
					     phdr[j].p_offset +
					     phdr[j].p_filesz)) {
						shdr[i].sh_offset = 0;
						shdr[i].sh_size = 0;
						j = eh->e_phnum;
					}
				}
				if (shdr[i].sh_offset != 0 &&
				    shdr[i].sh_size != 0) {
					symtabindex = i;
					symstrindex = shdr[i].sh_link;
				}
			}
		}
		func_end = roundup(func_end, sizeof(long));
		if (symtabindex >= 0 && symstrindex >= 0) {
			ssym = lastaddr;
			if (d) {
				memcpy((void *)func_end, (void *)(
				    shdr[symtabindex].sh_offset + kstart),
				    shdr[symtabindex].sh_size);
				memcpy((void *)(func_end +
				    shdr[symtabindex].sh_size),
				    (void *)(shdr[symstrindex].sh_offset +
				    kstart), shdr[symstrindex].sh_size);
			} else {
				lastaddr += shdr[symtabindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symtabindex].sh_size));
				lastaddr += sizeof(shdr[symstrindex].sh_size);
				lastaddr += shdr[symstrindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symstrindex].sh_size));
			}
		}
	}
	if (!d)
		return ((void *)lastaddr);

	j = eh->e_phnum;
	for (i = 0; i < j; i++) {
		volatile char c;

		if (phdr[i].p_type != PT_LOAD)
			continue;
		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
		    (void *)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
		/* Clean space from oversized segments, e.g. bss. */
		if (phdr[i].p_filesz < phdr[i].p_memsz)
			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
			    phdr[i].p_filesz);
	}
	/* Now grab the symbol tables. */
	if (symtabindex >= 0 && symstrindex >= 0) {
		*(Elf_Size *)lastaddr =
		    shdr[symtabindex].sh_size;
		lastaddr += sizeof(shdr[symtabindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)func_end,
		    shdr[symtabindex].sh_size);
		lastaddr += shdr[symtabindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symtabindex].sh_size));
		*(Elf_Size *)lastaddr =
		    shdr[symstrindex].sh_size;
		lastaddr += sizeof(shdr[symstrindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)(func_end +
			    shdr[symtabindex].sh_size),
		    shdr[symstrindex].sh_size);
		lastaddr += shdr[symstrindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symstrindex].sh_size));
		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
	} else
		*(Elf_Addr *)curaddr = 0;
	/* Invalidate the instruction cache and drain the write buffer. */
	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
			 "mcr p15, 0, %0, c7, c10, 4\n"
			 : : "r" (curaddr));
	__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
	    "bic %0, %0, #1\n" /* Clear the MMU enable bit */
	    "mcr p15, 0, %0, c1, c0, 0\n"
	    : "=r" (ssym));
	/* Jump to the entry point. */
	((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
	__asm __volatile(".globl func_end\n"
	    "func_end:");

	/* NOTREACHED */
	return NULL;
}

extern char func_end[];

#define PMAP_DOMAIN_KERNEL	0 /*
				    * Just define it instead of including the
				    * whole VM headers set.
				    */
int __hack;
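/*
 * Build a minimal L1 translation table at pt_addr: 1MB section mappings
 * covering [physstart, physend), plus one section at 0xfff00000 used by the
 * KB9202 workaround below, then load the TTB and domain register and enable
 * the MMU.
 */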
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
    int write_back)
{
	unsigned int *pd = (unsigned int *)pt_addr;
	vm_paddr_t addr;
	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
	int tmp;

	bzero(pd, L1_TABLE_SIZE);
	for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
		if (write_back && 0)
			pd[addr >> L1_S_SHIFT] |= L1_S_B;
	}
	/* XXX: See below */
	if (0xfff00000 < physstart || 0xfff00000 > physend)
		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* Set the TTB */
			 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush the TLB */
			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set the DACR */
			 "mrc p15, 0, %0, c1, c0, 0\n"
			 "orr %0, %0, #1\n" /* MMU_ENABLE */
			 "mcr p15, 0, %0, c1, c0, 0\n"
			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
			 "mov r0, r0\n"
			 "sub pc, pc, #4\n" :
			 "=r" (tmp) : "r" (pd), "r" (domain));

	/*
	 * XXX: This is the most stupid workaround I've ever written.
	 * For some reason, the KB9202 won't boot the kernel unless
	 * we access an address which is not in the
	 * 0x20000000 - 0x20ffffff range. I hope I'll understand
	 * what's going on later.
	 */
	__hack = *(volatile int *)0xfffff21c;
}

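/*
 * Main C entry point, reached from _startC() with the MMU off, the caches
 * enabled and a usable stack.  If the embedded kernel is gzip'd (KZIP),
 * inflate it first; then copy load_kernel() past the area it is about to
 * overwrite and invoke the copy through do_call() to perform the final ELF
 * load.
 */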
void
__start(void)
{
	void *curaddr;
	void *dst, *altdst;
	char *kernel = (char *)&kernel_start;
	int sp;
	int pt_addr;

	__asm __volatile("mov %0, pc"  :
	    "=r" (curaddr));
	curaddr = (void *)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
	if (*kernel == 0x1f && kernel[1] == 0x8b) {
		pt_addr = (((int)&_end + KERNSIZE + 0x100) &
		    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;

#ifdef CPU_ARM9
		/* So that idcache_wbinv works; */
		if ((cpufunc_id() & 0x0000f000) == 0x00009000)
			arm9_setup();
#endif
		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
		    (vm_paddr_t)curaddr + 0x10000000, 1);
		/* Gzipped kernel */
		dst = inflate_kernel(kernel, &_end);
		kernel = (char *)&_end;
		altdst = 4 + load_kernel((unsigned int)kernel,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end + 800, 0);
		if (altdst > dst)
			dst = altdst;

		/*
		 * Disable MMU.  Otherwise, the setup_pagetables call below
		 * might overwrite the L1 table we are currently using.
		 */
		cpu_idcache_wbinv_all();
		cpu_l2cache_wbinv_all();
		__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
		    "bic %0, %0, #1\n" /* MMU_DISABLE */
		    "mcr p15, 0, %0, c1, c0, 0\n"
		    : "=r" (pt_addr));
	} else
#endif
		dst = 4 + load_kernel((unsigned int)&kernel_start,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
	dst = (void *)(((vm_offset_t)dst & ~3));
	pt_addr = ((unsigned int)dst & ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
	setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
	    (vm_paddr_t)curaddr + 0x10000000, 0);
	sp = pt_addr + L1_TABLE_SIZE + 8192;
	sp = sp & ~3;
	dst = (void *)(sp + 4);
	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
	    (unsigned int)&load_kernel + 800);
	do_call(dst, kernel, dst + (unsigned int)(&func_end) -
	    (unsigned int)(&load_kernel) + 800, sp);
}

#ifdef __ARM_EABI__
/* We need to provide these functions but never call them */
void __aeabi_unwind_cpp_pr0(void);
void __aeabi_unwind_cpp_pr1(void);
void __aeabi_unwind_cpp_pr2(void);

__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr1);
__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr2);
void
__aeabi_unwind_cpp_pr0(void)
{
}
#endif