/*-
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Since we are compiled outside of the normal kernel build process, we
 * need to include opt_global.h manually.
 */
#include "opt_global.h"
#include "opt_kernname.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 248364 2013-03-16 03:50:27Z andrew $");
#include <machine/asm.h>
#include <sys/param.h>
#include <sys/elf32.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte.h>
#include <machine/cpufunc.h>
#include <machine/armreg.h>

extern char kernel_start[];
extern char kernel_end[];

extern void *_end;

void _start(void);
void __start(void);
void __startC(void);

/*
 * do_call() is implemented in assembly outside this file; this
 * prototype is assumed from its use in __start() below.
 */
extern void do_call(void *, void *, void *, int);

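/*
 * Size of the fixed gzip header: magic (0x1f 0x8b), method, flags,
 * mtime, extra flags and OS byte -- 10 bytes in all.  Optional
 * NUL-terminated fields that may follow it are skipped separately in
 * inflate_kernel().
 */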
#define GZ_HEAD	0xa

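/*
 * The trampoline runs long before the kernel's cpufuncs switch table
 * is set up, so the cache maintenance entry points are bound at
 * compile time: the CPU_* kernel options pick which CPU-specific
 * routine cpu_idcache_wbinv_all and cpu_l2cache_wbinv_all resolve to.
 */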
#ifdef CPU_ARM7TDMI
#define cpu_idcache_wbinv_all	arm7tdmi_cache_flushID
#elif defined(CPU_ARM8)
#define cpu_idcache_wbinv_all	arm8_cache_purgeID
#elif defined(CPU_ARM9)
#define cpu_idcache_wbinv_all	arm9_idcache_wbinv_all
#elif defined(CPU_FA526) || defined(CPU_FA626TE)
#define cpu_idcache_wbinv_all	fa526_idcache_wbinv_all
#elif defined(CPU_ARM9E)
#define cpu_idcache_wbinv_all	armv5_ec_idcache_wbinv_all
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all	arm10_idcache_wbinv_all
#elif defined(CPU_ARM1136) || defined(CPU_ARM1176)
#define cpu_idcache_wbinv_all	armv6_idcache_wbinv_all
#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \
    defined(CPU_IXP12X0)
#define cpu_idcache_wbinv_all	sa1_cache_purgeID
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
  defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) ||	\
  defined(CPU_XSCALE_80219)
#define cpu_idcache_wbinv_all	xscale_cache_purgeID
#elif defined(CPU_XSCALE_81342)
#define cpu_idcache_wbinv_all	xscalec3_cache_purgeID
#elif defined(CPU_MV_PJ4B)
#if !defined(SOC_MV_ARMADAXP)
#define cpu_idcache_wbinv_all	armv6_idcache_wbinv_all
#else
#define cpu_idcache_wbinv_all()	armadaxp_idcache_wbinv_all()
#endif
#endif /* CPU_MV_PJ4B */
#ifdef CPU_XSCALE_81342
#define cpu_l2cache_wbinv_all	xscalec3_l2cache_purge
#elif defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY)
#define cpu_l2cache_wbinv_all	sheeva_l2cache_wbinv_all
#elif defined(CPU_CORTEXA)
#define cpu_idcache_wbinv_all	armv7_idcache_wbinv_all
#define cpu_l2cache_wbinv_all()
#else
#define cpu_l2cache_wbinv_all()
#endif

static void armadaxp_idcache_wbinv_all(void);

int     arm_picache_size;
int     arm_picache_line_size;
int     arm_picache_ways;

int     arm_pdcache_size;       /* and unified */
int     arm_pdcache_line_size = 32;
int     arm_pdcache_ways;

int     arm_pcache_type;
int     arm_pcache_unified;

int     arm_dcache_align;
int     arm_dcache_align_mask;

u_int	arm_cache_level;
u_int	arm_cache_type[14];
u_int	arm_cache_loc;

/*
 * Additional cache information local to this file.  Log2 of some of the
 * above numbers.
 */
static int      arm_dcache_l2_nsets;
static int      arm_dcache_l2_assoc;
static int      arm_dcache_l2_linesize;

int block_userspace_access = 0;
extern int arm9_dcache_sets_inc;
extern int arm9_dcache_sets_max;
extern int arm9_dcache_index_max;
extern int arm9_dcache_index_inc;

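/*
 * The trampoline is linked standalone, so it carries its own minimal
 * memcpy() and bzero() instead of pulling in libkern.
 */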
static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

	while (len) {
		/*
		 * The word-at-a-time path is deliberately disabled with
		 * "0 &&"; only the byte copy below is ever taken.
		 */
		if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
		    !((vm_offset_t)s & 3)) {
			*(uint32_t *)d = *(uint32_t *)s;
			s += 4;
			d += 4;
			len -= 4;
		} else {
			*d++ = *s++;
			len--;
		}
	}
	return (dst);
}

static __inline void
bzero(void *addr, int count)
{
	char *tmp = (char *)addr;

	while (count > 0) {
		if (count >= 4 && !((vm_offset_t)tmp & 3)) {
			*(uint32_t *)tmp = 0;
			tmp += 4;
			count -= 4;
		} else {
			*tmp = 0;
			tmp++;
			count--;
		}
	}
}

static void arm9_setup(void);

void
_startC(void)
{
	int physaddr = KERNPHYSADDR;
	int tmp1;
	unsigned int sp = ((unsigned int)&_end & ~3) + 4;
#if defined(FLASHADDR) && defined(LOADERRAMADDR)
	unsigned int pc;

	__asm __volatile("mov %0, pc\n"
	    : "=r" (pc));
	if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
	    (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
		/*
		 * We're running from flash, so just copy the whole thing
		 * from flash to memory.
		 * This is far from optimal; we could do the relocation or
		 * the unzipping directly from flash to memory and avoid
		 * this needless copy, but it would require knowing the
		 * flash physical address.
		 */
		unsigned int target_addr;
		unsigned int tmp_sp;
		/* Page-aligned source of this image in flash. */
		uint32_t src_addr = ((uint32_t)&_start - PHYSADDR + FLASHADDR
		    + (pc - FLASHADDR - ((uint32_t)&_startC - PHYSADDR))) &
		    0xfffff000;

		target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
		tmp_sp = target_addr + 0x100000 +
		    (unsigned int)&_end - (unsigned int)&_start;
		memcpy((char *)target_addr, (char *)src_addr,
		    (unsigned int)&_end - (unsigned int)&_start);
		/* Temporarily set the sp and jump to the new location. */
		__asm __volatile(
		    "mov sp, %1\n"
		    "mov pc, %0\n"
		    : : "r" (target_addr), "r" (tmp_sp));
	}
#endif
#ifdef KZIP
	sp += KERNSIZE + 0x100;
	sp &= ~(L1_TABLE_SIZE - 1);
	sp += 2 * L1_TABLE_SIZE;
#endif
	sp += 1024 * 1024; /* Should be enough for a stack */

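	/*
	 * Take the physical address of label 2 below (adr yields a
	 * PC-relative address; its top byte is replaced with the top
	 * byte of physaddr), write SCTLR once to turn the MMU off while
	 * enabling the I/D caches, the write buffer and branch
	 * prediction, then jump to the physical alias and install the
	 * new stack pointer.
	 */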
	__asm __volatile("adr %0, 2f\n"
			 "bic %0, %0, #0xff000000\n"
			 "and %1, %1, #0xff000000\n"
			 "orr %0, %0, %1\n"
			 "mrc p15, 0, %1, c1, c0, 0\n"
			 "bic %1, %1, #1\n" /* Disable MMU */
			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
						     WBUF enable */
			 "orr %1, %1, #0x1000\n" /* Add IC enable */
			 "orr %1, %1, #(0x800)\n" /* BPRD enable */
			 "mcr p15, 0, %1, c1, c0, 0\n"
			 "nop\n"
			 "nop\n"
			 "nop\n"
			 "mov pc, %0\n"
			 "2: nop\n"
			 "mov sp, %2\n"
			 : "=r" (tmp1), "+r" (physaddr), "+r" (sp));
#ifndef KZIP
#ifdef CPU_ARM9
	/* So that idcache_wbinv_all works. */
	if ((cpufunc_id() & 0x0000f000) == 0x00009000)
		arm9_setup();
#endif
#endif
	__start();
}

static void
get_cachetype_cp15(void)
{
	u_int ctype, isize, dsize, cpuid;
	u_int clevel, csize, i, sel;
	u_int multiplier;
	u_char type;

	__asm __volatile("mrc p15, 0, %0, c0, c0, 1"
		: "=r" (ctype));

	cpuid = cpufunc_id();
	/*
	 * ...and thus spake the ARM ARM:
	 *
	 * If an <opcode2> value corresponding to an unimplemented or
	 * reserved ID register is encountered, the System Control
	 * processor returns the value of the main ID register.
	 */
	if (ctype == cpuid)
		goto out;

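	/*
	 * ARMv7 moved the cache geometry out of the cache type register:
	 * walk CLIDR to find each cache level and type, select each
	 * data/unified and instruction cache in turn through CSSELR, and
	 * record its CCSIDR geometry word in arm_cache_type[].
	 */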
	if (CPU_CT_FORMAT(ctype) == CPU_CT_ARMV7) {
		__asm __volatile("mrc p15, 1, %0, c0, c0, 1"
		    : "=r" (clevel));
		arm_cache_level = clevel;
		arm_cache_loc = CPU_CLIDR_LOC(arm_cache_level) + 1;
		i = 0;
		while ((type = (clevel & 0x7)) && i < 7) {
			if (type == CACHE_DCACHE || type == CACHE_UNI_CACHE ||
			    type == CACHE_SEP_CACHE) {
				sel = i << 1;
				__asm __volatile("mcr p15, 2, %0, c0, c0, 0"
				    : : "r" (sel));
				__asm __volatile("mrc p15, 1, %0, c0, c0, 0"
				    : "=r" (csize));
				arm_cache_type[sel] = csize;
			}
			if (type == CACHE_ICACHE || type == CACHE_SEP_CACHE) {
				sel = (i << 1) | 1;
				__asm __volatile("mcr p15, 2, %0, c0, c0, 0"
				    : : "r" (sel));
				__asm __volatile("mrc p15, 1, %0, c0, c0, 0"
				    : "=r" (csize));
				arm_cache_type[sel] = csize;
			}
			i++;
			clevel >>= 3;
		}
	} else {
		if ((ctype & CPU_CT_S) == 0)
			arm_pcache_unified = 1;

		/*
		 * If you want to know how this code works, go read the ARM ARM.
		 */

		arm_pcache_type = CPU_CT_CTYPE(ctype);

		if (arm_pcache_unified == 0) {
			isize = CPU_CT_ISIZE(ctype);
			multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
			arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
			if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
				if (isize & CPU_CT_xSIZE_M)
					arm_picache_line_size = 0; /* not present */
				else
					arm_picache_ways = 1;
			} else {
				arm_picache_ways = multiplier <<
				    (CPU_CT_xSIZE_ASSOC(isize) - 1);
			}
			arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
		}

		dsize = CPU_CT_DSIZE(ctype);
		multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
		arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
		if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
			if (dsize & CPU_CT_xSIZE_M)
				arm_pdcache_line_size = 0; /* not present */
			else
				arm_pdcache_ways = 1;
		} else {
			arm_pdcache_ways = multiplier <<
			    (CPU_CT_xSIZE_ASSOC(dsize) - 1);
		}
		arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);

		arm_dcache_align = arm_pdcache_line_size;

		arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
		arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
		arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
		    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);

	/* Labels have function scope, so the goto above lands here. */
	out:
		arm_dcache_align_mask = arm_dcache_align - 1;
	}
}

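/*
 * Seed the loop bounds used by the arm9 D-cache maintenance routines
 * from the geometry probed above: the set index advances by one cache
 * line (arm9_dcache_sets_inc) up to sets_max, and the way index lives
 * in the top bits of the c7 operand, advancing by index_inc up to
 * index_max.
 */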
static void
arm9_setup(void)
{

	get_cachetype_cp15();
	arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
	arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
	    arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
	arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
	arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
}

static void
armadaxp_idcache_wbinv_all(void)
{
	uint32_t feat;

	/*
	 * Use the ThumbEE field in ID_PFR0 as a proxy for an ARMv7-class
	 * core and pick the matching cache-flush routine.
	 */
	__asm __volatile("mrc p15, 0, %0, c0, c1, 0" : "=r" (feat));
	if (feat & ARM_PFR0_THUMBEE_MASK)
		armv7_idcache_wbinv_all();
	else
		armv6_idcache_wbinv_all();
}
#ifdef KZIP
static  unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;		/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */

/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000
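
/*
 * Fixed arena backing kzipmalloc(): inflate()'s transient allocations
 * fit in 32KB.  kzipfree() only resets the arena once every
 * outstanding block has been freed.
 */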

/*
 * Allocate memory block.
 */
unsigned char *
kzipmalloc(int size)
{
	void *ptr;
	static u_char mem[MEMSIZ];

	if (memtot + size > MEMSIZ)
		return (NULL);
	ptr = mem + memtot;
	memtot += size;
	memcnt++;
	return (ptr);
}

/*
 * Free allocated memory block.
 */
void
kzipfree(void *ptr)
{
	memcnt--;
	if (!memcnt)
		memtot = 0;
}

void
putstr(char *dummy)
{
}

static int
input(void *dummy)
{
	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
		return (GZ_EOF);
	}
	return (*i_input++);
}

static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

	memcpy(i_output, ptr, len);
	i_output += len;
	return (0);
}

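/*
 * Decompress the gzip'ed kernel image to startaddr: skip the fixed
 * 10-byte header, plus one NUL-terminated FNAME/FCOMMENT field when
 * the FLG byte (byte 3) has 0x18 set, then run inflate() with the
 * callbacks above.  Returns the first word-aligned address past the
 * decompressed image.
 */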
static void *
inflate_kernel(void *kernel, void *startaddr)
{
	struct inflate infl;
	char slide[GZ_WSIZE];

	orig_input = kernel;
	memcnt = memtot = 0;
	i_input = (unsigned char *)kernel + GZ_HEAD;
	if (((char *)kernel)[3] & 0x18) {
		while (*i_input)
			i_input++;
		i_input++;
	}
	i_output = startaddr;
	bzero(&infl, sizeof(infl));
	infl.gz_input = input;
	infl.gz_output = output;
	infl.gz_slide = slide;
	inflate(&infl);
	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
}

#endif

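/*
 * load_kernel() runs twice.  The first call, with d == 0, is a dry run
 * that only returns the address just past the loaded image and symbol
 * tables.  The second call, made through do_call() into a relocated
 * copy of this code with d != 0, copies the PT_LOAD segments into
 * place, zeroes the BSS, installs the symbol tables at the end of the
 * image, and jumps to the kernel entry point, never returning.  The
 * func_end argument is scratch space where the symbol tables are
 * staged while the segments are copied over them.
 */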
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
	Elf32_Ehdr *eh;
	Elf32_Phdr phdr[64] /* XXX */;
	Elf32_Shdr shdr[64] /* XXX */;
	int i, j;
	void *entry_point;
	int symtabindex = -1;
	int symstrindex = -1;
	vm_offset_t lastaddr = 0;
	Elf_Addr ssym = 0;

	eh = (Elf32_Ehdr *)kstart;
	entry_point = (void *)eh->e_entry;
	memcpy(phdr, (void *)(kstart + eh->e_phoff),
	    eh->e_phnum * sizeof(phdr[0]));

	/* Determine lastaddr. */
	for (i = 0; i < eh->e_phnum; i++) {
		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
		    + phdr[i].p_memsz))
			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_memsz;
	}

	/* Save the symbol tables, as they're about to be scratched. */
	memcpy(shdr, (void *)(kstart + eh->e_shoff),
	    sizeof(*shdr) * eh->e_shnum);
	if (eh->e_shnum * eh->e_shentsize != 0 &&
	    eh->e_shoff != 0) {
		for (i = 0; i < eh->e_shnum; i++) {
			if (shdr[i].sh_type == SHT_SYMTAB) {
				/*
				 * Ignore a symbol table that lives inside a
				 * PT_LOAD segment; it would be clobbered when
				 * the segments are copied into place.
				 */
				for (j = 0; j < eh->e_phnum; j++) {
					if (phdr[j].p_type == PT_LOAD &&
					    shdr[i].sh_offset >=
					    phdr[j].p_offset &&
					    (shdr[i].sh_offset +
					     shdr[i].sh_size <=
					     phdr[j].p_offset +
					     phdr[j].p_filesz)) {
						shdr[i].sh_offset = 0;
						shdr[i].sh_size = 0;
						j = eh->e_phnum;
					}
				}
				if (shdr[i].sh_offset != 0 &&
				    shdr[i].sh_size != 0) {
					symtabindex = i;
					symstrindex = shdr[i].sh_link;
				}
			}
		}
		func_end = roundup(func_end, sizeof(long));
		if (symtabindex >= 0 && symstrindex >= 0) {
			ssym = lastaddr;
			if (d) {
				/* Stage .symtab and .strtab at func_end. */
				memcpy((void *)func_end, (void *)(
				    shdr[symtabindex].sh_offset + kstart),
				    shdr[symtabindex].sh_size);
				memcpy((void *)(func_end +
				    shdr[symtabindex].sh_size),
				    (void *)(shdr[symstrindex].sh_offset +
				    kstart), shdr[symstrindex].sh_size);
			} else {
				/*
				 * Dry run: account for the space the symbol
				 * tables and their size words will occupy.
				 */
				lastaddr += shdr[symtabindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symtabindex].sh_size));
				lastaddr += sizeof(shdr[symstrindex].sh_size);
				lastaddr += shdr[symstrindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symstrindex].sh_size));
			}
		}
	}
	if (!d)
		return ((void *)lastaddr);

	j = eh->e_phnum;
	for (i = 0; i < j; i++) {
		if (phdr[i].p_type != PT_LOAD)
			continue;
		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
		    (void *)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
		/* Clean space from oversized segments, e.g. bss. */
		if (phdr[i].p_filesz < phdr[i].p_memsz)
			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
			    phdr[i].p_filesz);
	}
	/* Now grab the symbol tables. */
	if (symtabindex >= 0 && symstrindex >= 0) {
		*(Elf_Size *)lastaddr =
		    shdr[symtabindex].sh_size;
		lastaddr += sizeof(shdr[symtabindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)func_end,
		    shdr[symtabindex].sh_size);
		lastaddr += shdr[symtabindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symtabindex].sh_size));
		*(Elf_Size *)lastaddr =
		    shdr[symstrindex].sh_size;
		lastaddr += sizeof(shdr[symstrindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)(func_end +
			    shdr[symtabindex].sh_size),
		    shdr[symstrindex].sh_size);
		lastaddr += shdr[symstrindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symstrindex].sh_size));
		/* Leave the magic number and ssym/esym for locore to find. */
		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
	} else
		*(Elf_Addr *)curaddr = 0;
	/* Invalidate the instruction cache and drain the write buffer. */
	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
			 "mcr p15, 0, %0, c7, c10, 4\n"
			 : : "r" (curaddr));
	/* Disable the MMU; ssym is just a scratch register here. */
	__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
	    "bic %0, %0, #1\n" /* MMU_DISABLE */
	    "mcr p15, 0, %0, c1, c0, 0\n"
	    : "=r" (ssym));
	/* Jump to the entry point. */
	((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
	/* NOTREACHED; func_end marks the end of the relocatable code. */
	__asm __volatile(".globl func_end\n"
	    "func_end:");
}


extern char func_end[];

/*
 * Just define PMAP_DOMAIN_KERNEL here instead of including the whole
 * VM header set.
 */
#define PMAP_DOMAIN_KERNEL	0

int __hack;
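
/*
 * Build a minimal L1 translation table at pt_addr: 1MB sections
 * identity-mapping [physstart, physend) read/write for the kernel
 * domain, plus one section mapping 0xfff00000 to physstart for the
 * workaround below, then set the TTB and DACR and turn the MMU on.
 */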
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
    int write_back)
{
	unsigned int *pd = (unsigned int *)pt_addr;
	vm_paddr_t addr;
	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
	int tmp;

	bzero(pd, L1_TABLE_SIZE);
	for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
		/* The write-back bit is deliberately disabled with "&& 0". */
		if (write_back && 0)
			pd[addr >> L1_S_SHIFT] |= L1_S_B;
	}
	/* XXX: See below. */
	if (0xfff00000 < physstart || 0xfff00000 > physend)
		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* Set the TTB */
			 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush the TLB */
			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set the DACR */
			 "mrc p15, 0, %0, c1, c0, 0\n"
			 "orr %0, %0, #1\n" /* MMU_ENABLE */
			 "mcr p15, 0, %0, c1, c0, 0\n"
			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
			 "mov r0, r0\n"
			 "sub pc, pc, #4\n" :
			 "=r" (tmp) : "r" (pd), "r" (domain));

	/*
	 * XXX: This is the most stupid workaround I've ever written.
	 * For some reason, the KB9202 won't boot the kernel unless
	 * we access an address which is not in the
	 * 0x20000000 - 0x20ffffff range. I hope I'll understand
	 * what's going on later.
	 */
	__hack = *(volatile int *)0xfffff21c;
}

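/*
 * Main trampoline logic: find our load address from the pc, inflate
 * the kernel if it is gzip'ed, make a dry run of load_kernel() to
 * size the image, place the L1 page table and stack past everything,
 * copy the load_kernel() code beyond the area it is about to
 * overwrite, and call that copy to load the real kernel and enter it.
 */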
void
__start(void)
{
	void *curaddr;
	void *dst, *altdst;
	char *kernel = (char *)&kernel_start;
	int sp;
	int pt_addr;

	__asm __volatile("mov %0, pc" :
	    "=r" (curaddr));
	curaddr = (void *)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
	/* A gzip'ed kernel starts with the magic bytes 0x1f, 0x8b. */
	if (*kernel == 0x1f && kernel[1] == 0x8b) {
		pt_addr = (((int)&_end + KERNSIZE + 0x100) &
		    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;

#ifdef CPU_ARM9
		/* So that idcache_wbinv_all works. */
		if ((cpufunc_id() & 0x0000f000) == 0x00009000)
			arm9_setup();
#endif
		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
		    (vm_paddr_t)curaddr + 0x10000000, 1);
		/* Inflate the gzip'ed kernel to _end. */
		dst = inflate_kernel(kernel, &_end);
		kernel = (char *)&_end;
		altdst = 4 + load_kernel((unsigned int)kernel,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end + 800, 0);
		if (altdst > dst)
			dst = altdst;

		/*
		 * Disable MMU.  Otherwise, the setup_pagetables() call
		 * below might overwrite the L1 table we are currently
		 * using.
		 */
		cpu_idcache_wbinv_all();
		cpu_l2cache_wbinv_all();
		__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
		    "bic %0, %0, #1\n" /* MMU_DISABLE */
		    "mcr p15, 0, %0, c1, c0, 0\n"
		    : "=r" (pt_addr));
	} else
#endif
		dst = 4 + load_kernel((unsigned int)&kernel_start,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
	dst = (void *)((vm_offset_t)dst & ~3);
	pt_addr = ((unsigned int)dst & ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
	setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
	    (vm_paddr_t)curaddr + 0x10000000, 0);
	sp = pt_addr + L1_TABLE_SIZE + 8192;
	sp = sp & ~3;
	dst = (void *)(sp + 4);
	/*
	 * Copy the load_kernel() code (everything up to func_end, plus
	 * some slack) past the area it is about to overwrite, and call
	 * the copy on a fresh stack.
	 */
	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
	    (unsigned int)&load_kernel + 800);
	do_call(dst, kernel, dst + (unsigned int)(&func_end) -
	    (unsigned int)(&load_kernel) + 800, sp);
}

#ifdef __ARM_EABI__
/* We need to provide these functions but never call them. */
void __aeabi_unwind_cpp_pr0(void);
void __aeabi_unwind_cpp_pr1(void);
void __aeabi_unwind_cpp_pr2(void);

__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr1);
__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr2);

void
__aeabi_unwind_cpp_pr0(void)
{
}
#endif