elf_trampoline.c revision 171619
/*-
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 171619 2007-07-27 14:42:25Z cognet $");
#include <machine/asm.h>
#include <sys/param.h>
#include <sys/elf32.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte.h>
#include <machine/cpufunc.h>
#include <machine/armreg.h>

#include <stdlib.h>

/*
 * Since we are compiled outside of the normal kernel build process, we
 * need to include opt_global.h manually.
 */
#include "opt_global.h"
#include "opt_kernname.h"

extern char kernel_start[];
extern char kernel_end[];

extern void *_end;

extern void _start(void);
void __start(void);
void _startC(void);

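/* Size in bytes of the fixed gzip header that precedes the deflate data. */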
#define GZ_HEAD	0xa

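/*
 * The trampoline runs before the kernel's cpufunc switch is initialized,
 * so map the generic cache-maintenance names straight to the per-CPU
 * implementations selected by the kernel config.
 */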
#ifdef CPU_ARM7TDMI
#define cpu_idcache_wbinv_all	arm7tdmi_cache_flushID
#elif defined(CPU_ARM8)
#define cpu_idcache_wbinv_all	arm8_cache_purgeID
#elif defined(CPU_ARM9)
#define cpu_idcache_wbinv_all	arm9_idcache_wbinv_all
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all	arm10_idcache_wbinv_all
#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \
    defined(CPU_IXP12X0)
#define cpu_idcache_wbinv_all	sa1_cache_purgeID
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
    defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \
    defined(CPU_XSCALE_80219)
#define cpu_idcache_wbinv_all	xscale_cache_purgeID
#elif defined(CPU_XSCALE_81342)
#define cpu_idcache_wbinv_all	xscalec3_cache_purgeID
#endif
#ifdef CPU_XSCALE_81342
#define cpu_l2cache_wbinv_all	xscalec3_l2cache_purge
#else
#define cpu_l2cache_wbinv_all()
#endif

int     arm_picache_size;
int     arm_picache_line_size;
int     arm_picache_ways;

int     arm_pdcache_size;       /* and unified */
int     arm_pdcache_line_size = 32;
int     arm_pdcache_ways;

int     arm_pcache_type;
int     arm_pcache_unified;

int     arm_dcache_align;
int     arm_dcache_align_mask;

/*
 * Additional cache information local to this file.  Log2 of some of the
 * above numbers.
 */
static int      arm_dcache_l2_nsets;
static int      arm_dcache_l2_assoc;
static int      arm_dcache_l2_linesize;

int block_userspace_access = 0;
extern int arm9_dcache_sets_inc;
extern int arm9_dcache_sets_max;
extern int arm9_dcache_index_max;
extern int arm9_dcache_index_inc;

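/*
 * Minimal local memcpy() and bzero(): the trampoline is built standalone,
 * without libc or the kernel's libkern.
 */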
static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

	while (len) {
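		/*
		 * Note: the word-at-a-time path below is dead code; the
		 * "0 &&" deliberately forces the byte copy.
		 */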
		if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
		    !((vm_offset_t)s & 3)) {
			*(uint32_t *)d = *(uint32_t *)s;
			s += 4;
			d += 4;
			len -= 4;
		} else {
			*d++ = *s++;
			len--;
		}
	}
	return (dst);
}

static __inline void
bzero(void *addr, int count)
{
	char *tmp = (char *)addr;

	while (count > 0) {
		if (count >= 4 && !((vm_offset_t)tmp & 3)) {
			*(uint32_t *)tmp = 0;
			tmp += 4;
			count -= 4;
		} else {
			*tmp = 0;
			tmp++;
			count--;
		}
	}
}

static void arm9_setup(void);

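/* C entry point, reached from the _start assembly stub. */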
void
_startC(void)
{
	int physaddr = KERNPHYSADDR;
	int tmp1;
	unsigned int sp = ((unsigned int)&_end & ~3) + 4;
#if defined(FLASHADDR) && defined(LOADERRAMADDR)
	unsigned int pc;

	__asm __volatile("adr %0, _start\n"
	    : "=r" (pc));
	if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
	    (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
		/*
		 * We're running from flash, so just copy the whole thing
		 * from flash to memory.
		 * This is far from optimal: we could do the relocation or
		 * the unzipping directly from flash to memory to avoid this
		 * needless copy, but it would require knowing the flash
		 * physical address.
		 */
		unsigned int target_addr;
		unsigned int tmp_sp;

		target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
		tmp_sp = target_addr + 0x100000 +
		    (unsigned int)&_end - (unsigned int)&_start;
		memcpy((char *)target_addr, (char *)pc,
		    (unsigned int)&_end - (unsigned int)&_start);
		/* Temporarily set the sp and jump to the new location. */
		__asm __volatile(
		    "mov sp, %1\n"
		    "mov pc, %0\n"
		    : : "r" (target_addr), "r" (tmp_sp));
	}
#endif
#ifdef KZIP
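	/*
	 * Reserve room for the inflated kernel plus two L1 page tables
	 * ahead of the temporary stack.
	 */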
	sp += KERNSIZE + 0x100;
	sp &= ~(L1_TABLE_SIZE - 1);
	sp += 2 * L1_TABLE_SIZE;
#endif
	sp += 1024 * 1024; /* Should be enough for a stack */

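	/*
	 * Relocate the PC into the physical address space with the MMU
	 * kept disabled, enable the I/D caches, the write buffer and
	 * branch prediction, then install the stack pointer computed
	 * above.
	 */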
	__asm __volatile("adr %0, 2f\n"
			 "bic %0, %0, #0xff000000\n"
			 "and %1, %1, #0xff000000\n"
			 "orr %0, %0, %1\n"
			 "mrc p15, 0, %1, c1, c0, 0\n"
			 "bic %1, %1, #1\n" /* Disable MMU */
			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
						     WBUF enable */
			 "orr %1, %1, #0x1000\n" /* Add IC enable */
			 "orr %1, %1, #(0x800)\n" /* BPRD enable */
			 "mcr p15, 0, %1, c1, c0, 0\n"
			 "nop\n"
			 "nop\n"
			 "nop\n"
			 "mov pc, %0\n"
			 "2: nop\n"
			 "mov sp, %2\n"
			 : "=r" (tmp1), "+r" (physaddr), "+r" (sp));
#ifndef KZIP
#ifdef CPU_ARM9
	/* So that idcache_wbinv works. */
	if ((cpufunc_id() & 0x0000f000) == 0x00009000)
		arm9_setup();
#endif
	cpu_idcache_wbinv_all();
	cpu_l2cache_wbinv_all();
#endif
	__start();
}

static void
get_cachetype_cp15(void)
{
	u_int ctype, isize, dsize;
	u_int multiplier;

	__asm __volatile("mrc p15, 0, %0, c0, c0, 1"
	    : "=r" (ctype));

	/*
	 * ...and thus spake the ARM ARM:
	 *
	 * If an <opcode2> value corresponding to an unimplemented or
	 * reserved ID register is encountered, the System Control
	 * processor returns the value of the main ID register.
	 */
	if (ctype == cpufunc_id())
		goto out;

	if ((ctype & CPU_CT_S) == 0)
		arm_pcache_unified = 1;

	/*
	 * If you want to know how this code works, go read the ARM ARM.
	 */

	arm_pcache_type = CPU_CT_CTYPE(ctype);
	if (arm_pcache_unified == 0) {
		isize = CPU_CT_ISIZE(ctype);
		multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
		arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
		if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
			if (isize & CPU_CT_xSIZE_M)
				arm_picache_line_size = 0; /* not present */
			else
				arm_picache_ways = 1;
		} else {
			arm_picache_ways = multiplier <<
			    (CPU_CT_xSIZE_ASSOC(isize) - 1);
		}
		arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
	}

	dsize = CPU_CT_DSIZE(ctype);
	multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
	arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
	if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
		if (dsize & CPU_CT_xSIZE_M)
			arm_pdcache_line_size = 0; /* not present */
		else
			arm_pdcache_ways = 1;
	} else {
		arm_pdcache_ways = multiplier <<
		    (CPU_CT_xSIZE_ASSOC(dsize) - 1);
	}
	arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);

	arm_dcache_align = arm_pdcache_line_size;

	arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
	arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
	arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
	    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);
 out:
	arm_dcache_align_mask = arm_dcache_align - 1;
}

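/*
 * Derive the set/index iteration parameters consumed by the arm9 cache
 * maintenance routines from the CP15 cache type register.
 */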
static void
arm9_setup(void)
{

	get_cachetype_cp15();
	arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
	arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
	    arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
	arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
	arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
}

#ifdef KZIP
static unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;		/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */

/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000

/*
 * Allocate memory block.
 */
unsigned char *
kzipmalloc(int size)
{
	void *ptr;
	static u_char mem[MEMSIZ];

	if (memtot + size > MEMSIZ)
		return NULL;
	ptr = mem + memtot;
	memtot += size;
	memcnt++;
	return ptr;
}

/*
 * Free allocated memory block.
 */
void
kzipfree(void *ptr)
{
	memcnt--;
	if (!memcnt)
		memtot = 0;
}

void
putstr(char *dummy)
{
}

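/*
 * inflate() callbacks: input() feeds bytes from the compressed image and
 * output() copies the inflated data to its destination.
 */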
static int
input(void *dummy)
{
	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
		return (GZ_EOF);
	}
	return *i_input++;
}

static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

	memcpy(i_output, ptr, len);
	i_output += len;
	return (0);
}

static void *
inflate_kernel(void *kernel, void *startaddr)
{
	struct inflate infl;
	char slide[GZ_WSIZE];

	orig_input = kernel;
	memcnt = memtot = 0;
	i_input = (unsigned char *)kernel + GZ_HEAD;
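	/*
	 * If the FLG byte has FNAME or FCOMMENT set, skip one
	 * NUL-terminated field (only a single such field is handled).
	 */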
	if (((char *)kernel)[3] & 0x18) {
		while (*i_input)
			i_input++;
		i_input++;
	}
	i_output = startaddr;
	bzero(&infl, sizeof(infl));
	infl.gz_input = input;
	infl.gz_output = output;
	infl.gz_slide = slide;
	inflate(&infl);
	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
}
#endif

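/*
 * Relocate the ELF kernel image at kstart so that it can run at curaddr.
 * When d is zero, only compute and return the first free address past
 * the loaded image (symbol tables included).  When d is non-zero, copy
 * the segments into place, preserving the symbol tables in scratch
 * space past func_end, and jump to the kernel's entry point (this call
 * does not return).
 */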
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
	Elf32_Ehdr *eh;
	Elf32_Phdr phdr[64] /* XXX */, *php;
	Elf32_Shdr shdr[64] /* XXX */;
	int i, j;
	void *entry_point;
	int symtabindex = -1;
	int symstrindex = -1;
	vm_offset_t lastaddr = 0;
	Elf_Addr ssym = 0, esym = 0;
	Elf_Dyn *dp;

	eh = (Elf32_Ehdr *)kstart;
	ssym = esym = 0;
	entry_point = (void *)eh->e_entry;
	memcpy(phdr, (void *)(kstart + eh->e_phoff),
	    eh->e_phnum * sizeof(phdr[0]));

	/* Determine lastaddr. */
	for (i = 0; i < eh->e_phnum; i++) {
		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
		    + phdr[i].p_memsz))
			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_memsz;
	}

	/* Save the symbol tables, as they're about to be scratched. */
	memcpy(shdr, (void *)(kstart + eh->e_shoff),
	    sizeof(*shdr) * eh->e_shnum);
	if (eh->e_shnum * eh->e_shentsize != 0 &&
	    eh->e_shoff != 0) {
		for (i = 0; i < eh->e_shnum; i++) {
			if (shdr[i].sh_type == SHT_SYMTAB) {
				for (j = 0; j < eh->e_phnum; j++) {
					if (phdr[j].p_type == PT_LOAD &&
					    shdr[i].sh_offset >=
					    phdr[j].p_offset &&
					    (shdr[i].sh_offset +
					     shdr[i].sh_size <=
					     phdr[j].p_offset +
					     phdr[j].p_filesz)) {
						shdr[i].sh_offset = 0;
						shdr[i].sh_size = 0;
						j = eh->e_phnum;
					}
				}
				if (shdr[i].sh_offset != 0 &&
				    shdr[i].sh_size != 0) {
					symtabindex = i;
					symstrindex = shdr[i].sh_link;
				}
			}
		}
		func_end = roundup(func_end, sizeof(long));
		if (symtabindex >= 0 && symstrindex >= 0) {
			ssym = lastaddr;
			if (d) {
				memcpy((void *)func_end, (void *)(
				    shdr[symtabindex].sh_offset + kstart),
				    shdr[symtabindex].sh_size);
				memcpy((void *)(func_end +
				    shdr[symtabindex].sh_size),
				    (void *)(shdr[symstrindex].sh_offset +
				    kstart), shdr[symstrindex].sh_size);
			} else {
				lastaddr += sizeof(shdr[symtabindex].sh_size);
				lastaddr += shdr[symtabindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symtabindex].sh_size));
				lastaddr += sizeof(shdr[symstrindex].sh_size);
				lastaddr += shdr[symstrindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symstrindex].sh_size));
			}
		}
	}
	if (!d)
		return ((void *)lastaddr);

	j = eh->e_phnum;
	for (i = 0; i < j; i++) {
		volatile char c;

		if (phdr[i].p_type != PT_LOAD)
			continue;
		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
		    (void *)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
		/* Clean space from oversized segments, e.g. bss. */
		if (phdr[i].p_filesz < phdr[i].p_memsz)
			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
			    phdr[i].p_filesz);
	}
	/* Now grab the symbol tables. */
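	/*
	 * Layout at lastaddr: symtab size word, symtab data, strtab size
	 * word, strtab data, each rounded up to word size; ssym/esym and
	 * MAGIC_TRAMP_NUMBER are then stashed at the start of the image.
	 */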
	if (symtabindex >= 0 && symstrindex >= 0) {
		*(Elf_Size *)lastaddr =
		    shdr[symtabindex].sh_size;
		lastaddr += sizeof(shdr[symtabindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)func_end,
		    shdr[symtabindex].sh_size);
		lastaddr += shdr[symtabindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symtabindex].sh_size));
		*(Elf_Size *)lastaddr =
		    shdr[symstrindex].sh_size;
		lastaddr += sizeof(shdr[symstrindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)(func_end +
		    shdr[symtabindex].sh_size),
		    shdr[symstrindex].sh_size);
		lastaddr += shdr[symstrindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symstrindex].sh_size));
		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
	} else
		*(Elf_Addr *)curaddr = 0;
	/* Invalidate the instruction cache and drain the write buffer. */
	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
			 "mcr p15, 0, %0, c7, c10, 4\n"
			 : : "r" (curaddr));
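	/* The kernel expects to be entered with the MMU off. */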
	__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
	    "bic %0, %0, #1\n" /* Disable MMU */
	    "mcr p15, 0, %0, c1, c0, 0\n"
	    : "=r" (ssym));
	/* Jump to the entry point. */
	((void (*)(void))(entry_point - KERNVIRTADDR + curaddr))();
	__asm __volatile(".globl func_end\n"
	    "func_end:");
}

extern char func_end[];

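/*
 * do_call() is provided outside this file (presumably by the assembly
 * startup glue); this declaration is an assumption matching its use in
 * __start() below.
 */
extern void do_call(void *, void *, void *, int);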

/*
 * Just define PMAP_DOMAIN_KERNEL here instead of pulling in the whole
 * set of VM headers.
 */
#define PMAP_DOMAIN_KERNEL	0

int __hack;
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
    int write_back)
{
	unsigned int *pd = (unsigned int *)pt_addr;
	vm_paddr_t addr;
	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
	int tmp;

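	/*
	 * Identity-map physstart..physend with cacheable 1MB section
	 * entries, kernel read/write.
	 */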
	bzero(pd, L1_TABLE_SIZE);
	for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
		if (write_back)
			pd[addr >> L1_S_SHIFT] |= L1_S_B;
	}
	/* XXX: See below */
	if (0xfff00000 < physstart || 0xfff00000 > physend)
		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* Set the TTB */
			 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush the TLB */
			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set the DACR */
			 "mrc p15, 0, %0, c1, c0, 0\n"
			 "orr %0, %0, #1\n" /* Enable MMU */
			 "mcr p15, 0, %0, c1, c0, 0\n"
			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
			 "mov r0, r0\n"
			 "sub pc, pc, #4\n" :
			 "=r" (tmp) : "r" (pd), "r" (domain));

	/*
	 * XXX: This is the most stupid workaround I've ever written.
	 * For some reason, the KB9202 won't boot the kernel unless
	 * we access an address which is not in the
	 * 0x20000000 - 0x20ffffff range. I hope I'll understand
	 * what's going on later.
	 */
	__hack = *(volatile int *)0xfffff21c;
}

void
__start(void)
{
	void *curaddr;
	void *dst, *altdst;
	char *kernel = (char *)&kernel_start;
	int sp;
	int pt_addr;

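	/* Find the 1MB physical section we are currently executing from. */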
	__asm __volatile("mov %0, pc" :
	    "=r" (curaddr));
	curaddr = (void *)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
	if (*kernel == 0x1f && kernel[1] == 0x8b) {
		pt_addr = (((int)&_end + KERNSIZE + 0x100) &
		    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;

#ifdef CPU_ARM9
		/* So that idcache_wbinv works. */
		if ((cpufunc_id() & 0x0000f000) == 0x00009000)
			arm9_setup();
#endif
		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
		    (vm_paddr_t)curaddr + 0x10000000, 1);
		/* Gzipped kernel */
		dst = inflate_kernel(kernel, &_end);
		kernel = (char *)&_end;
		altdst = 4 + load_kernel((unsigned int)kernel,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end + 800, 0);
		if (altdst > dst)
			dst = altdst;
		cpu_idcache_wbinv_all();
		cpu_l2cache_wbinv_all();
		__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
		    "bic %0, %0, #1\n" /* Disable MMU */
		    "mcr p15, 0, %0, c1, c0, 0\n"
		    : "=r" (pt_addr));
	} else
#endif
		dst = 4 + load_kernel((unsigned int)&kernel_start,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
	dst = (void *)((vm_offset_t)dst & ~3);
	pt_addr = ((unsigned int)dst & ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
	setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
	    (vm_paddr_t)curaddr + 0x10000000, 0);
	sp = pt_addr + L1_TABLE_SIZE + 8192;
	sp = sp & ~3;
	dst = (void *)(sp + 4);
	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
	    (unsigned int)&load_kernel + 800);
	do_call(dst, kernel, dst + (unsigned int)(&func_end) -
	    (unsigned int)(&load_kernel) + 800, sp);
}
633