elf_trampoline.c revision 194609
1150861Scognet/*-
2150861Scognet * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
3150861Scognet *
4150861Scognet * Redistribution and use in source and binary forms, with or without
5150861Scognet * modification, are permitted provided that the following conditions
6150861Scognet * are met:
7150861Scognet * 1. Redistributions of source code must retain the above copyright
8150861Scognet *    notice, this list of conditions and the following disclaimer.
9150861Scognet * 2. Redistributions in binary form must reproduce the above copyright
10150861Scognet *    notice, this list of conditions and the following disclaimer in the
11150861Scognet *    documentation and/or other materials provided with the distribution.
12150861Scognet *
13150861Scognet * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
14150861Scognet * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
15150861Scognet * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
16150861Scognet * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
17150861Scognet * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
18150861Scognet * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19150861Scognet * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
20150861Scognet * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21150861Scognet * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
22150861Scognet * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23150861Scognet */
24150861Scognet
25150861Scognet#include <sys/cdefs.h>
26150861Scognet__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 194609 2009-06-21 21:38:12Z cognet $");
27150861Scognet#include <machine/asm.h>
28163871Scognet#include <sys/param.h>
29150861Scognet#include <sys/elf32.h>
30153112Scognet#include <sys/inflate.h>
31150861Scognet#include <machine/elf.h>
32154561Scognet#include <machine/pte.h>
33159084Scognet#include <machine/cpufunc.h>
34159557Scognet#include <machine/armreg.h>
35154561Scognet
36161334Simp/*
37161334Simp * Since we are compiled outside of the normal kernel build process, we
38161334Simp * need to include opt_global.h manually.
39161334Simp */
40150861Scognet#include "opt_global.h"
41153112Scognet#include "opt_kernname.h"
42150861Scognet
43150861Scognetextern char kernel_start[];
44150861Scognetextern char kernel_end[];
45150861Scognet
46159557Scognetextern void *_end;
47159557Scognet
48177508Scognetvoid _start(void);
49152743Scognetvoid __start(void);
50171619Scognetvoid __startC(void);
51152743Scognet
52153112Scognet#define GZ_HEAD	0xa
53152743Scognet
54159084Scognet#ifdef CPU_ARM7TDMI
55159084Scognet#define cpu_idcache_wbinv_all	arm7tdmi_cache_flushID
56159084Scognet#elif defined(CPU_ARM8)
57159084Scognet#define cpu_idcache_wbinv_all	arm8_cache_purgeID
58159084Scognet#elif defined(CPU_ARM9)
59159557Scognet#define cpu_idcache_wbinv_all	arm9_idcache_wbinv_all
60177916Sraj#elif defined(CPU_ARM9E)
61177916Sraj#define cpu_idcache_wbinv_all	armv5_ec_idcache_wbinv_all
62159084Scognet#elif defined(CPU_ARM10)
63159084Scognet#define cpu_idcache_wbinv_all	arm10_idcache_wbinv_all
64159084Scognet#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \
65159084Scognet    defined(CPU_IXP12X0)
66159084Scognet#define cpu_idcache_wbinv_all	sa1_cache_purgeID
67159084Scognet#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
68161592Scognet  defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) ||	\
69161592Scognet  defined(CPU_XSCALE_80219)
70159084Scognet#define cpu_idcache_wbinv_all	xscale_cache_purgeID
71171619Scognet#elif defined(CPU_XSCALE_81342)
72171619Scognet#define cpu_idcache_wbinv_all	xscalec3_cache_purgeID
73159084Scognet#endif
74171619Scognet#ifdef CPU_XSCALE_81342
75171619Scognet#define cpu_l2cache_wbinv_all	xscalec3_l2cache_purge
76183840Sraj#elif defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY)
77186933Sraj#define cpu_l2cache_wbinv_all	sheeva_l2cache_wbinv_all
78171619Scognet#else
79171619Scognet#define cpu_l2cache_wbinv_all()
80171619Scognet#endif
81159557Scognet
82159758Scognet
83159557Scognetint     arm_picache_size;
84159557Scognetint     arm_picache_line_size;
85159557Scognetint     arm_picache_ways;
86159557Scognet
87159557Scognetint     arm_pdcache_size;       /* and unified */
88159557Scognetint     arm_pdcache_line_size = 32;
89159557Scognetint     arm_pdcache_ways;
90159557Scognet
91159557Scognetint     arm_pcache_type;
92159557Scognetint     arm_pcache_unified;
93159557Scognet
94159557Scognetint     arm_dcache_align;
95159557Scognetint     arm_dcache_align_mask;
96159557Scognet
97159557Scognet/* Additional cache information local to this file.  Log2 of some of the
98159557Scognet      above numbers.  */
99159557Scognetstatic int      arm_dcache_l2_nsets;
100159557Scognetstatic int      arm_dcache_l2_assoc;
101159557Scognetstatic int      arm_dcache_l2_linesize;
102159557Scognet
103159557Scognet
104159084Scognetint block_userspace_access = 0;
105159557Scognetextern int arm9_dcache_sets_inc;
106159557Scognetextern int arm9_dcache_sets_max;
107159557Scognetextern int arm9_dcache_index_max;
108159557Scognetextern int arm9_dcache_index_inc;
109159084Scognet
/*
 * Local byte-wise memcpy used by the trampoline.
 *
 * Copies len bytes from src to dst, lowest address first, and returns dst.
 * A zero or negative len copies nothing.  The copy is always done one byte
 * at a time, so neither pointer needs any particular alignment.
 */
static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

	/*
	 * "> 0" rather than "!= 0": a negative length must not turn into a
	 * runaway copy (bzero() below uses the same guard).
	 */
	while (len > 0) {
		*d++ = *s++;
		len--;
	}
	return (dst);
}
130150861Scognet
/*
 * Zero count bytes starting at addr.
 *
 * Whenever the cursor is 4-byte aligned and at least four bytes remain, a
 * whole 32-bit word is cleared at once; otherwise a single byte is cleared.
 * A zero or negative count does nothing.
 */
static __inline void
bzero(void *addr, int count)
{
	char *p;

	for (p = (char *)addr; count > 0; ) {
		if (count < 4 || ((uintptr_t)p & 3) != 0) {
			/* Unaligned head or short tail: one byte. */
			*p++ = 0;
			count -= 1;
			continue;
		}
		/* Aligned and at least a word left. */
		*(uint32_t *)p = 0;
		p += 4;
		count -= 4;
	}
}
148150861Scognet
149159900Scognetstatic void arm9_setup(void);
150159900Scognet
151153112Scognetvoid
152171619Scognet_startC(void)
153153112Scognet{
154153549Scognet	int physaddr = KERNPHYSADDR;
155153549Scognet	int tmp1;
156159758Scognet	unsigned int sp = ((unsigned int)&_end & ~3) + 4;
157166819Scognet#if defined(FLASHADDR) && defined(LOADERRAMADDR)
158166819Scognet	unsigned int pc;
159166819Scognet
160166819Scognet	__asm __volatile("adr %0, _start\n"
161166819Scognet	    : "=r" (pc));
162166819Scognet	if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
163166819Scognet	    (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
164166819Scognet		/*
165166819Scognet		 * We're running from flash, so just copy the whole thing
166166819Scognet		 * from flash to memory.
167166819Scognet		 * This is far from optimal, we could do the relocation or
168166819Scognet		 * the unzipping directly from flash to memory to avoid this
169166819Scognet		 * needless copy, but it would require to know the flash
170166819Scognet		 * physical address.
171166819Scognet		 */
172166819Scognet		unsigned int target_addr;
173166819Scognet		unsigned int tmp_sp;
174166819Scognet
175166819Scognet		target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
176166819Scognet		tmp_sp = target_addr + 0x100000 +
177166819Scognet		    (unsigned int)&_end - (unsigned int)&_start;
178166819Scognet		memcpy((char *)target_addr, (char *)pc,
179166819Scognet		    (unsigned int)&_end - (unsigned int)&_start);
180166819Scognet		/* Temporary set the sp and jump to the new location. */
181166819Scognet		__asm __volatile(
182166819Scognet		    "mov sp, %1\n"
183166819Scognet		    "mov pc, %0\n"
184166819Scognet		    : : "r" (target_addr), "r" (tmp_sp));
185166819Scognet
186166819Scognet	}
187166819Scognet#endif
188159557Scognet#ifdef KZIP
189159557Scognet	sp += KERNSIZE + 0x100;
190159557Scognet	sp &= ~(L1_TABLE_SIZE - 1);
191159557Scognet	sp += 2 * L1_TABLE_SIZE;
192159557Scognet#endif
193159557Scognet	sp += 1024 * 1024; /* Should be enough for a stack */
194153549Scognet
195153549Scognet	__asm __volatile("adr %0, 2f\n"
196153549Scognet	    		 "bic %0, %0, #0xff000000\n"
197153549Scognet			 "and %1, %1, #0xff000000\n"
198153549Scognet			 "orr %0, %0, %1\n"
199153549Scognet			 "mrc p15, 0, %1, c1, c0, 0\n"
200153549Scognet			 "bic %1, %1, #1\n" /* Disable MMU */
201153549Scognet			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
202153549Scognet						     WBUF enable */
203153549Scognet			 "orr %1, %1, #0x1000\n" /* Add IC enable */
204153549Scognet			 "orr %1, %1, #(0x800)\n" /* BPRD enable */
205153549Scognet
206153549Scognet			 "mcr p15, 0, %1, c1, c0, 0\n"
207153549Scognet			 "nop\n"
208153549Scognet			 "nop\n"
209153549Scognet			 "nop\n"
210153549Scognet			 "mov pc, %0\n"
211153549Scognet			 "2: nop\n"
212159557Scognet			 "mov sp, %2\n"
213159557Scognet			 : "=r" (tmp1), "+r" (physaddr), "+r" (sp));
214159758Scognet#ifndef KZIP
215159758Scognet#ifdef CPU_ARM9
216159758Scognet	/* So that idcache_wbinv works; */
217159758Scognet	if ((cpufunc_id() & 0x0000f000) == 0x00009000)
218159758Scognet		arm9_setup();
219159758Scognet#endif
220159758Scognet	cpu_idcache_wbinv_all();
221171619Scognet	cpu_l2cache_wbinv_all();
222159758Scognet#endif
223153112Scognet	__start();
224153112Scognet}
225153112Scognet
226159557Scognetstatic void
227159557Scognetget_cachetype_cp15()
228159557Scognet{
229159557Scognet	u_int ctype, isize, dsize;
230159557Scognet	u_int multiplier;
231159557Scognet
232159557Scognet	__asm __volatile("mrc p15, 0, %0, c0, c0, 1"
233159557Scognet	    : "=r" (ctype));
234159557Scognet
235159557Scognet	/*
236159557Scognet	 * ...and thus spake the ARM ARM:
237159557Scognet	 *
238159557Scognet 	 * If an <opcode2> value corresponding to an unimplemented or
239159557Scognet	 * reserved ID register is encountered, the System Control
240159557Scognet	 * processor returns the value of the main ID register.
241159557Scognet	 */
242159557Scognet	if (ctype == cpufunc_id())
243159557Scognet		goto out;
244159557Scognet
245159557Scognet	if ((ctype & CPU_CT_S) == 0)
246159557Scognet		arm_pcache_unified = 1;
247159557Scognet
248159557Scognet	/*
249159557Scognet	 * If you want to know how this code works, go read the ARM ARM.
250159557Scognet	 */
251159557Scognet
252159557Scognet	arm_pcache_type = CPU_CT_CTYPE(ctype);
253159557Scognet        if (arm_pcache_unified == 0) {
254159557Scognet		isize = CPU_CT_ISIZE(ctype);
255159557Scognet	    	multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
256159557Scognet		arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
257159557Scognet		if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
258159557Scognet			if (isize & CPU_CT_xSIZE_M)
259159557Scognet				arm_picache_line_size = 0; /* not present */
260159557Scognet			else
261159557Scognet				arm_picache_ways = 1;
262159557Scognet		} else {
263159557Scognet			arm_picache_ways = multiplier <<
264159557Scognet			    (CPU_CT_xSIZE_ASSOC(isize) - 1);
265159557Scognet		}
266159557Scognet		arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
267159557Scognet	}
268159557Scognet
269159557Scognet	dsize = CPU_CT_DSIZE(ctype);
270159557Scognet	multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
271159557Scognet	arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
272159557Scognet	if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
273159557Scognet		if (dsize & CPU_CT_xSIZE_M)
274159557Scognet			arm_pdcache_line_size = 0; /* not present */
275159557Scognet		else
276159557Scognet			arm_pdcache_ways = 1;
277159557Scognet	} else {
278159557Scognet		arm_pdcache_ways = multiplier <<
279159557Scognet		    (CPU_CT_xSIZE_ASSOC(dsize) - 1);
280159557Scognet	}
281159557Scognet	arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);
282159557Scognet
283159557Scognet	arm_dcache_align = arm_pdcache_line_size;
284159557Scognet
285159557Scognet	arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
286159557Scognet	arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
287159557Scognet	arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
288159557Scognet	    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);
289159557Scognet out:
290159557Scognet	arm_dcache_align_mask = arm_dcache_align - 1;
291159557Scognet}
292159557Scognet
293159557Scognetstatic void
294159557Scognetarm9_setup(void)
295159557Scognet{
296159557Scognet
297159557Scognet	get_cachetype_cp15();
298159557Scognet	arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
299159557Scognet	arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
300159557Scognet	    arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
301159557Scognet	arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
302159557Scognet	arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
303159557Scognet}
304159557Scognet
305159557Scognet
306159868Scognet#ifdef KZIP
307153112Scognetstatic  unsigned char *orig_input, *i_input, *i_output;
308153112Scognet
309153112Scognet
static unsigned int memcnt;	/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */
/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000

/*
 * Allocate memory block.
 *
 * Hands out size bytes from a static MEMSIZ-byte arena, bump-pointer style.
 * Returns NULL when size is negative or does not fit in what is left of the
 * arena.  No alignment padding is inserted between blocks.
 */
unsigned char *
kzipmalloc(int size)
{
	static unsigned char mem[MEMSIZ];
	unsigned char *ptr;

	/*
	 * memtot never exceeds MEMSIZ, so the subtraction cannot wrap; the
	 * explicit sign check keeps a negative size from being converted to
	 * a huge unsigned value and slipping through.
	 */
	if (size < 0 || (size_t)size > MEMSIZ - memtot)
		return (NULL);
	ptr = mem + memtot;
	memtot += size;
	memcnt++;
	return (ptr);
}

/*
 * Free allocated memory block.
 *
 * Individual blocks are never reused; the arena is rewound to empty only
 * once every outstanding block has been freed.  A free with no outstanding
 * block is ignored so the block counter cannot underflow.
 */
void
kzipfree(void *ptr)
{
	(void)ptr;
	if (memcnt == 0)
		return;
	if (--memcnt == 0)
		memtot = 0;
}
345153112Scognet
/*
 * Message hook: the string is deliberately discarded, nothing is printed.
 */
void
putstr(char *dummy)
{
	(void)dummy;
}
350153112Scognet
351153112Scognetstatic int
352153112Scognetinput(void *dummy)
353153112Scognet{
354159084Scognet	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
355153112Scognet		return (GZ_EOF);
356153112Scognet	}
357153112Scognet	return *i_input++;
358153112Scognet}
359153112Scognet
360153112Scognetstatic int
361153112Scognetoutput(void *dummy, unsigned char *ptr, unsigned long len)
362153112Scognet{
363153112Scognet
364159758Scognet
365153112Scognet	memcpy(i_output, ptr, len);
366153112Scognet	i_output += len;
367153112Scognet	return (0);
368153112Scognet}
369153112Scognet
370153112Scognetstatic void *
371153112Scognetinflate_kernel(void *kernel, void *startaddr)
372153112Scognet{
373153112Scognet	struct inflate infl;
374153112Scognet	char slide[GZ_WSIZE];
375153112Scognet
376153112Scognet	orig_input = kernel;
377153549Scognet	memcnt = memtot = 0;
378153112Scognet	i_input = (char *)kernel + GZ_HEAD;
379153112Scognet	if (((char *)kernel)[3] & 0x18) {
380153112Scognet		while (*i_input)
381153112Scognet			i_input++;
382153112Scognet		i_input++;
383153112Scognet	}
384153112Scognet	i_output = startaddr;
385153112Scognet	bzero(&infl, sizeof(infl));
386153112Scognet	infl.gz_input = input;
387153112Scognet	infl.gz_output = output;
388153112Scognet	infl.gz_slide = slide;
389153112Scognet	inflate(&infl);
390153112Scognet	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
391153112Scognet}
392153112Scognet
393153112Scognet#endif
394153112Scognet
395150861Scognetvoid *
396153112Scognetload_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end,
397153112Scognet    int d)
398150861Scognet{
399150861Scognet	Elf32_Ehdr *eh;
400159901Scognet	Elf32_Phdr phdr[64] /* XXX */, *php;
401159901Scognet	Elf32_Shdr shdr[64] /* XXX */;
402150861Scognet	int i,j;
403150861Scognet	void *entry_point;
404153112Scognet	int symtabindex = -1;
405153112Scognet	int symstrindex = -1;
406150861Scognet	vm_offset_t lastaddr = 0;
407188019Scognet	Elf_Addr ssym = 0;
408150861Scognet	Elf_Dyn *dp;
409150861Scognet
410150861Scognet	eh = (Elf32_Ehdr *)kstart;
411188019Scognet	ssym = 0;
412150861Scognet	entry_point = (void*)eh->e_entry;
413150861Scognet	memcpy(phdr, (void *)(kstart + eh->e_phoff ),
414150861Scognet	    eh->e_phnum * sizeof(phdr[0]));
415153549Scognet
416150861Scognet	/* Determine lastaddr. */
417150861Scognet	for (i = 0; i < eh->e_phnum; i++) {
418150861Scognet		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
419150861Scognet		    + phdr[i].p_memsz))
420150861Scognet			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
421150861Scognet			    curaddr + phdr[i].p_memsz;
422150861Scognet	}
423150861Scognet
424153112Scognet	/* Save the symbol tables, as there're about to be scratched. */
425153549Scognet	memcpy(shdr, (void *)(kstart + eh->e_shoff),
426153549Scognet	    sizeof(*shdr) * eh->e_shnum);
427150861Scognet	if (eh->e_shnum * eh->e_shentsize != 0 &&
428150861Scognet	    eh->e_shoff != 0) {
429150861Scognet		for (i = 0; i < eh->e_shnum; i++) {
430150861Scognet			if (shdr[i].sh_type == SHT_SYMTAB) {
431150861Scognet				for (j = 0; j < eh->e_phnum; j++) {
432150861Scognet					if (phdr[j].p_type == PT_LOAD &&
433150861Scognet					    shdr[i].sh_offset >=
434150861Scognet					    phdr[j].p_offset &&
435150861Scognet					    (shdr[i].sh_offset +
436150861Scognet					     shdr[i].sh_size <=
437150861Scognet					     phdr[j].p_offset +
438150861Scognet					     phdr[j].p_filesz)) {
439150861Scognet						shdr[i].sh_offset = 0;
440150861Scognet						shdr[i].sh_size = 0;
441150861Scognet						j = eh->e_phnum;
442150861Scognet					}
443150861Scognet				}
444150861Scognet				if (shdr[i].sh_offset != 0 &&
445150861Scognet				    shdr[i].sh_size != 0) {
446150861Scognet					symtabindex = i;
447150861Scognet					symstrindex = shdr[i].sh_link;
448150861Scognet				}
449150861Scognet			}
450150861Scognet		}
451150861Scognet		func_end = roundup(func_end, sizeof(long));
452150861Scognet		if (symtabindex >= 0 && symstrindex >= 0) {
453150861Scognet			ssym = lastaddr;
454150861Scognet			if (d) {
455150861Scognet				memcpy((void *)func_end, (void *)(
456150861Scognet				    shdr[symtabindex].sh_offset + kstart),
457150861Scognet				    shdr[symtabindex].sh_size);
458150861Scognet				memcpy((void *)(func_end +
459150861Scognet				    shdr[symtabindex].sh_size),
460150861Scognet				    (void *)(shdr[symstrindex].sh_offset +
461150861Scognet				    kstart), shdr[symstrindex].sh_size);
462153112Scognet			} else {
463153112Scognet				lastaddr += shdr[symtabindex].sh_size;
464153112Scognet				lastaddr = roundup(lastaddr,
465153112Scognet				    sizeof(shdr[symtabindex].sh_size));
466153112Scognet				lastaddr += sizeof(shdr[symstrindex].sh_size);
467153112Scognet				lastaddr += shdr[symstrindex].sh_size;
468153112Scognet				lastaddr = roundup(lastaddr,
469153112Scognet				    sizeof(shdr[symstrindex].sh_size));
470150861Scognet			}
471150861Scognet
472150861Scognet		}
473150861Scognet	}
474150861Scognet	if (!d)
475150861Scognet		return ((void *)lastaddr);
476150861Scognet
477150861Scognet	j = eh->e_phnum;
478150861Scognet	for (i = 0; i < j; i++) {
479150861Scognet		volatile char c;
480153112Scognet
481153112Scognet		if (phdr[i].p_type != PT_LOAD)
482150861Scognet			continue;
483150861Scognet		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
484150861Scognet		    (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
485150861Scognet		/* Clean space from oversized segments, eg: bss. */
486150861Scognet		if (phdr[i].p_filesz < phdr[i].p_memsz)
487150861Scognet			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
488150861Scognet			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
489150861Scognet			    phdr[i].p_filesz);
490150861Scognet	}
491150861Scognet	/* Now grab the symbol tables. */
492153112Scognet	if (symtabindex >= 0 && symstrindex >= 0) {
493153112Scognet		*(Elf_Size *)lastaddr =
494153112Scognet		    shdr[symtabindex].sh_size;
495153112Scognet		lastaddr += sizeof(shdr[symtabindex].sh_size);
496153112Scognet		memcpy((void*)lastaddr,
497153112Scognet		    (void *)func_end,
498153112Scognet		    shdr[symtabindex].sh_size);
499153112Scognet		lastaddr += shdr[symtabindex].sh_size;
500153112Scognet		lastaddr = roundup(lastaddr,
501153112Scognet		    sizeof(shdr[symtabindex].sh_size));
502153112Scognet		*(Elf_Size *)lastaddr =
503153112Scognet		    shdr[symstrindex].sh_size;
504153112Scognet		lastaddr += sizeof(shdr[symstrindex].sh_size);
505153112Scognet		memcpy((void*)lastaddr,
506153112Scognet		    (void*)(func_end +
507153112Scognet			    shdr[symtabindex].sh_size),
508153112Scognet		    shdr[symstrindex].sh_size);
509153112Scognet		lastaddr += shdr[symstrindex].sh_size;
510153112Scognet		lastaddr = roundup(lastaddr,
511153112Scognet   		    sizeof(shdr[symstrindex].sh_size));
512153112Scognet		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
513153112Scognet		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
514153112Scognet		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
515153112Scognet	} else
516153112Scognet		*(Elf_Addr *)curaddr = 0;
517153549Scognet	/* Invalidate the instruction cache. */
518153549Scognet	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
519153549Scognet	    		 "mcr p15, 0, %0, c7, c10, 4\n"
520153549Scognet			 : : "r" (curaddr));
521159758Scognet	__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
522159758Scognet	    "bic %0, %0, #1\n" /* MMU_ENABLE */
523159758Scognet	    "mcr p15, 0, %0, c1, c0, 0\n"
524159758Scognet	    : "=r" (ssym));
525150861Scognet	/* Jump to the entry point. */
526150861Scognet	((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
527150861Scognet	__asm __volatile(".globl func_end\n"
528150861Scognet	    "func_end:");
529150861Scognet
530150861Scognet}
531150861Scognet
532150861Scognetextern char func_end[];
533150861Scognet
534154561Scognet
535171619Scognet#define PMAP_DOMAIN_KERNEL	0 /*
536154561Scognet				    * Just define it instead of including the
537154561Scognet				    * whole VM headers set.
538154561Scognet				    */
539154561Scognetint __hack;
540154561Scognetstatic __inline void
541159758Scognetsetup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
542159758Scognet    int write_back)
543150861Scognet{
544154561Scognet	unsigned int *pd = (unsigned int *)pt_addr;
545154561Scognet	vm_paddr_t addr;
546154561Scognet	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
547154561Scognet	int tmp;
548154561Scognet
549154561Scognet	bzero(pd, L1_TABLE_SIZE);
550159758Scognet	for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
551154561Scognet		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
552154561Scognet		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
553194609Scognet		if (write_back && 0)
554159758Scognet			pd[addr >> L1_S_SHIFT] |= L1_S_B;
555159758Scognet	}
556154561Scognet	/* XXX: See below */
557154561Scognet	if (0xfff00000 < physstart || 0xfff00000 > physend)
558154561Scognet		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
559154561Scognet		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
560154561Scognet	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
561154561Scognet	    		 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TTB */
562154561Scognet			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */
563154561Scognet			 "mrc p15, 0, %0, c1, c0, 0\n"
564154561Scognet			 "orr %0, %0, #1\n" /* MMU_ENABLE */
565154561Scognet			 "mcr p15, 0, %0, c1, c0, 0\n"
566154561Scognet			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
567154561Scognet			 "mov r0, r0\n"
568154561Scognet			 "sub pc, pc, #4\n" :
569154561Scognet			 "=r" (tmp) : "r" (pd), "r" (domain));
570154561Scognet
571154561Scognet	/*
572154561Scognet	 * XXX: This is the most stupid workaround I've ever wrote.
573154561Scognet	 * For some reason, the KB9202 won't boot the kernel unless
574154561Scognet	 * we access an address which is not in the
575154561Scognet	 * 0x20000000 - 0x20ffffff range. I hope I'll understand
576154561Scognet	 * what's going on later.
577154561Scognet	 */
578154561Scognet	__hack = *(volatile int *)0xfffff21c;
579154561Scognet}
580154561Scognet
581154561Scognetvoid
582154561Scognet__start(void)
583154561Scognet{
584150861Scognet	void *curaddr;
585159084Scognet	void *dst, *altdst;
586153112Scognet	char *kernel = (char *)&kernel_start;
587159557Scognet	int sp;
588159758Scognet	int pt_addr;
589150861Scognet
590150861Scognet	__asm __volatile("mov %0, pc"  :
591150861Scognet	    "=r" (curaddr));
592150861Scognet	curaddr = (void*)((unsigned int)curaddr & 0xfff00000);
593153112Scognet#ifdef KZIP
594153112Scognet	if (*kernel == 0x1f && kernel[1] == 0x8b) {
595159758Scognet		pt_addr = (((int)&_end + KERNSIZE + 0x100) &
596154561Scognet		    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
597159084Scognet
598159557Scognet#ifdef CPU_ARM9
599159557Scognet		/* So that idcache_wbinv works; */
600159557Scognet		if ((cpufunc_id() & 0x0000f000) == 0x00009000)
601159557Scognet			arm9_setup();
602159557Scognet#endif
603154561Scognet		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
604159758Scognet		    (vm_paddr_t)curaddr + 0x10000000, 1);
605153112Scognet		/* Gzipped kernel */
606153112Scognet		dst = inflate_kernel(kernel, &_end);
607153112Scognet		kernel = (char *)&_end;
608159084Scognet		altdst = 4 + load_kernel((unsigned int)kernel,
609159084Scognet		    (unsigned int)curaddr,
610171619Scognet		    (unsigned int)&func_end + 800 , 0);
611159084Scognet		if (altdst > dst)
612159084Scognet			dst = altdst;
613153112Scognet	} else
614153112Scognet#endif
615153112Scognet		dst = 4 + load_kernel((unsigned int)&kernel_start,
616153112Scognet	    (unsigned int)curaddr,
617150861Scognet	    (unsigned int)&func_end, 0);
618159758Scognet	dst = (void *)(((vm_offset_t)dst & ~3));
619159758Scognet	pt_addr = ((unsigned int)dst &~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
620159758Scognet	setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
621159758Scognet	    (vm_paddr_t)curaddr + 0x10000000, 0);
622159758Scognet	sp = pt_addr + L1_TABLE_SIZE + 8192;
623159758Scognet	sp = sp &~3;
624159758Scognet	dst = (void *)(sp + 4);
625150861Scognet	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
626171619Scognet	    (unsigned int)&load_kernel + 800);
627159557Scognet	do_call(dst, kernel, dst + (unsigned int)(&func_end) -
628171619Scognet	    (unsigned int)(&load_kernel) + 800, sp);
629150861Scognet}
630