elf_trampoline.c revision 163810
/*-
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 163810 2006-10-30 23:23:00Z cognet $");
#include <machine/asm.h>
#include <sys/types.h>
#include <sys/elf32.h>
#include <sys/param.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte.h>
#include <machine/cpufunc.h>
#include <machine/armreg.h>

#include <stdlib.h>

/*
 * Since we are compiled outside of the normal kernel build process, we
 * need to include opt_global.h manually.
 */
#include "opt_global.h"
#include "opt_kernname.h"

extern char kernel_start[];
extern char kernel_end[];

extern void *_end;

void __start(void);

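/*
 * do_call() re-enters the relocated copy of load_kernel() from __start();
 * it lives in the assembly part of the trampoline build, not in this file.
 * The prototype here is assumed from the call site so that this file reads
 * as a self-contained unit.
 */
void do_call(void *, void *, void *, int);
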
#define GZ_HEAD	0xa

#ifdef CPU_ARM7TDMI
#define cpu_idcache_wbinv_all	arm7tdmi_cache_flushID
#elif defined(CPU_ARM8)
#define cpu_idcache_wbinv_all	arm8_cache_purgeID
#elif defined(CPU_ARM9)
#define cpu_idcache_wbinv_all	arm9_idcache_wbinv_all
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all	arm10_idcache_wbinv_all
#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \
    defined(CPU_IXP12X0)
#define cpu_idcache_wbinv_all	sa1_cache_purgeID
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
  defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) ||	\
  defined(CPU_XSCALE_80219)
#define cpu_idcache_wbinv_all	xscale_cache_purgeID
#endif
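
/*
 * The trampoline runs long before the cpufuncs switchboard is set up, so
 * the cache clean+invalidate operation above is bound at compile time to
 * the routine matching the CPU the kernel was configured for.
 */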

int     arm_picache_size;
int     arm_picache_line_size;
int     arm_picache_ways;

int     arm_pdcache_size;       /* and unified */
int     arm_pdcache_line_size = 32;
int     arm_pdcache_ways;

int     arm_pcache_type;
int     arm_pcache_unified;

int     arm_dcache_align;
int     arm_dcache_align_mask;

/*
 * Additional cache information local to this file.  Log2 of some of the
 * above numbers.
 */
static int      arm_dcache_l2_nsets;
static int      arm_dcache_l2_assoc;
static int      arm_dcache_l2_linesize;

int block_userspace_access = 0;
extern int arm9_dcache_sets_inc;
extern int arm9_dcache_sets_max;
extern int arm9_dcache_index_max;
extern int arm9_dcache_index_inc;

/*
 * Minimal memcpy(): copy a word at a time while both pointers are
 * word-aligned and at least a word remains, bytewise otherwise.
 */
static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

	while (len) {
		if (len >= 4 && !((vm_offset_t)d & 3) &&
		    !((vm_offset_t)s & 3)) {
			*(uint32_t *)d = *(uint32_t *)s;
			s += 4;
			d += 4;
			len -= 4;
		} else {
			*d++ = *s++;
			len--;
		}
	}
	return (dst);
}

/* Minimal bzero(), word-at-a-time where alignment allows. */
static __inline void
bzero(void *addr, int count)
{
	char *tmp = (char *)addr;

	while (count > 0) {
		if (count >= 4 && !((vm_offset_t)tmp & 3)) {
			*(uint32_t *)tmp = 0;
			tmp += 4;
			count -= 4;
		} else {
			*tmp = 0;
			tmp++;
			count--;
		}
	}
}

static void arm9_setup(void);

void
_start(void)
{
	int physaddr = KERNPHYSADDR;
	int tmp1;
	unsigned int sp = ((unsigned int)&_end & ~3) + 4;
#ifdef KZIP
	sp += KERNSIZE + 0x100;
	sp &= ~(L1_TABLE_SIZE - 1);
	sp += 2 * L1_TABLE_SIZE;
#endif
	sp += 1024 * 1024; /* Should be enough for a stack */

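	/*
	 * Roughly, the assembly below computes the physical address of
	 * label 2 by keeping its low 24 bits and splicing in the top byte
	 * of KERNPHYSADDR, turns the MMU off while enabling the D-cache,
	 * write buffer, I-cache and branch prediction in the CP15 control
	 * register, jumps to that physical address, and finally installs
	 * the stack pointer computed above.
	 */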
	__asm __volatile("adr %0, 2f\n"
			 "bic %0, %0, #0xff000000\n"
			 "and %1, %1, #0xff000000\n"
			 "orr %0, %0, %1\n"
			 "mrc p15, 0, %1, c1, c0, 0\n"
			 "bic %1, %1, #1\n" /* Disable MMU */
			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
						     WBUF enable */
			 "orr %1, %1, #0x1000\n" /* Add IC enable */
			 "orr %1, %1, #(0x800)\n" /* BPRD enable */

			 "mcr p15, 0, %1, c1, c0, 0\n"
			 "nop\n"
			 "nop\n"
			 "nop\n"
			 "mov pc, %0\n"
			 "2: nop\n"
			 "mov sp, %2\n"
			 : "=r" (tmp1), "+r" (physaddr), "+r" (sp));
#ifndef KZIP
#ifdef CPU_ARM9
	/* So that cpu_idcache_wbinv_all() works. */
	if ((cpufunc_id() & 0x0000f000) == 0x00009000)
		arm9_setup();
#endif
	cpu_idcache_wbinv_all();
#endif
	__start();
}

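/*
 * Decode the CP15 cache type register (c0, c0, 1).  For each of the I and
 * D caches the register encodes line length, associativity, size and a
 * multiplier bit M: with M clear, size = 2 << (SIZE + 8) bytes, line =
 * 1 << (LEN + 3) bytes and ways = 2 << (ASSOC - 1); with M set the factor
 * of 2 becomes 3.
 */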
static void
get_cachetype_cp15(void)
{
	u_int ctype, isize, dsize;
	u_int multiplier;

	__asm __volatile("mrc p15, 0, %0, c0, c0, 1"
	    : "=r" (ctype));

	/*
	 * ...and thus spake the ARM ARM:
	 *
	 * If an <opcode2> value corresponding to an unimplemented or
	 * reserved ID register is encountered, the System Control
	 * processor returns the value of the main ID register.
	 */
	if (ctype == cpufunc_id())
		goto out;

	if ((ctype & CPU_CT_S) == 0)
		arm_pcache_unified = 1;

	/*
	 * If you want to know how this code works, go read the ARM ARM.
	 */

	arm_pcache_type = CPU_CT_CTYPE(ctype);
	if (arm_pcache_unified == 0) {
		isize = CPU_CT_ISIZE(ctype);
		multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
		arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
		if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
			if (isize & CPU_CT_xSIZE_M)
				arm_picache_line_size = 0; /* not present */
			else
				arm_picache_ways = 1;
		} else {
			arm_picache_ways = multiplier <<
			    (CPU_CT_xSIZE_ASSOC(isize) - 1);
		}
		arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
	}

	dsize = CPU_CT_DSIZE(ctype);
	multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
	arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
	if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
		if (dsize & CPU_CT_xSIZE_M)
			arm_pdcache_line_size = 0; /* not present */
		else
			arm_pdcache_ways = 1;
	} else {
		arm_pdcache_ways = multiplier <<
		    (CPU_CT_xSIZE_ASSOC(dsize) - 1);
	}
	arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);

	arm_dcache_align = arm_pdcache_line_size;

	arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
	arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
	arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
	    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);
out:
	arm_dcache_align_mask = arm_dcache_align - 1;
}

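/*
 * Precompute the increments and limits used by the ARM9 set/way cache
 * operations: the set index advances one cache line at a time up to
 * line size * number of sets, while the way index lives in the top
 * log2(ways) bits of the operand, hence the 32 - assoc shift.
 */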
static void
arm9_setup(void)
{

	get_cachetype_cp15();
	arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
	arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
	    arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
	arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
	arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
}

#ifdef KZIP
static unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;		/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */

/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000

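/*
 * kzipmalloc()/kzipfree() form a trivial bump allocator over a static
 * MEMSIZ-byte arena; inflate() releases everything it allocates, so the
 * arena is recycled once the outstanding block count drops back to zero.
 */
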
/*
 * Allocate memory block.
 */
unsigned char *
kzipmalloc(int size)
{
	void *ptr;
	static u_char mem[MEMSIZ];

	if (memtot + size > MEMSIZ)
		return NULL;
	ptr = mem + memtot;
	memtot += size;
	memcnt++;
	return ptr;
}

/*
 * Free allocated memory block.
 */
void
kzipfree(void *ptr)
{
	memcnt--;
	if (!memcnt)
		memtot = 0;
}

/* Required by the inflate code; diagnostic output is simply discarded. */
void
putstr(char *dummy)
{
}

static int
input(void *dummy)
{
	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
		return (GZ_EOF);
	}
	return *i_input++;
}

static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

	memcpy(i_output, ptr, len);
	i_output += len;
	return (0);
}

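/*
 * A gzip member begins with a GZ_HEAD (10) byte fixed header whose fourth
 * byte is the FLG field.  FNAME (0x08) and FCOMMENT (0x10) each append a
 * NUL-terminated string after the fixed header, which is what the string
 * skip below handles.  Kernels compressed with plain gzip normally carry
 * FNAME only, so skipping a single string suffices in practice.
 */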
static void *
inflate_kernel(void *kernel, void *startaddr)
{
	struct inflate infl;
	char slide[GZ_WSIZE];

	orig_input = kernel;
	memcnt = memtot = 0;
	i_input = (unsigned char *)kernel + GZ_HEAD;
	if (((char *)kernel)[3] & 0x18) {
		while (*i_input)
			i_input++;
		i_input++;
	}
	i_output = startaddr;
	bzero(&infl, sizeof(infl));
	infl.gz_input = input;
	infl.gz_output = output;
	infl.gz_slide = slide;
	inflate(&infl);
	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
}

#endif

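/*
 * load_kernel() is used in two passes.  With d == 0 it only walks the ELF
 * headers and returns the highest address the loaded image (including the
 * saved symbol tables) will reach.  With d != 0 it copies the PT_LOAD
 * segments to their physical addresses, zeroes any bss tail, lays the
 * symbol and string tables down after the image as (size, data) pairs,
 * records MAGIC_TRAMP_NUMBER plus the ssym/esym virtual addresses at
 * curaddr, turns the MMU off and jumps to the kernel entry point.
 */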
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
	Elf32_Ehdr *eh;
	Elf32_Phdr phdr[64] /* XXX */, *php;
	Elf32_Shdr shdr[64] /* XXX */;
	int i, j;
	void *entry_point;
	int symtabindex = -1;
	int symstrindex = -1;
	vm_offset_t lastaddr = 0;
	Elf_Addr ssym = 0, esym = 0;
	Elf_Dyn *dp;

	eh = (Elf32_Ehdr *)kstart;
	ssym = esym = 0;
	entry_point = (void *)eh->e_entry;
	memcpy(phdr, (void *)(kstart + eh->e_phoff),
	    eh->e_phnum * sizeof(phdr[0]));

	/* Determine lastaddr. */
	for (i = 0; i < eh->e_phnum; i++) {
		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
		    + phdr[i].p_memsz))
			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_memsz;
	}

	/* Save the symbol tables, as they're about to be scratched. */
	memcpy(shdr, (void *)(kstart + eh->e_shoff),
	    sizeof(*shdr) * eh->e_shnum);
	if (eh->e_shnum * eh->e_shentsize != 0 &&
	    eh->e_shoff != 0) {
		for (i = 0; i < eh->e_shnum; i++) {
			if (shdr[i].sh_type == SHT_SYMTAB) {
				/*
				 * Ignore a symbol table that lives inside
				 * a loaded segment; it would be clobbered
				 * by the segment copy below.
				 */
				for (j = 0; j < eh->e_phnum; j++) {
					if (phdr[j].p_type == PT_LOAD &&
					    shdr[i].sh_offset >=
					    phdr[j].p_offset &&
					    (shdr[i].sh_offset +
					     shdr[i].sh_size <=
					     phdr[j].p_offset +
					     phdr[j].p_filesz)) {
						shdr[i].sh_offset = 0;
						shdr[i].sh_size = 0;
						j = eh->e_phnum;
					}
				}
				if (shdr[i].sh_offset != 0 &&
				    shdr[i].sh_size != 0) {
					symtabindex = i;
					symstrindex = shdr[i].sh_link;
				}
			}
		}
		func_end = roundup(func_end, sizeof(long));
		if (symtabindex >= 0 && symstrindex >= 0) {
			ssym = lastaddr;
			if (d) {
				/* Stash the tables after the trampoline. */
				memcpy((void *)func_end, (void *)(
				    shdr[symtabindex].sh_offset + kstart),
				    shdr[symtabindex].sh_size);
				memcpy((void *)(func_end +
				    shdr[symtabindex].sh_size),
				    (void *)(shdr[symstrindex].sh_offset +
				    kstart), shdr[symstrindex].sh_size);
			} else {
				lastaddr += shdr[symtabindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symtabindex].sh_size));
				lastaddr += sizeof(shdr[symstrindex].sh_size);
				lastaddr += shdr[symstrindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symstrindex].sh_size));
			}
		}
	}
	if (!d)
		return ((void *)lastaddr);

	j = eh->e_phnum;
	for (i = 0; i < j; i++) {
		volatile char c;

		if (phdr[i].p_type != PT_LOAD)
			continue;
		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
		    (void *)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
		/* Clean space from oversized segments, e.g. bss. */
		if (phdr[i].p_filesz < phdr[i].p_memsz)
			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
			    phdr[i].p_filesz);
	}
	/* Now grab the symbol tables. */
	if (symtabindex >= 0 && symstrindex >= 0) {
		*(Elf_Size *)lastaddr =
		    shdr[symtabindex].sh_size;
		lastaddr += sizeof(shdr[symtabindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)func_end,
		    shdr[symtabindex].sh_size);
		lastaddr += shdr[symtabindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symtabindex].sh_size));
		*(Elf_Size *)lastaddr =
		    shdr[symstrindex].sh_size;
		lastaddr += sizeof(shdr[symstrindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)(func_end +
			    shdr[symtabindex].sh_size),
		    shdr[symstrindex].sh_size);
		lastaddr += shdr[symstrindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symstrindex].sh_size));
		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
	} else
		*(Elf_Addr *)curaddr = 0;
	/* Invalidate the instruction cache and drain the write buffer. */
	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
			 "mcr p15, 0, %0, c7, c10, 4\n"
			 : : "r" (curaddr));
	__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
	    "bic %0, %0, #1\n" /* Clear the MMU enable bit */
	    "mcr p15, 0, %0, c1, c0, 0\n"
	    : "=r" (ssym));
	/* Jump to the entry point. */
	((void (*)(void))(entry_point - KERNVIRTADDR + curaddr))();
	__asm __volatile(".globl func_end\n"
	    "func_end:");
	/* NOTREACHED */
}

extern char func_end[];

#define PMAP_DOMAIN_KERNEL	15 /*
				    * Just define it instead of including the
				    * whole VM headers set.
				    */
int __hack;
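
/*
 * Build a minimal L1 translation table mapping [physstart, physend)
 * one-to-one with 1MB section descriptors (cacheable, kernel read/write,
 * write-back buffered when requested), point the TTB at it, grant client
 * access for the kernel domain and turn the MMU on.
 */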
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
    int write_back)
{
	unsigned int *pd = (unsigned int *)pt_addr;
	vm_paddr_t addr;
	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) |
	    DOMAIN_CLIENT;
	int tmp;

	bzero(pd, L1_TABLE_SIZE);
	for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
		if (write_back)
			pd[addr >> L1_S_SHIFT] |= L1_S_B;
	}
	/* XXX: See below */
	if (0xfff00000 < physstart || 0xfff00000 > physend)
		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
			 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush the TLB */
			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set the DACR */
			 "mrc p15, 0, %0, c1, c0, 0\n"
			 "orr %0, %0, #1\n" /* MMU_ENABLE */
			 "mcr p15, 0, %0, c1, c0, 0\n"
			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
			 "mov r0, r0\n"
			 "sub pc, pc, #4\n" :
			 "=r" (tmp) : "r" (pd), "r" (domain));

	/*
	 * XXX: This is the most stupid workaround I've ever written.
	 * For some reason, the KB9202 won't boot the kernel unless
	 * we access an address which is not in the
	 * 0x20000000 - 0x20ffffff range.  I hope I'll understand
	 * what's going on later.
	 */
	__hack = *(volatile int *)0xfffff21c;
}

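/*
 * Trampoline entry proper: derive the 1MB-aligned physical load address
 * from the current pc, inflate the embedded kernel if it is gzipped, size
 * the final image with a dry-run load_kernel(), build page tables and a
 * stack above it, copy the load_kernel()..func_end code out of the way,
 * and re-enter it through do_call() to perform the real load.
 */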
void
__start(void)
{
	void *curaddr;
	void *dst, *altdst;
	char *kernel = (char *)&kernel_start;
	int sp;
	int pt_addr;

	__asm __volatile("mov %0, pc" :
	    "=r" (curaddr));
	curaddr = (void *)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
	if (*kernel == 0x1f && kernel[1] == 0x8b) {
		pt_addr = (((int)&_end + KERNSIZE + 0x100) &
		    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;

#ifdef CPU_ARM9
		/* So that cpu_idcache_wbinv_all() works. */
		if ((cpufunc_id() & 0x0000f000) == 0x00009000)
			arm9_setup();
#endif
		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
		    (vm_paddr_t)curaddr + 0x10000000, 1);
		/* Gzipped kernel */
		dst = inflate_kernel(kernel, &_end);
		kernel = (char *)&_end;
		altdst = 4 + load_kernel((unsigned int)kernel,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
		if (altdst > dst)
			dst = altdst;
		cpu_idcache_wbinv_all();
		__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
		    "bic %0, %0, #1\n" /* Disable the MMU again */
		    "mcr p15, 0, %0, c1, c0, 0\n"
		    : "=r" (pt_addr));
	} else
#endif
		dst = 4 + load_kernel((unsigned int)&kernel_start,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
	dst = (void *)((vm_offset_t)dst & ~3);
	pt_addr = ((unsigned int)dst & ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
	setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
	    (vm_paddr_t)curaddr + 0x10000000, 0);
	sp = pt_addr + L1_TABLE_SIZE + 8192;
	sp = sp & ~3;
	dst = (void *)(sp + 4);
	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
	    (unsigned int)&load_kernel);
	do_call(dst, kernel, dst + (unsigned int)(&func_end) -
	    (unsigned int)(&load_kernel), sp);
}
593