elf_trampoline.c revision 159901
/*-
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 159901 2006-06-23 22:45:35Z cognet $");
#include <machine/asm.h>
#include <sys/types.h>
#include <sys/elf32.h>
#include <sys/param.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte.h>
#include <machine/cpufunc.h>
#include <machine/armreg.h>

#include <stdlib.h>

#include "opt_global.h"
#include "opt_kernname.h"

extern char kernel_start[];
extern char kernel_end[];

extern void *_end;

void __start(void);

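/* Size of the fixed gzip header, in bytes. */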
#define GZ_HEAD	0xa

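/*
 * The trampoline runs before the kernel's cpufuncs switch exists, so the
 * idcache write-back-and-invalidate routine for the configured CPU has to
 * be chosen at compile time.
 */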
#ifdef CPU_ARM7TDMI
#define cpu_idcache_wbinv_all	arm7tdmi_cache_flushID
#elif defined(CPU_ARM8)
#define cpu_idcache_wbinv_all	arm8_cache_purgeID
#elif defined(CPU_ARM9)
#define cpu_idcache_wbinv_all	arm9_idcache_wbinv_all
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all	arm10_idcache_wbinv_all
#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \
    defined(CPU_IXP12X0)
#define cpu_idcache_wbinv_all	sa1_cache_purgeID
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
    defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425)
#define cpu_idcache_wbinv_all	xscale_cache_purgeID
#endif

int     arm_picache_size;
int     arm_picache_line_size;
int     arm_picache_ways;

int     arm_pdcache_size;       /* and unified */
int     arm_pdcache_line_size = 32;
int     arm_pdcache_ways;

int     arm_pcache_type;
int     arm_pcache_unified;

int     arm_dcache_align;
int     arm_dcache_align_mask;

/*
 * Additional cache information local to this file.  Log2 of some of the
 * above numbers.
 */
static int      arm_dcache_l2_nsets;
static int      arm_dcache_l2_assoc;
static int      arm_dcache_l2_linesize;

int block_userspace_access = 0;
extern int arm9_dcache_sets_inc;
extern int arm9_dcache_sets_max;
extern int arm9_dcache_index_max;
extern int arm9_dcache_index_inc;

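/*
 * Freestanding memcpy()/bzero() replacements; nothing from libc or the
 * kernel is linked in this early.  Both move 32-bit words whenever the
 * pointers are 4-byte aligned and at least four bytes remain, and fall
 * back to byte accesses otherwise.
 */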
static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

	while (len) {
		if (len >= 4 && !((vm_offset_t)d & 3) &&
		    !((vm_offset_t)s & 3)) {
			*(uint32_t *)d = *(const uint32_t *)s;
			s += 4;
			d += 4;
			len -= 4;
		} else {
			*d++ = *s++;
			len--;
		}
	}
	return (dst);
}

static __inline void
bzero(void *addr, int count)
{
	char *tmp = (char *)addr;

	while (count > 0) {
		if (count >= 4 && !((vm_offset_t)tmp & 3)) {
			*(uint32_t *)tmp = 0;
			tmp += 4;
			count -= 4;
		} else {
			*tmp = 0;
			tmp++;
			count--;
		}
	}
}

static void arm9_setup(void);

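/*
 * Physical entry point.  Reserve a stack above _end (past the room the
 * inflated kernel and page tables will need when KZIP is defined), then
 * jump to the physical alias of this code with the MMU off and the
 * caches, write buffer and branch prediction enabled, and call __start().
 */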
void
_start(void)
{
	int physaddr = KERNPHYSADDR;
	int tmp1;
	unsigned int sp = ((unsigned int)&_end & ~3) + 4;
#ifdef KZIP
	sp += KERNSIZE + 0x100;
	sp &= ~(L1_TABLE_SIZE - 1);
	sp += 2 * L1_TABLE_SIZE;
#endif
	sp += 1024 * 1024; /* Should be enough for a stack */

	__asm __volatile("adr %0, 2f\n"
			 "bic %0, %0, #0xff000000\n"
			 "and %1, %1, #0xff000000\n"
			 "orr %0, %0, %1\n"
			 "mrc p15, 0, %1, c1, c0, 0\n"
			 "bic %1, %1, #1\n" /* Disable MMU */
			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
						     WBUF enable */
			 "orr %1, %1, #0x1000\n" /* Add IC enable */
			 "orr %1, %1, #(0x800)\n" /* BPRD enable */
			 "mcr p15, 0, %1, c1, c0, 0\n"
			 "nop\n"
			 "nop\n"
			 "nop\n"
			 "mov pc, %0\n"
			 "2: nop\n"
			 "mov sp, %2\n"
			 : "=r" (tmp1), "+r" (physaddr), "+r" (sp));
#ifndef KZIP
#ifdef CPU_ARM9
	/* So that idcache_wbinv works. */
	if ((cpufunc_id() & 0x0000f000) == 0x00009000)
		arm9_setup();
#endif
	cpu_idcache_wbinv_all();
#endif
	__start();
}

static void
get_cachetype_cp15(void)
{
	u_int ctype, isize, dsize;
	u_int multiplier;

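	/* Read the cache type register (CP15 c0, c0, 1). */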
	__asm __volatile("mrc p15, 0, %0, c0, c0, 1"
	    : "=r" (ctype));

	/*
	 * ...and thus spake the ARM ARM:
	 *
	 * If an <opcode2> value corresponding to an unimplemented or
	 * reserved ID register is encountered, the System Control
	 * processor returns the value of the main ID register.
	 */
	if (ctype == cpufunc_id())
		goto out;

	if ((ctype & CPU_CT_S) == 0)
		arm_pcache_unified = 1;

	/*
	 * If you want to know how this code works, go read the ARM ARM.
	 */

	arm_pcache_type = CPU_CT_CTYPE(ctype);
	if (arm_pcache_unified == 0) {
		isize = CPU_CT_ISIZE(ctype);
		multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
		arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
		if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
			if (isize & CPU_CT_xSIZE_M)
				arm_picache_line_size = 0; /* not present */
			else
				arm_picache_ways = 1;
		} else {
			arm_picache_ways = multiplier <<
			    (CPU_CT_xSIZE_ASSOC(isize) - 1);
		}
		arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
	}

	dsize = CPU_CT_DSIZE(ctype);
	multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
	arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
	if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
		if (dsize & CPU_CT_xSIZE_M)
			arm_pdcache_line_size = 0; /* not present */
		else
			arm_pdcache_ways = 1;
	} else {
		arm_pdcache_ways = multiplier <<
		    (CPU_CT_xSIZE_ASSOC(dsize) - 1);
	}
	arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);

	arm_dcache_align = arm_pdcache_line_size;

	arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
	arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
	arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
	    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);
out:
	arm_dcache_align_mask = arm_dcache_align - 1;
}

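/*
 * Compute the set/index loop parameters that the arm9 dcache routines
 * use from the cache geometry probed by get_cachetype_cp15().
 */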
static void
arm9_setup(void)
{

	get_cachetype_cp15();
	arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
	arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
	    arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
	arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
	arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
}

#ifdef KZIP
static unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;		/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */
/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000

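/*
 * kzipmalloc() hands out pieces of the static pool below; kzipfree() only
 * resets the pool once every outstanding block has been returned.
 */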
/*
 * Allocate memory block.
 */
unsigned char *
kzipmalloc(int size)
{
	void *ptr;
	static u_char mem[MEMSIZ];

	if (memtot + size > MEMSIZ)
		return NULL;
	ptr = mem + memtot;
	memtot += size;
	memcnt++;
	return ptr;
}

/*
 * Free allocated memory block.
 */
void
kzipfree(void *ptr)
{
	memcnt--;
	if (!memcnt)
		memtot = 0;
}

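/*
 * The inflate code wants a putstr() for diagnostics; there is no console
 * this early, so it is a no-op.
 */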
void
putstr(char *dummy)
{
}

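/*
 * inflate() I/O callbacks: input() returns the next byte of the
 * compressed image (GZ_EOF at the end), output() copies inflated bytes
 * out and advances the write pointer.
 */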
static int
input(void *dummy)
{
	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
		return (GZ_EOF);
	}
	return *i_input++;
}

static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

	memcpy(i_output, ptr, len);
	i_output += len;
	return (0);
}

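/*
 * Inflate the gzip image at "kernel" to "startaddr".  A gzip stream
 * begins with a 10-byte fixed header (magic 0x1f 0x8b, method, FLG byte
 * at offset 3, mtime, XFL, OS), which GZ_HEAD skips; when FLG has the
 * FNAME/FCOMMENT bits (0x18) set, one NUL-terminated field is skipped as
 * well, and the rest is handed to inflate().  Returns a word-aligned
 * address just past the inflated image.
 */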
324inflate_kernel(void *kernel, void *startaddr)
325{
326	struct inflate infl;
327	char slide[GZ_WSIZE];
328
329	orig_input = kernel;
330	memcnt = memtot = 0;
331	i_input = (char *)kernel + GZ_HEAD;
332	if (((char *)kernel)[3] & 0x18) {
333		while (*i_input)
334			i_input++;
335		i_input++;
336	}
337	i_output = startaddr;
338	bzero(&infl, sizeof(infl));
339	infl.gz_input = input;
340	infl.gz_output = output;
341	infl.gz_slide = slide;
342	inflate(&infl);
343	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
344}
345
346#endif
347
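/*
 * Load the ELF image at kstart to run at curaddr.  Called twice: with
 * d == 0 it only computes and returns the first free address past the
 * kernel and its symbol tables; with d != 0 it copies the PT_LOAD
 * segments into place, zeroes their bss, appends the symbol and string
 * tables (each preceded by its size, staged above func_end first),
 * stores MAGIC_TRAMP_NUMBER and the virtual ssym/esym bounds at curaddr,
 * disables the MMU and jumps to the entry point.
 */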
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
	Elf32_Ehdr *eh;
	Elf32_Phdr phdr[64] /* XXX */, *php;
	Elf32_Shdr shdr[64] /* XXX */;
	int i, j;
	void *entry_point;
	int symtabindex = -1;
	int symstrindex = -1;
	vm_offset_t lastaddr = 0;
	Elf_Addr ssym = 0, esym = 0;
	Elf_Dyn *dp;

	eh = (Elf32_Ehdr *)kstart;
	ssym = esym = 0;
	entry_point = (void *)eh->e_entry;
	memcpy(phdr, (void *)(kstart + eh->e_phoff),
	    eh->e_phnum * sizeof(phdr[0]));

	/* Determine lastaddr. */
	for (i = 0; i < eh->e_phnum; i++) {
		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
		    + phdr[i].p_memsz))
			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_memsz;
	}

	/* Save the symbol tables, as they're about to be overwritten. */
	memcpy(shdr, (void *)(kstart + eh->e_shoff),
	    sizeof(*shdr) * eh->e_shnum);
	if (eh->e_shnum * eh->e_shentsize != 0 &&
	    eh->e_shoff != 0) {
		for (i = 0; i < eh->e_shnum; i++) {
			if (shdr[i].sh_type == SHT_SYMTAB) {
				for (j = 0; j < eh->e_phnum; j++) {
					if (phdr[j].p_type == PT_LOAD &&
					    shdr[i].sh_offset >=
					    phdr[j].p_offset &&
					    (shdr[i].sh_offset +
					     shdr[i].sh_size <=
					     phdr[j].p_offset +
					     phdr[j].p_filesz)) {
						shdr[i].sh_offset = 0;
						shdr[i].sh_size = 0;
						j = eh->e_phnum;
					}
				}
				if (shdr[i].sh_offset != 0 &&
				    shdr[i].sh_size != 0) {
					symtabindex = i;
					symstrindex = shdr[i].sh_link;
				}
			}
		}
		func_end = roundup(func_end, sizeof(long));
		if (symtabindex >= 0 && symstrindex >= 0) {
			ssym = lastaddr;
			if (d) {
				memcpy((void *)func_end, (void *)(
				    shdr[symtabindex].sh_offset + kstart),
				    shdr[symtabindex].sh_size);
				memcpy((void *)(func_end +
				    shdr[symtabindex].sh_size),
				    (void *)(shdr[symstrindex].sh_offset +
				    kstart), shdr[symstrindex].sh_size);
			} else {
				lastaddr += sizeof(shdr[symtabindex].sh_size);
				lastaddr += shdr[symtabindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symtabindex].sh_size));
				lastaddr += sizeof(shdr[symstrindex].sh_size);
				lastaddr += shdr[symstrindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symstrindex].sh_size));
			}
		}
	}
	if (!d)
		return ((void *)lastaddr);

	j = eh->e_phnum;
	for (i = 0; i < j; i++) {
		volatile char c;

		if (phdr[i].p_type != PT_LOAD)
			continue;
		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
		    (void *)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
		/* Clean space from oversized segments, e.g. the bss. */
		if (phdr[i].p_filesz < phdr[i].p_memsz)
			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
			    phdr[i].p_filesz);
	}
	/* Now grab the symbol tables. */
	if (symtabindex >= 0 && symstrindex >= 0) {
		*(Elf_Size *)lastaddr =
		    shdr[symtabindex].sh_size;
		lastaddr += sizeof(shdr[symtabindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)func_end,
		    shdr[symtabindex].sh_size);
		lastaddr += shdr[symtabindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symtabindex].sh_size));
		*(Elf_Size *)lastaddr =
		    shdr[symstrindex].sh_size;
		lastaddr += sizeof(shdr[symstrindex].sh_size);
		memcpy((void *)lastaddr,
		    (void *)(func_end +
			    shdr[symtabindex].sh_size),
		    shdr[symstrindex].sh_size);
		lastaddr += shdr[symstrindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symstrindex].sh_size));
		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
	} else
		*(Elf_Addr *)curaddr = 0;
	/* Invalidate the instruction cache and drain the write buffer. */
	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
			 "mcr p15, 0, %0, c7, c10, 4\n"
			 : : "r" (curaddr));
	__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
	    "bic %0, %0, #1\n" /* Clear MMU_ENABLE */
	    "mcr p15, 0, %0, c1, c0, 0\n"
	    : "=r" (ssym));
	/* Jump to the entry point. */
	((void (*)(void))(entry_point - KERNVIRTADDR + curaddr))();
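	/*
	 * Never reached: control has passed to the kernel.  The func_end
	 * label below only marks the end of the region that __start()
	 * copies before calling into it.
	 */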
	__asm __volatile(".globl func_end\n"
	    "func_end:");
}

extern char func_end[];

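/*
 * do_call() is not defined in this file; it is the assembler helper that
 * switches to the new stack and jumps into the relocated copy of this
 * code.  The prototype is inferred from the single call in __start()
 * below, to avoid an implicit declaration.
 */
void do_call(void *, void *, void *, int);
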
#define PMAP_DOMAIN_KERNEL	15 /*
				    * Just define it instead of including the
				    * whole VM headers set.
				    */
int __hack;

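/*
 * Build a minimal L1 translation table at pt_addr: 1MB section mappings
 * covering [physstart, physend) as cached kernel read/write (write-back
 * buffered when write_back is set), point the TTB at it, flush the TLB,
 * open domains 0 and PMAP_DOMAIN_KERNEL as client and enable the MMU.
 */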
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
    int write_back)
{
	unsigned int *pd = (unsigned int *)pt_addr;
	vm_paddr_t addr;
	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) |
	    DOMAIN_CLIENT;
	int tmp;

	bzero(pd, L1_TABLE_SIZE);
	for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
		if (write_back)
			pd[addr >> L1_S_SHIFT] |= L1_S_B;
	}
	/* XXX: See below. */
	if (0xfff00000 < physstart || 0xfff00000 > physend)
		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
			 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TLB */
			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */
			 "mrc p15, 0, %0, c1, c0, 0\n"
			 "orr %0, %0, #1\n" /* MMU_ENABLE */
			 "mcr p15, 0, %0, c1, c0, 0\n"
			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
			 "mov r0, r0\n"
			 "sub pc, pc, #4\n" :
			 "=r" (tmp) : "r" (pd), "r" (domain));

	/*
	 * XXX: This is the most stupid workaround I've ever written.
	 * For some reason, the KB9202 won't boot the kernel unless
	 * we access an address which is not in the
	 * 0x20000000 - 0x20ffffff range.  I hope I'll understand
	 * what's going on later.
	 */
	__hack = *(volatile int *)0xfffff21c;
}

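/*
 * C entry point.  Work out which 1MB section we are executing from,
 * inflate the kernel first if it carries the gzip magic, size the final
 * load with a dry run of load_kernel(), build page tables and a stack
 * above everything, then copy the load_kernel..func_end code out of the
 * way and hand control to the copy through do_call(), so relocating the
 * kernel cannot overwrite the code doing the relocation.
 */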
void
__start(void)
{
	void *curaddr;
	void *dst, *altdst;
	char *kernel = (char *)&kernel_start;
	int sp;
	int pt_addr;

	__asm __volatile("mov %0, pc" :
	    "=r" (curaddr));
	curaddr = (void *)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
	if (*kernel == 0x1f && kernel[1] == 0x8b) {
		pt_addr = (((int)&_end + KERNSIZE + 0x100) &
		    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;

#ifdef CPU_ARM9
		/* So that idcache_wbinv works. */
		if ((cpufunc_id() & 0x0000f000) == 0x00009000)
			arm9_setup();
#endif
		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
		    (vm_paddr_t)curaddr + 0x10000000, 1);
		/* Gzipped kernel. */
		dst = inflate_kernel(kernel, &_end);
		kernel = (char *)&_end;
		altdst = 4 + load_kernel((unsigned int)kernel,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
		if (altdst > dst)
			dst = altdst;
		cpu_idcache_wbinv_all();
		__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
		    "bic %0, %0, #1\n" /* Clear MMU_ENABLE */
		    "mcr p15, 0, %0, c1, c0, 0\n"
		    : "=r" (pt_addr));
	} else
#endif
		dst = 4 + load_kernel((unsigned int)&kernel_start,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
	dst = (void *)((vm_offset_t)dst & ~3);
	pt_addr = ((unsigned int)dst & ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
	setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
	    (vm_paddr_t)curaddr + 0x10000000, 0);
	sp = pt_addr + L1_TABLE_SIZE + 8192;
	sp = sp & ~3;
	dst = (void *)(sp + 4);
	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
	    (unsigned int)&load_kernel);
	do_call(dst, kernel, dst + (unsigned int)(&func_end) -
	    (unsigned int)(&load_kernel), sp);
}