/*-
 * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 159084 2006-05-30 21:13:47Z cognet $");
#include <machine/asm.h>
#include <sys/types.h>
#include <sys/elf32.h>
#include <sys/param.h>
#include <sys/inflate.h>
#include <machine/elf.h>
#include <machine/pte.h>
#include <machine/cpufunc.h>

#include <stdlib.h>

#include "opt_global.h"
#include "opt_kernname.h"

extern char kernel_start[];
extern char kernel_end[];

void __start(void);

#define GZ_HEAD	0xa

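/*
 * Map cpu_idcache_wbinv_all to the cache flush routine of the
 * configured CPU.  The trampoline runs long before the cpufuncs
 * switch table is set up, so the routine has to be chosen at
 * compile time.
 */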
#ifdef CPU_ARM7TDMI
#define cpu_idcache_wbinv_all	arm7tdmi_cache_flushID
#elif defined(CPU_ARM8)
#define cpu_idcache_wbinv_all	arm8_cache_purgeID
#elif defined(CPU_ARM9)
#define cpu_idcache_wbinv_all	arm9_dcache_wbinv_all
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all	arm10_idcache_wbinv_all
#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \
    defined(CPU_IXP12X0)
#define cpu_idcache_wbinv_all	sa1_cache_purgeID
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
    defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425)
#define cpu_idcache_wbinv_all	xscale_cache_purgeID
#endif
int arm_pdcache_line_size = 32;
int block_userspace_access = 0;

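/*
 * The trampoline is freestanding: provide the few libc-style helpers
 * (memcpy, bzero) it needs locally.
 */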
static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

	while (len) {
		/*
		 * The aligned word-at-a-time path is disabled ("0 &&");
		 * every copy currently goes byte by byte.
		 */
		if (0 && len >= 4 && !((vm_offset_t)d & 3) &&
		    !((vm_offset_t)s & 3)) {
			*(uint32_t *)d = *(uint32_t *)s;
			s += 4;
			d += 4;
			len -= 4;
		} else {
			*d++ = *s++;
			len--;
		}
	}
	return (dst);
}

static __inline void
bzero(void *addr, int count)
{
	char *tmp = (char *)addr;

	while (count > 0) {
		if (count >= 4 && !((vm_offset_t)tmp & 3)) {
			*(uint32_t *)tmp = 0;
			tmp += 4;
			count -= 4;
		} else {
			*tmp = 0;
			tmp++;
			count--;
		}
	}
}

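/*
 * Entry point.  The assembly below rebases the pc and the stack
 * pointer into the physical window we were loaded at, turns the MMU
 * off, and enables the I/D caches, the write buffer and branch
 * prediction before jumping to __start().
 */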
void
_start(void)
{
	int physaddr = KERNPHYSADDR;
	int tmp1;

	__asm __volatile("adr %0, 2f\n"
			 "bic %0, %0, #0xff000000\n"
			 "bic sp, sp, #0xff000000\n"
			 "and %1, %1, #0xff000000\n"
			 "orr %0, %0, %1\n"
			 "orr sp, sp, %1\n"
			 "mrc p15, 0, %1, c1, c0, 0\n"
			 "bic %1, %1, #1\n" /* Disable MMU */
			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
						     WBUF enable */
			 "orr %1, %1, #0x1000\n" /* Add IC enable */
			 "orr %1, %1, #(0x800)\n" /* BPRD enable */

			 "mcr p15, 0, %1, c1, c0, 0\n"
			 "nop\n"
			 "nop\n"
			 "nop\n"
			 "mov pc, %0\n"
			 "2: nop\n"
			 : "=r" (tmp1), "+r" (physaddr));
	__start();
}

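/*
 * KZIP: the minimal environment sys/inflate needs in order to
 * decompress a gzip'ed kernel image embedded at kernel_start.
 */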
#ifdef KZIP
static unsigned char *orig_input, *i_input, *i_output;

static u_int memcnt;		/* Memory allocated: blocks */
static size_t memtot;		/* Memory allocated: bytes */
/*
 * Library functions required by inflate().
 */

#define MEMSIZ 0x8000

/*
 * Allocate a memory block: a trivial bump allocator backed by a
 * static MEMSIZ-byte arena, just enough for inflate()'s needs.
 */
unsigned char *
kzipmalloc(int size)
{
	void *ptr;
	static u_char mem[MEMSIZ];

	if (memtot + size > MEMSIZ)
		return NULL;
	ptr = mem + memtot;
	memtot += size;
	memcnt++;
	return ptr;
}

/*
 * Free an allocated memory block.  Individual blocks are not
 * reclaimed; the arena is reset once every outstanding block has
 * been freed.
 */
void
kzipfree(void *ptr)
{
	memcnt--;
	if (!memcnt)
		memtot = 0;
}

/* Output stub for inflate(); there is no console this early. */
void
putstr(char *dummy)
{
}

static int
input(void *dummy)
{
	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
		return (GZ_EOF);
	}
	return *i_input++;
}

static int
output(void *dummy, unsigned char *ptr, unsigned long len)
{

	memcpy(i_output, ptr, len);
	i_output += len;
	return (0);
}

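/*
 * Decompress the gzip'ed kernel at "kernel" to "startaddr".  The
 * 10-byte fixed gzip header (GZ_HEAD) is skipped; if the FLG byte
 * has the FNAME/FCOMMENT bits set (0x08/0x10), the NUL-terminated
 * string that follows is skipped as well.  Returns the first
 * word-aligned address past the inflated image.
 */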
static void *
inflate_kernel(void *kernel, void *startaddr)
{
	struct inflate infl;
	char slide[GZ_WSIZE];

	orig_input = kernel;
	memcnt = memtot = 0;
	i_input = (unsigned char *)kernel + GZ_HEAD;
	if (((char *)kernel)[3] & 0x18) {
		while (*i_input)
			i_input++;
		i_input++;
	}
	i_output = startaddr;
	bzero(&infl, sizeof(infl));
	infl.gz_input = input;
	infl.gz_output = output;
	infl.gz_slide = slide;
	inflate(&infl);
	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
}

#endif

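/*
 * Copy the ELF image at kstart to its link address rebased on curaddr,
 * carry the SYMTAB/STRTAB sections along, and jump to the entry point.
 * The function is meant to be called twice: a first pass with d == 0
 * only sizes things up and returns the address just past the loaded
 * image; a second pass with d != 0, made from a copy of this code
 * relocated out of the way (see __start()), performs the actual
 * copies.  A rough sketch of the calling pattern:
 *
 *	end = load_kernel(kstart, curaddr, func_end, 0);   // dry run
 *	... relocate [load_kernel, func_end) above "end" ...
 *	relocated_copy(kstart, curaddr, new_func_end, 1);  // copy + jump
 */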
void *
load_kernel(unsigned int kstart, unsigned int curaddr, unsigned int func_end,
    int d)
{
	Elf32_Ehdr *eh;
	Elf32_Phdr phdr[512] /* XXX */, *php;
	Elf32_Shdr shdr[512] /* XXX */;
	int i, j;
	void *entry_point;
	int symtabindex = -1;
	int symstrindex = -1;
	vm_offset_t lastaddr = 0;
	Elf_Addr ssym = 0, esym = 0;
	Elf_Dyn *dp;

	eh = (Elf32_Ehdr *)kstart;
	ssym = esym = 0;
	entry_point = (void*)eh->e_entry;
	memcpy(phdr, (void *)(kstart + eh->e_phoff),
	    eh->e_phnum * sizeof(phdr[0]));

	/* Determine lastaddr. */
	for (i = 0; i < eh->e_phnum; i++) {
		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
		    + phdr[i].p_memsz))
			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_memsz;
	}

	/* Save the symbol tables, as they're about to be scratched. */
	memcpy(shdr, (void *)(kstart + eh->e_shoff),
	    sizeof(*shdr) * eh->e_shnum);
	if (eh->e_shnum * eh->e_shentsize != 0 &&
	    eh->e_shoff != 0) {
		for (i = 0; i < eh->e_shnum; i++) {
			if (shdr[i].sh_type == SHT_SYMTAB) {
				for (j = 0; j < eh->e_phnum; j++) {
					if (phdr[j].p_type == PT_LOAD &&
					    shdr[i].sh_offset >=
					    phdr[j].p_offset &&
					    (shdr[i].sh_offset +
					     shdr[i].sh_size <=
					     phdr[j].p_offset +
					     phdr[j].p_filesz)) {
						shdr[i].sh_offset = 0;
						shdr[i].sh_size = 0;
						j = eh->e_phnum;
					}
				}
				if (shdr[i].sh_offset != 0 &&
				    shdr[i].sh_size != 0) {
					symtabindex = i;
					symstrindex = shdr[i].sh_link;
				}
			}
		}
		func_end = roundup(func_end, sizeof(long));
		if (symtabindex >= 0 && symstrindex >= 0) {
			ssym = lastaddr;
			if (d) {
				memcpy((void *)func_end, (void *)(
				    shdr[symtabindex].sh_offset + kstart),
				    shdr[symtabindex].sh_size);
				memcpy((void *)(func_end +
				    shdr[symtabindex].sh_size),
				    (void *)(shdr[symstrindex].sh_offset +
				    kstart), shdr[symstrindex].sh_size);
			} else {
				lastaddr += shdr[symtabindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symtabindex].sh_size));
				lastaddr += sizeof(shdr[symstrindex].sh_size);
				lastaddr += shdr[symstrindex].sh_size;
				lastaddr = roundup(lastaddr,
				    sizeof(shdr[symstrindex].sh_size));
			}
		}
	}
	if (!d)
		return ((void *)lastaddr);

	j = eh->e_phnum;
	for (i = 0; i < j; i++) {
		volatile char c;

		if (phdr[i].p_type != PT_LOAD)
			continue;
		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
		    (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
		/* Clean space from oversized segments, e.g. bss. */
		if (phdr[i].p_filesz < phdr[i].p_memsz)
			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
			    phdr[i].p_filesz);
	}
	/* Now grab the symbol tables. */
	if (symtabindex >= 0 && symstrindex >= 0) {
		*(Elf_Size *)lastaddr =
		    shdr[symtabindex].sh_size;
		lastaddr += sizeof(shdr[symtabindex].sh_size);
		memcpy((void*)lastaddr,
		    (void *)func_end,
		    shdr[symtabindex].sh_size);
		lastaddr += shdr[symtabindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symtabindex].sh_size));
		*(Elf_Size *)lastaddr =
		    shdr[symstrindex].sh_size;
		lastaddr += sizeof(shdr[symstrindex].sh_size);
		memcpy((void*)lastaddr,
		    (void*)(func_end +
			    shdr[symtabindex].sh_size),
		    shdr[symstrindex].sh_size);
		lastaddr += shdr[symstrindex].sh_size;
		lastaddr = roundup(lastaddr,
		    sizeof(shdr[symstrindex].sh_size));
		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
	} else
		*(Elf_Addr *)curaddr = 0;
	/* Invalidate the instruction cache and drain the write buffer. */
	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
			 "mcr p15, 0, %0, c7, c10, 4\n"
			 : : "r" (curaddr));
	/* Jump to the entry point. */
	((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
	__asm __volatile(".globl func_end\n"
	    "func_end:");

}

extern char func_end[];

extern void *_end;

#define PMAP_DOMAIN_KERNEL	15 /*
				    * Just define it instead of including the
				    * whole VM headers set.
				    */
int __hack;
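/*
 * Build a throwaway L1 page table at pt_addr: 1MB sections mapping
 * [physstart, physend) 1:1, cacheable, kernel read/write.  Then load
 * the TTB, invalidate the TLBs, set the domain register and turn the
 * MMU on.
 */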
static __inline void
setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend)
{
	unsigned int *pd = (unsigned int *)pt_addr;
	vm_paddr_t addr;
	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
	int tmp;

	bzero(pd, L1_TABLE_SIZE);
	for (addr = physstart; addr < physend; addr += L1_S_SIZE)
		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
	/* XXX: See below */
	if (0xfff00000 < physstart || 0xfff00000 > physend)
		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* Set TTB */
			 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush the TLBs */
			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set the DACR */
			 "mrc p15, 0, %0, c1, c0, 0\n"
			 "orr %0, %0, #1\n" /* MMU_ENABLE */
			 "mcr p15, 0, %0, c1, c0, 0\n"
			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
			 "mov r0, r0\n"
			 "sub pc, pc, #4\n" :
			 "=r" (tmp) : "r" (pd), "r" (domain));

	/*
	 * XXX: This is the most stupid workaround I've ever written.
	 * For some reason, the KB9202 won't boot the kernel unless
	 * we access an address which is not in the
	 * 0x20000000 - 0x20ffffff range. I hope I'll understand
	 * what's going on later.
	 */
	__hack = *(volatile int *)0xfffff21c;
}

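/*
 * Real C entry point, called from _start() with the caches on and the
 * MMU off.  It derives the physical load address from the pc,
 * decompresses the kernel if it is gzip'ed, relocates a copy of
 * load_kernel() (this code, up to func_end) past the end of the loaded
 * image, and calls that copy to overwrite ourselves with the kernel
 * and jump into it.
 */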
void
__start(void)
{
	void *curaddr;
	void *dst, *altdst;
	char *kernel = (char *)&kernel_start;

	__asm __volatile("mov %0, pc"  :
	    "=r" (curaddr));
	curaddr = (void*)((unsigned int)curaddr & 0xfff00000);
#ifdef KZIP
	if (*kernel == 0x1f && kernel[1] == 0x8b) {
		int pt_addr = (((int)&_end + KERNSIZE + 0x100) &
		    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;

		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
		    (vm_paddr_t)curaddr + 0x10000000);
		/* Gzipped kernel */
		dst = inflate_kernel(kernel, &_end);
		kernel = (char *)&_end;
		altdst = 4 + load_kernel((unsigned int)kernel,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
		if (altdst > dst)
			dst = altdst;
		cpu_idcache_wbinv_all();
		__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
		    "bic %0, %0, #1\n" /* MMU_DISABLE */
		    "mcr p15, 0, %0, c1, c0, 0\n"
		    : "=r" (pt_addr));
	} else
#endif
		dst = 4 + load_kernel((unsigned int)&kernel_start,
		    (unsigned int)curaddr,
		    (unsigned int)&func_end, 0);
	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
	    (unsigned int)&load_kernel);
	((void (*)())dst)((unsigned int)kernel,
			  (unsigned int)curaddr,
			  dst + (unsigned int)(&func_end) -
			  (unsigned int)(&load_kernel), 1);
}