/* elf_trampoline.c — FreeBSD revision 159084 */
1/*- 2 * Copyright (c) 2005 Olivier Houchard. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 15 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 16 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 17 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 18 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 19 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 20 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 22 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
23 */ 24 25#include <sys/cdefs.h> 26__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 159084 2006-05-30 21:13:47Z cognet $"); 27#include <machine/asm.h> 28#include <sys/types.h> 29#include <sys/elf32.h> 30#include <sys/param.h> 31#include <sys/inflate.h> 32#include <machine/elf.h> 33#include <machine/pte.h> 34#include <machine/cpufunc.h> 35 36#include <stdlib.h> 37 38#include "opt_global.h" 39#include "opt_kernname.h" 40 41extern char kernel_start[]; 42extern char kernel_end[]; 43 44void __start(void); 45 46#define GZ_HEAD 0xa 47 48#ifdef CPU_ARM7TDMI 49#define cpu_idcache_wbinv_all arm7tdmi_cache_flushID 50#elif defined(CPU_ARM8) 51#define cpu_idcache_wbinv_all arm8_cache_purgeID 52#elif defined(CPU_ARM9) 53#define cpu_idcache_wbinv_all arm9_dcache_wbinv_all 54#elif defined(CPU_ARM10) 55#define cpu_idcache_wbinv_all arm10_idcache_wbinv_all 56#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \ 57 defined(CPU_IXP12X0) 58#define cpu_idcache_wbinv_all sa1_cache_purgeID 59#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ 60 defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) 61#define cpu_idcache_wbinv_all xscale_cache_purgeID 62#endif 63int arm_pdcache_line_size = 32; 64int block_userspace_access = 0; 65 66static __inline void * 67memcpy(void *dst, const void *src, int len) 68{ 69 const char *s = src; 70 char *d = dst; 71 72 while (len) { 73 if (0 && len >= 4 && !((vm_offset_t)d & 3) && 74 !((vm_offset_t)s & 3)) { 75 *(uint32_t *)d = *(uint32_t *)s; 76 s += 4; 77 d += 4; 78 len -= 4; 79 } else { 80 *d++ = *s++; 81 len--; 82 } 83 } 84 return (dst); 85} 86 87static __inline void 88bzero(void *addr, int count) 89{ 90 char *tmp = (char *)addr; 91 92 while (count > 0) { 93 if (count >= 4 && !((vm_offset_t)tmp & 3)) { 94 *(uint32_t *)tmp = 0; 95 tmp += 4; 96 count -= 4; 97 } else { 98 *tmp = 0; 99 tmp++; 100 count--; 101 } 102 } 103} 104 105void 106_start(void) 107{ 108 int physaddr = KERNPHYSADDR; 109 int tmp1; 110 111 __asm 
__volatile("adr %0, 2f\n" 112 "bic %0, %0, #0xff000000\n" 113 "bic sp, sp, #0xff000000\n" 114 "and %1, %1, #0xff000000\n" 115 "orr %0, %0, %1\n" 116 "orr sp, sp, %1\n" 117 "mrc p15, 0, %1, c1, c0, 0\n" 118 "bic %1, %1, #1\n" /* Disable MMU */ 119 "orr %1, %1, #(4 | 8)\n" /* Add DC enable, 120 WBUF enable */ 121 "orr %1, %1, #0x1000\n" /* Add IC enable */ 122 "orr %1, %1, #(0x800)\n" /* BPRD enable */ 123 124 "mcr p15, 0, %1, c1, c0, 0\n" 125 "nop\n" 126 "nop\n" 127 "nop\n" 128 "mov pc, %0\n" 129 "2: nop\n" 130 : "=r" (tmp1), "+r" (physaddr)); 131 __start(); 132} 133 134#ifdef KZIP 135static unsigned char *orig_input, *i_input, *i_output; 136 137 138static u_int memcnt; /* Memory allocated: blocks */ 139static size_t memtot; /* Memory allocated: bytes */ 140/* 141 * Library functions required by inflate(). 142 */ 143 144#define MEMSIZ 0x8000 145 146/* 147 * Allocate memory block. 148 */ 149unsigned char * 150kzipmalloc(int size) 151{ 152 void *ptr; 153 static u_char mem[MEMSIZ]; 154 155 if (memtot + size > MEMSIZ) 156 return NULL; 157 ptr = mem + memtot; 158 memtot += size; 159 memcnt++; 160 return ptr; 161} 162 163/* 164 * Free allocated memory block. 
165 */ 166void 167kzipfree(void *ptr) 168{ 169 memcnt--; 170 if (!memcnt) 171 memtot = 0; 172} 173 174void 175putstr(char *dummy) 176{ 177} 178 179static int 180input(void *dummy) 181{ 182 if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) { 183 return (GZ_EOF); 184 } 185 return *i_input++; 186} 187 188static int 189output(void *dummy, unsigned char *ptr, unsigned long len) 190{ 191 192 memcpy(i_output, ptr, len); 193 i_output += len; 194 return (0); 195} 196 197static void * 198inflate_kernel(void *kernel, void *startaddr) 199{ 200 struct inflate infl; 201 char slide[GZ_WSIZE]; 202 203 orig_input = kernel; 204 memcnt = memtot = 0; 205 i_input = (char *)kernel + GZ_HEAD; 206 if (((char *)kernel)[3] & 0x18) { 207 while (*i_input) 208 i_input++; 209 i_input++; 210 } 211 i_output = startaddr; 212 bzero(&infl, sizeof(infl)); 213 infl.gz_input = input; 214 infl.gz_output = output; 215 infl.gz_slide = slide; 216 inflate(&infl); 217 return ((char *)(((vm_offset_t)i_output & ~3) + 4)); 218} 219 220#endif 221 222void * 223load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end, 224 int d) 225{ 226 Elf32_Ehdr *eh; 227 Elf32_Phdr phdr[512] /* XXX */, *php; 228 Elf32_Shdr shdr[512] /* XXX */; 229 int i,j; 230 void *entry_point; 231 int symtabindex = -1; 232 int symstrindex = -1; 233 vm_offset_t lastaddr = 0; 234 Elf_Addr ssym = 0, esym = 0; 235 Elf_Dyn *dp; 236 237 eh = (Elf32_Ehdr *)kstart; 238 ssym = esym = 0; 239 entry_point = (void*)eh->e_entry; 240 memcpy(phdr, (void *)(kstart + eh->e_phoff ), 241 eh->e_phnum * sizeof(phdr[0])); 242 243 /* Determine lastaddr. */ 244 for (i = 0; i < eh->e_phnum; i++) { 245 if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr 246 + phdr[i].p_memsz)) 247 lastaddr = phdr[i].p_vaddr - KERNVIRTADDR + 248 curaddr + phdr[i].p_memsz; 249 } 250 251 /* Save the symbol tables, as there're about to be scratched. 
*/ 252 memcpy(shdr, (void *)(kstart + eh->e_shoff), 253 sizeof(*shdr) * eh->e_shnum); 254 if (eh->e_shnum * eh->e_shentsize != 0 && 255 eh->e_shoff != 0) { 256 for (i = 0; i < eh->e_shnum; i++) { 257 if (shdr[i].sh_type == SHT_SYMTAB) { 258 for (j = 0; j < eh->e_phnum; j++) { 259 if (phdr[j].p_type == PT_LOAD && 260 shdr[i].sh_offset >= 261 phdr[j].p_offset && 262 (shdr[i].sh_offset + 263 shdr[i].sh_size <= 264 phdr[j].p_offset + 265 phdr[j].p_filesz)) { 266 shdr[i].sh_offset = 0; 267 shdr[i].sh_size = 0; 268 j = eh->e_phnum; 269 } 270 } 271 if (shdr[i].sh_offset != 0 && 272 shdr[i].sh_size != 0) { 273 symtabindex = i; 274 symstrindex = shdr[i].sh_link; 275 } 276 } 277 } 278 func_end = roundup(func_end, sizeof(long)); 279 if (symtabindex >= 0 && symstrindex >= 0) { 280 ssym = lastaddr; 281 if (d) { 282 memcpy((void *)func_end, (void *)( 283 shdr[symtabindex].sh_offset + kstart), 284 shdr[symtabindex].sh_size); 285 memcpy((void *)(func_end + 286 shdr[symtabindex].sh_size), 287 (void *)(shdr[symstrindex].sh_offset + 288 kstart), shdr[symstrindex].sh_size); 289 } else { 290 lastaddr += shdr[symtabindex].sh_size; 291 lastaddr = roundup(lastaddr, 292 sizeof(shdr[symtabindex].sh_size)); 293 lastaddr += sizeof(shdr[symstrindex].sh_size); 294 lastaddr += shdr[symstrindex].sh_size; 295 lastaddr = roundup(lastaddr, 296 sizeof(shdr[symstrindex].sh_size)); 297 } 298 299 } 300 } 301 if (!d) 302 return ((void *)lastaddr); 303 304 j = eh->e_phnum; 305 for (i = 0; i < j; i++) { 306 volatile char c; 307 308 if (phdr[i].p_type != PT_LOAD) 309 continue; 310 memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr), 311 (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz); 312 /* Clean space from oversized segments, eg: bss. */ 313 if (phdr[i].p_filesz < phdr[i].p_memsz) 314 bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR + 315 curaddr + phdr[i].p_filesz), phdr[i].p_memsz - 316 phdr[i].p_filesz); 317 } 318 /* Now grab the symbol tables. 
*/ 319 if (symtabindex >= 0 && symstrindex >= 0) { 320 *(Elf_Size *)lastaddr = 321 shdr[symtabindex].sh_size; 322 lastaddr += sizeof(shdr[symtabindex].sh_size); 323 memcpy((void*)lastaddr, 324 (void *)func_end, 325 shdr[symtabindex].sh_size); 326 lastaddr += shdr[symtabindex].sh_size; 327 lastaddr = roundup(lastaddr, 328 sizeof(shdr[symtabindex].sh_size)); 329 *(Elf_Size *)lastaddr = 330 shdr[symstrindex].sh_size; 331 lastaddr += sizeof(shdr[symstrindex].sh_size); 332 memcpy((void*)lastaddr, 333 (void*)(func_end + 334 shdr[symtabindex].sh_size), 335 shdr[symstrindex].sh_size); 336 lastaddr += shdr[symstrindex].sh_size; 337 lastaddr = roundup(lastaddr, 338 sizeof(shdr[symstrindex].sh_size)); 339 *(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER; 340 *((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR; 341 *((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR; 342 } else 343 *(Elf_Addr *)curaddr = 0; 344 /* Invalidate the instruction cache. */ 345 __asm __volatile("mcr p15, 0, %0, c7, c5, 0\n" 346 "mcr p15, 0, %0, c7, c10, 4\n" 347 : : "r" (curaddr)); 348 /* Jump to the entry point. */ 349 ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))(); 350 __asm __volatile(".globl func_end\n" 351 "func_end:"); 352 353} 354 355extern char func_end[]; 356 357extern void *_end; 358 359#define PMAP_DOMAIN_KERNEL 15 /* 360 * Just define it instead of including the 361 * whole VM headers set. 
362 */ 363int __hack; 364static __inline void 365setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend) 366{ 367 unsigned int *pd = (unsigned int *)pt_addr; 368 vm_paddr_t addr; 369 int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT; 370 int tmp; 371 372 bzero(pd, L1_TABLE_SIZE); 373 for (addr = physstart; addr < physend; addr += L1_S_SIZE) 374 pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)| 375 L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr; 376 /* XXX: See below */ 377 if (0xfff00000 < physstart || 0xfff00000 > physend) 378 pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)| 379 L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart; 380 __asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */ 381 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TTB */ 382 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */ 383 "mrc p15, 0, %0, c1, c0, 0\n" 384 "orr %0, %0, #1\n" /* MMU_ENABLE */ 385 "mcr p15, 0, %0, c1, c0, 0\n" 386 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */ 387 "mov r0, r0\n" 388 "sub pc, pc, #4\n" : 389 "=r" (tmp) : "r" (pd), "r" (domain)); 390 391 /* 392 * XXX: This is the most stupid workaround I've ever wrote. 393 * For some reason, the KB9202 won't boot the kernel unless 394 * we access an address which is not in the 395 * 0x20000000 - 0x20ffffff range. I hope I'll understand 396 * what's going on later. 
397 */ 398 __hack = *(volatile int *)0xfffff21c; 399} 400 401void 402__start(void) 403{ 404 void *curaddr; 405 void *dst, *altdst; 406 char *kernel = (char *)&kernel_start; 407 408 __asm __volatile("mov %0, pc" : 409 "=r" (curaddr)); 410 curaddr = (void*)((unsigned int)curaddr & 0xfff00000); 411#ifdef KZIP 412 if (*kernel == 0x1f && kernel[1] == 0x8b) { 413 int pt_addr = (((int)&_end + KERNSIZE + 0x100) & 414 ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE; 415 416 setup_pagetables(pt_addr, (vm_paddr_t)curaddr, 417 (vm_paddr_t)curaddr + 0x10000000); 418 /* Gzipped kernel */ 419 dst = inflate_kernel(kernel, &_end); 420 kernel = (char *)&_end; 421 altdst = 4 + load_kernel((unsigned int)kernel, 422 (unsigned int)curaddr, 423 (unsigned int)&func_end , 0); 424 if (altdst > dst) 425 dst = altdst; 426 cpu_idcache_wbinv_all(); 427 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" 428 "bic %0, %0, #1\n" /* MMU_ENABLE */ 429 "mcr p15, 0, %0, c1, c0, 0\n" 430 : "=r" (pt_addr)); 431 } else 432#endif 433 dst = 4 + load_kernel((unsigned int)&kernel_start, 434 (unsigned int)curaddr, 435 (unsigned int)&func_end, 0); 436 memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end - 437 (unsigned int)&load_kernel); 438 ((void (*)())dst)((unsigned int)kernel, 439 (unsigned int)curaddr, 440 dst + (unsigned int)(&func_end) - 441 (unsigned int)(&load_kernel), 1); 442} 443