elf_trampoline.c revision 163810
1/*- 2 * Copyright (c) 2005 Olivier Houchard. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 15 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 16 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 17 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 18 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 19 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 20 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 22 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 */ 24 25#include <sys/cdefs.h> 26__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 163810 2006-10-30 23:23:00Z cognet $"); 27#include <machine/asm.h> 28#include <sys/types.h> 29#include <sys/elf32.h> 30#include <sys/param.h> 31#include <sys/inflate.h> 32#include <machine/elf.h> 33#include <machine/pte.h> 34#include <machine/cpufunc.h> 35#include <machine/armreg.h> 36 37#include <stdlib.h> 38 39/* 40 * Since we are compiled outside of the normal kernel build process, we 41 * need to include opt_global.h manually. 42 */ 43#include "opt_global.h" 44#include "opt_kernname.h" 45 46extern char kernel_start[]; 47extern char kernel_end[]; 48 49extern void *_end; 50 51void __start(void); 52 53#define GZ_HEAD 0xa 54 55#ifdef CPU_ARM7TDMI 56#define cpu_idcache_wbinv_all arm7tdmi_cache_flushID 57#elif defined(CPU_ARM8) 58#define cpu_idcache_wbinv_all arm8_cache_purgeID 59#elif defined(CPU_ARM9) 60#define cpu_idcache_wbinv_all arm9_idcache_wbinv_all 61#elif defined(CPU_ARM10) 62#define cpu_idcache_wbinv_all arm10_idcache_wbinv_all 63#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \ 64 defined(CPU_IXP12X0) 65#define cpu_idcache_wbinv_all sa1_cache_purgeID 66#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ 67 defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ 68 defined(CPU_XSCALE_80219) 69#define cpu_idcache_wbinv_all xscale_cache_purgeID 70#endif 71 72 73int arm_picache_size; 74int arm_picache_line_size; 75int arm_picache_ways; 76 77int arm_pdcache_size; /* and unified */ 78int arm_pdcache_line_size = 32; 79int arm_pdcache_ways; 80 81int arm_pcache_type; 82int arm_pcache_unified; 83 84int arm_dcache_align; 85int arm_dcache_align_mask; 86 87/* Additional cache information local to this file. Log2 of some of the 88 above numbers. */ 89static int arm_dcache_l2_nsets; 90static int arm_dcache_l2_assoc; 91static int arm_dcache_l2_linesize; 92 93 94int block_userspace_access = 0; 95extern int arm9_dcache_sets_inc; 96extern int arm9_dcache_sets_max; 97extern int arm9_dcache_index_max; 98extern int arm9_dcache_index_inc; 99 100static __inline void * 101memcpy(void *dst, const void *src, int len) 102{ 103 const char *s = src; 104 char *d = dst; 105 106 while (len) { 107 if (len >= 4 && !((vm_offset_t)d & 3) && 108 !((vm_offset_t)s & 3)) { 109 *(uint32_t *)d = *(uint32_t *)s; 110 s += 4; 111 d += 4; 112 len -= 4; 113 } else { 114 *d++ = *s++; 115 len--; 116 } 117 } 118 return (dst); 119} 120 121static __inline void 122bzero(void *addr, int count) 123{ 124 char *tmp = (char *)addr; 125 126 while (count > 0) { 127 if (count >= 4 && !((vm_offset_t)tmp & 3)) { 128 *(uint32_t *)tmp = 0; 129 tmp += 4; 130 count -= 4; 131 } else { 132 *tmp = 0; 133 tmp++; 134 count--; 135 } 136 } 137} 138 139static void arm9_setup(void); 140 141void 142_start(void) 143{ 144 int physaddr = KERNPHYSADDR; 145 int tmp1; 146 unsigned int sp = ((unsigned int)&_end & ~3) + 4; 147#ifdef KZIP 148 sp += KERNSIZE + 0x100; 149 sp &= ~(L1_TABLE_SIZE - 1); 150 sp += 2 * L1_TABLE_SIZE; 151#endif 152 sp += 1024 * 1024; /* Should be enough for a stack */ 153 154 __asm __volatile("adr %0, 2f\n" 155 "bic %0, %0, #0xff000000\n" 156 "and %1, %1, #0xff000000\n" 157 "orr %0, %0, %1\n" 158 "mrc p15, 0, %1, c1, c0, 0\n" 159 "bic %1, %1, #1\n" /* Disable MMU */ 160 "orr %1, %1, #(4 | 8)\n" /* Add DC enable, 161 WBUF enable */ 162 "orr %1, %1, #0x1000\n" /* Add IC enable */ 163 "orr %1, %1, #(0x800)\n" /* BPRD enable */ 164 165 "mcr p15, 0, %1, c1, c0, 0\n" 166 "nop\n" 167 "nop\n" 168 "nop\n" 169 "mov pc, %0\n" 170 "2: nop\n" 171 "mov sp, %2\n" 172 : "=r" (tmp1), "+r" (physaddr), "+r" (sp)); 173#ifndef KZIP 174#ifdef CPU_ARM9 175 /* So that idcache_wbinv works; */ 176 if ((cpufunc_id() & 0x0000f000) == 0x00009000) 177 arm9_setup(); 178#endif 179 cpu_idcache_wbinv_all(); 180#endif 181 __start(); 182} 183 184static void 185get_cachetype_cp15() 186{ 187 u_int ctype, isize, dsize; 188 u_int multiplier; 189 190 __asm __volatile("mrc p15, 0, %0, c0, c0, 1" 191 : "=r" (ctype)); 192 193 /* 194 * ...and thus spake the ARM ARM: 195 * 196 * If an <opcode2> value corresponding to an unimplemented or 197 * reserved ID register is encountered, the System Control 198 * processor returns the value of the main ID register. 199 */ 200 if (ctype == cpufunc_id()) 201 goto out; 202 203 if ((ctype & CPU_CT_S) == 0) 204 arm_pcache_unified = 1; 205 206 /* 207 * If you want to know how this code works, go read the ARM ARM. 208 */ 209 210 arm_pcache_type = CPU_CT_CTYPE(ctype); 211 if (arm_pcache_unified == 0) { 212 isize = CPU_CT_ISIZE(ctype); 213 multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2; 214 arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3); 215 if (CPU_CT_xSIZE_ASSOC(isize) == 0) { 216 if (isize & CPU_CT_xSIZE_M) 217 arm_picache_line_size = 0; /* not present */ 218 else 219 arm_picache_ways = 1; 220 } else { 221 arm_picache_ways = multiplier << 222 (CPU_CT_xSIZE_ASSOC(isize) - 1); 223 } 224 arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8); 225 } 226 227 dsize = CPU_CT_DSIZE(ctype); 228 multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2; 229 arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3); 230 if (CPU_CT_xSIZE_ASSOC(dsize) == 0) { 231 if (dsize & CPU_CT_xSIZE_M) 232 arm_pdcache_line_size = 0; /* not present */ 233 else 234 arm_pdcache_ways = 1; 235 } else { 236 arm_pdcache_ways = multiplier << 237 (CPU_CT_xSIZE_ASSOC(dsize) - 1); 238 } 239 arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8); 240 241 arm_dcache_align = arm_pdcache_line_size; 242 243 arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2; 244 arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3; 245 arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) - 246 CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize); 247 out: 248 arm_dcache_align_mask = arm_dcache_align - 1; 249} 250 251static void 252arm9_setup(void) 253{ 254 255 get_cachetype_cp15(); 256 arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize; 257 arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize + 258 arm_dcache_l2_nsets)) - arm9_dcache_sets_inc; 259 arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc); 260 arm9_dcache_index_max = 0U - arm9_dcache_index_inc; 261} 262 263 264#ifdef KZIP 265static unsigned char *orig_input, *i_input, *i_output; 266 267 268static u_int memcnt; /* Memory allocated: blocks */ 269static size_t memtot; /* Memory allocated: bytes */ 270/* 271 * Library functions required by inflate(). 272 */ 273 274#define MEMSIZ 0x8000 275 276/* 277 * Allocate memory block. 278 */ 279unsigned char * 280kzipmalloc(int size) 281{ 282 void *ptr; 283 static u_char mem[MEMSIZ]; 284 285 if (memtot + size > MEMSIZ) 286 return NULL; 287 ptr = mem + memtot; 288 memtot += size; 289 memcnt++; 290 return ptr; 291} 292 293/* 294 * Free allocated memory block. 295 */ 296void 297kzipfree(void *ptr) 298{ 299 memcnt--; 300 if (!memcnt) 301 memtot = 0; 302} 303 304void 305putstr(char *dummy) 306{ 307} 308 309static int 310input(void *dummy) 311{ 312 if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) { 313 return (GZ_EOF); 314 } 315 return *i_input++; 316} 317 318static int 319output(void *dummy, unsigned char *ptr, unsigned long len) 320{ 321 322 323 memcpy(i_output, ptr, len); 324 i_output += len; 325 return (0); 326} 327 328static void * 329inflate_kernel(void *kernel, void *startaddr) 330{ 331 struct inflate infl; 332 char slide[GZ_WSIZE]; 333 334 orig_input = kernel; 335 memcnt = memtot = 0; 336 i_input = (char *)kernel + GZ_HEAD; 337 if (((char *)kernel)[3] & 0x18) { 338 while (*i_input) 339 i_input++; 340 i_input++; 341 } 342 i_output = startaddr; 343 bzero(&infl, sizeof(infl)); 344 infl.gz_input = input; 345 infl.gz_output = output; 346 infl.gz_slide = slide; 347 inflate(&infl); 348 return ((char *)(((vm_offset_t)i_output & ~3) + 4)); 349} 350 351#endif 352 353void * 354load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end, 355 int d) 356{ 357 Elf32_Ehdr *eh; 358 Elf32_Phdr phdr[64] /* XXX */, *php; 359 Elf32_Shdr shdr[64] /* XXX */; 360 int i,j; 361 void *entry_point; 362 int symtabindex = -1; 363 int symstrindex = -1; 364 vm_offset_t lastaddr = 0; 365 Elf_Addr ssym = 0, esym = 0; 366 Elf_Dyn *dp; 367 368 eh = (Elf32_Ehdr *)kstart; 369 ssym = esym = 0; 370 entry_point = (void*)eh->e_entry; 371 memcpy(phdr, (void *)(kstart + eh->e_phoff ), 372 eh->e_phnum * sizeof(phdr[0])); 373 374 /* Determine lastaddr. */ 375 for (i = 0; i < eh->e_phnum; i++) { 376 if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr 377 + phdr[i].p_memsz)) 378 lastaddr = phdr[i].p_vaddr - KERNVIRTADDR + 379 curaddr + phdr[i].p_memsz; 380 } 381 382 /* Save the symbol tables, as there're about to be scratched. */ 383 memcpy(shdr, (void *)(kstart + eh->e_shoff), 384 sizeof(*shdr) * eh->e_shnum); 385 if (eh->e_shnum * eh->e_shentsize != 0 && 386 eh->e_shoff != 0) { 387 for (i = 0; i < eh->e_shnum; i++) { 388 if (shdr[i].sh_type == SHT_SYMTAB) { 389 for (j = 0; j < eh->e_phnum; j++) { 390 if (phdr[j].p_type == PT_LOAD && 391 shdr[i].sh_offset >= 392 phdr[j].p_offset && 393 (shdr[i].sh_offset + 394 shdr[i].sh_size <= 395 phdr[j].p_offset + 396 phdr[j].p_filesz)) { 397 shdr[i].sh_offset = 0; 398 shdr[i].sh_size = 0; 399 j = eh->e_phnum; 400 } 401 } 402 if (shdr[i].sh_offset != 0 && 403 shdr[i].sh_size != 0) { 404 symtabindex = i; 405 symstrindex = shdr[i].sh_link; 406 } 407 } 408 } 409 func_end = roundup(func_end, sizeof(long)); 410 if (symtabindex >= 0 && symstrindex >= 0) { 411 ssym = lastaddr; 412 if (d) { 413 memcpy((void *)func_end, (void *)( 414 shdr[symtabindex].sh_offset + kstart), 415 shdr[symtabindex].sh_size); 416 memcpy((void *)(func_end + 417 shdr[symtabindex].sh_size), 418 (void *)(shdr[symstrindex].sh_offset + 419 kstart), shdr[symstrindex].sh_size); 420 } else { 421 lastaddr += shdr[symtabindex].sh_size; 422 lastaddr = roundup(lastaddr, 423 sizeof(shdr[symtabindex].sh_size)); 424 lastaddr += sizeof(shdr[symstrindex].sh_size); 425 lastaddr += shdr[symstrindex].sh_size; 426 lastaddr = roundup(lastaddr, 427 sizeof(shdr[symstrindex].sh_size)); 428 } 429 430 } 431 } 432 if (!d) 433 return ((void *)lastaddr); 434 435 j = eh->e_phnum; 436 for (i = 0; i < j; i++) { 437 volatile char c; 438 439 if (phdr[i].p_type != PT_LOAD) 440 continue; 441 memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr), 442 (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz); 443 /* Clean space from oversized segments, eg: bss. */ 444 if (phdr[i].p_filesz < phdr[i].p_memsz) 445 bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR + 446 curaddr + phdr[i].p_filesz), phdr[i].p_memsz - 447 phdr[i].p_filesz); 448 } 449 /* Now grab the symbol tables. */ 450 if (symtabindex >= 0 && symstrindex >= 0) { 451 *(Elf_Size *)lastaddr = 452 shdr[symtabindex].sh_size; 453 lastaddr += sizeof(shdr[symtabindex].sh_size); 454 memcpy((void*)lastaddr, 455 (void *)func_end, 456 shdr[symtabindex].sh_size); 457 lastaddr += shdr[symtabindex].sh_size; 458 lastaddr = roundup(lastaddr, 459 sizeof(shdr[symtabindex].sh_size)); 460 *(Elf_Size *)lastaddr = 461 shdr[symstrindex].sh_size; 462 lastaddr += sizeof(shdr[symstrindex].sh_size); 463 memcpy((void*)lastaddr, 464 (void*)(func_end + 465 shdr[symtabindex].sh_size), 466 shdr[symstrindex].sh_size); 467 lastaddr += shdr[symstrindex].sh_size; 468 lastaddr = roundup(lastaddr, 469 sizeof(shdr[symstrindex].sh_size)); 470 *(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER; 471 *((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR; 472 *((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR; 473 } else 474 *(Elf_Addr *)curaddr = 0; 475 /* Invalidate the instruction cache. */ 476 __asm __volatile("mcr p15, 0, %0, c7, c5, 0\n" 477 "mcr p15, 0, %0, c7, c10, 4\n" 478 : : "r" (curaddr)); 479 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" 480 "bic %0, %0, #1\n" /* MMU_ENABLE */ 481 "mcr p15, 0, %0, c1, c0, 0\n" 482 : "=r" (ssym)); 483 /* Jump to the entry point. */ 484 ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))(); 485 __asm __volatile(".globl func_end\n" 486 "func_end:"); 487 488} 489 490extern char func_end[]; 491 492 493#define PMAP_DOMAIN_KERNEL 15 /* 494 * Just define it instead of including the 495 * whole VM headers set. 496 */ 497int __hack; 498static __inline void 499setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend, 500 int write_back) 501{ 502 unsigned int *pd = (unsigned int *)pt_addr; 503 vm_paddr_t addr; 504 int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT; 505 int tmp; 506 507 bzero(pd, L1_TABLE_SIZE); 508 for (addr = physstart; addr < physend; addr += L1_S_SIZE) { 509 pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)| 510 L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr; 511 if (write_back) 512 pd[addr >> L1_S_SHIFT] |= L1_S_B; 513 } 514 /* XXX: See below */ 515 if (0xfff00000 < physstart || 0xfff00000 > physend) 516 pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)| 517 L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart; 518 __asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */ 519 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TTB */ 520 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */ 521 "mrc p15, 0, %0, c1, c0, 0\n" 522 "orr %0, %0, #1\n" /* MMU_ENABLE */ 523 "mcr p15, 0, %0, c1, c0, 0\n" 524 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */ 525 "mov r0, r0\n" 526 "sub pc, pc, #4\n" : 527 "=r" (tmp) : "r" (pd), "r" (domain)); 528 529 /* 530 * XXX: This is the most stupid workaround I've ever wrote. 531 * For some reason, the KB9202 won't boot the kernel unless 532 * we access an address which is not in the 533 * 0x20000000 - 0x20ffffff range. I hope I'll understand 534 * what's going on later. 535 */ 536 __hack = *(volatile int *)0xfffff21c; 537} 538 539void 540__start(void) 541{ 542 void *curaddr; 543 void *dst, *altdst; 544 char *kernel = (char *)&kernel_start; 545 int sp; 546 int pt_addr; 547 548 __asm __volatile("mov %0, pc" : 549 "=r" (curaddr)); 550 curaddr = (void*)((unsigned int)curaddr & 0xfff00000); 551#ifdef KZIP 552 if (*kernel == 0x1f && kernel[1] == 0x8b) { 553 pt_addr = (((int)&_end + KERNSIZE + 0x100) & 554 ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE; 555 556#ifdef CPU_ARM9 557 /* So that idcache_wbinv works; */ 558 if ((cpufunc_id() & 0x0000f000) == 0x00009000) 559 arm9_setup(); 560#endif 561 setup_pagetables(pt_addr, (vm_paddr_t)curaddr, 562 (vm_paddr_t)curaddr + 0x10000000, 1); 563 /* Gzipped kernel */ 564 dst = inflate_kernel(kernel, &_end); 565 kernel = (char *)&_end; 566 altdst = 4 + load_kernel((unsigned int)kernel, 567 (unsigned int)curaddr, 568 (unsigned int)&func_end , 0); 569 if (altdst > dst) 570 dst = altdst; 571 cpu_idcache_wbinv_all(); 572 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" 573 "bic %0, %0, #1\n" /* MMU_ENABLE */ 574 "mcr p15, 0, %0, c1, c0, 0\n" 575 : "=r" (pt_addr)); 576 } else 577#endif 578 dst = 4 + load_kernel((unsigned int)&kernel_start, 579 (unsigned int)curaddr, 580 (unsigned int)&func_end, 0); 581 dst = (void *)(((vm_offset_t)dst & ~3)); 582 pt_addr = ((unsigned int)dst &~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE; 583 setup_pagetables(pt_addr, (vm_paddr_t)curaddr, 584 (vm_paddr_t)curaddr + 0x10000000, 0); 585 sp = pt_addr + L1_TABLE_SIZE + 8192; 586 sp = sp &~3; 587 dst = (void *)(sp + 4); 588 memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end - 589 (unsigned int)&load_kernel); 590 do_call(dst, kernel, dst + (unsigned int)(&func_end) - 591 (unsigned int)(&load_kernel), sp); 592} 593