elf_trampoline.c revision 159901
1/*- 2 * Copyright (c) 2005 Olivier Houchard. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 15 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 16 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 17 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 18 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 19 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 20 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 22 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 */ 24 25#include <sys/cdefs.h> 26__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 159901 2006-06-23 22:45:35Z cognet $"); 27#include <machine/asm.h> 28#include <sys/types.h> 29#include <sys/elf32.h> 30#include <sys/param.h> 31#include <sys/inflate.h> 32#include <machine/elf.h> 33#include <machine/pte.h> 34#include <machine/cpufunc.h> 35#include <machine/armreg.h> 36 37#include <stdlib.h> 38 39#include "opt_global.h" 40#include "opt_kernname.h" 41 42extern char kernel_start[]; 43extern char kernel_end[]; 44 45extern void *_end; 46 47void __start(void); 48 49#define GZ_HEAD 0xa 50 51#ifdef CPU_ARM7TDMI 52#define cpu_idcache_wbinv_all arm7tdmi_cache_flushID 53#elif defined(CPU_ARM8) 54#define cpu_idcache_wbinv_all arm8_cache_purgeID 55#elif defined(CPU_ARM9) 56#define cpu_idcache_wbinv_all arm9_idcache_wbinv_all 57#elif defined(CPU_ARM10) 58#define cpu_idcache_wbinv_all arm10_idcache_wbinv_all 59#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \ 60 defined(CPU_IXP12X0) 61#define cpu_idcache_wbinv_all sa1_cache_purgeID 62#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ 63 defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) 64#define cpu_idcache_wbinv_all xscale_cache_purgeID 65#endif 66 67 68int arm_picache_size; 69int arm_picache_line_size; 70int arm_picache_ways; 71 72int arm_pdcache_size; /* and unified */ 73int arm_pdcache_line_size = 32; 74int arm_pdcache_ways; 75 76int arm_pcache_type; 77int arm_pcache_unified; 78 79int arm_dcache_align; 80int arm_dcache_align_mask; 81 82/* Additional cache information local to this file. Log2 of some of the 83 above numbers. */ 84static int arm_dcache_l2_nsets; 85static int arm_dcache_l2_assoc; 86static int arm_dcache_l2_linesize; 87 88 89int block_userspace_access = 0; 90extern int arm9_dcache_sets_inc; 91extern int arm9_dcache_sets_max; 92extern int arm9_dcache_index_max; 93extern int arm9_dcache_index_inc; 94 95static __inline void * 96memcpy(void *dst, const void *src, int len) 97{ 98 const char *s = src; 99 char *d = dst; 100 101 while (len) { 102 if (len >= 4 && !((vm_offset_t)d & 3) && 103 !((vm_offset_t)s & 3)) { 104 *(uint32_t *)d = *(uint32_t *)s; 105 s += 4; 106 d += 4; 107 len -= 4; 108 } else { 109 *d++ = *s++; 110 len--; 111 } 112 } 113 return (dst); 114} 115 116static __inline void 117bzero(void *addr, int count) 118{ 119 char *tmp = (char *)addr; 120 121 while (count > 0) { 122 if (count >= 4 && !((vm_offset_t)tmp & 3)) { 123 *(uint32_t *)tmp = 0; 124 tmp += 4; 125 count -= 4; 126 } else { 127 *tmp = 0; 128 tmp++; 129 count--; 130 } 131 } 132} 133 134static void arm9_setup(void); 135 136void 137_start(void) 138{ 139 int physaddr = KERNPHYSADDR; 140 int tmp1; 141 unsigned int sp = ((unsigned int)&_end & ~3) + 4; 142#ifdef KZIP 143 sp += KERNSIZE + 0x100; 144 sp &= ~(L1_TABLE_SIZE - 1); 145 sp += 2 * L1_TABLE_SIZE; 146#endif 147 sp += 1024 * 1024; /* Should be enough for a stack */ 148 149 __asm __volatile("adr %0, 2f\n" 150 "bic %0, %0, #0xff000000\n" 151 "and %1, %1, #0xff000000\n" 152 "orr %0, %0, %1\n" 153 "mrc p15, 0, %1, c1, c0, 0\n" 154 "bic %1, %1, #1\n" /* Disable MMU */ 155 "orr %1, %1, #(4 | 8)\n" /* Add DC enable, 156 WBUF enable */ 157 "orr %1, %1, #0x1000\n" /* Add IC enable */ 158 "orr %1, %1, #(0x800)\n" /* BPRD enable */ 159 160 "mcr p15, 0, %1, c1, c0, 0\n" 161 "nop\n" 162 "nop\n" 163 "nop\n" 164 "mov pc, %0\n" 165 "2: nop\n" 166 "mov sp, %2\n" 167 : "=r" (tmp1), "+r" (physaddr), "+r" (sp)); 168#ifndef KZIP 169#ifdef CPU_ARM9 170 /* So that idcache_wbinv works; */ 171 if ((cpufunc_id() & 0x0000f000) == 0x00009000) 172 arm9_setup(); 173#endif 174 cpu_idcache_wbinv_all(); 175#endif 176 __start(); 177} 178 179static void 180get_cachetype_cp15() 181{ 182 u_int ctype, isize, dsize; 183 u_int multiplier; 184 185 __asm __volatile("mrc p15, 0, %0, c0, c0, 1" 186 : "=r" (ctype)); 187 188 /* 189 * ...and thus spake the ARM ARM: 190 * 191 * If an <opcode2> value corresponding to an unimplemented or 192 * reserved ID register is encountered, the System Control 193 * processor returns the value of the main ID register. 194 */ 195 if (ctype == cpufunc_id()) 196 goto out; 197 198 if ((ctype & CPU_CT_S) == 0) 199 arm_pcache_unified = 1; 200 201 /* 202 * If you want to know how this code works, go read the ARM ARM. 203 */ 204 205 arm_pcache_type = CPU_CT_CTYPE(ctype); 206 if (arm_pcache_unified == 0) { 207 isize = CPU_CT_ISIZE(ctype); 208 multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2; 209 arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3); 210 if (CPU_CT_xSIZE_ASSOC(isize) == 0) { 211 if (isize & CPU_CT_xSIZE_M) 212 arm_picache_line_size = 0; /* not present */ 213 else 214 arm_picache_ways = 1; 215 } else { 216 arm_picache_ways = multiplier << 217 (CPU_CT_xSIZE_ASSOC(isize) - 1); 218 } 219 arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8); 220 } 221 222 dsize = CPU_CT_DSIZE(ctype); 223 multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2; 224 arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3); 225 if (CPU_CT_xSIZE_ASSOC(dsize) == 0) { 226 if (dsize & CPU_CT_xSIZE_M) 227 arm_pdcache_line_size = 0; /* not present */ 228 else 229 arm_pdcache_ways = 1; 230 } else { 231 arm_pdcache_ways = multiplier << 232 (CPU_CT_xSIZE_ASSOC(dsize) - 1); 233 } 234 arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8); 235 236 arm_dcache_align = arm_pdcache_line_size; 237 238 arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2; 239 arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3; 240 arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) - 241 CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize); 242 out: 243 arm_dcache_align_mask = arm_dcache_align - 1; 244} 245 246static void 247arm9_setup(void) 248{ 249 250 get_cachetype_cp15(); 251 arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize; 252 arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize + 253 arm_dcache_l2_nsets)) - arm9_dcache_sets_inc; 254 arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc); 255 arm9_dcache_index_max = 0U - arm9_dcache_index_inc; 256} 257 258 259#ifdef KZIP 260static unsigned char *orig_input, *i_input, *i_output; 261 262 263static u_int memcnt; /* Memory allocated: blocks */ 264static size_t memtot; /* Memory allocated: bytes */ 265/* 266 * Library functions required by inflate(). 267 */ 268 269#define MEMSIZ 0x8000 270 271/* 272 * Allocate memory block. 273 */ 274unsigned char * 275kzipmalloc(int size) 276{ 277 void *ptr; 278 static u_char mem[MEMSIZ]; 279 280 if (memtot + size > MEMSIZ) 281 return NULL; 282 ptr = mem + memtot; 283 memtot += size; 284 memcnt++; 285 return ptr; 286} 287 288/* 289 * Free allocated memory block. 290 */ 291void 292kzipfree(void *ptr) 293{ 294 memcnt--; 295 if (!memcnt) 296 memtot = 0; 297} 298 299void 300putstr(char *dummy) 301{ 302} 303 304static int 305input(void *dummy) 306{ 307 if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) { 308 return (GZ_EOF); 309 } 310 return *i_input++; 311} 312 313static int 314output(void *dummy, unsigned char *ptr, unsigned long len) 315{ 316 317 318 memcpy(i_output, ptr, len); 319 i_output += len; 320 return (0); 321} 322 323static void * 324inflate_kernel(void *kernel, void *startaddr) 325{ 326 struct inflate infl; 327 char slide[GZ_WSIZE]; 328 329 orig_input = kernel; 330 memcnt = memtot = 0; 331 i_input = (char *)kernel + GZ_HEAD; 332 if (((char *)kernel)[3] & 0x18) { 333 while (*i_input) 334 i_input++; 335 i_input++; 336 } 337 i_output = startaddr; 338 bzero(&infl, sizeof(infl)); 339 infl.gz_input = input; 340 infl.gz_output = output; 341 infl.gz_slide = slide; 342 inflate(&infl); 343 return ((char *)(((vm_offset_t)i_output & ~3) + 4)); 344} 345 346#endif 347 348void * 349load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end, 350 int d) 351{ 352 Elf32_Ehdr *eh; 353 Elf32_Phdr phdr[64] /* XXX */, *php; 354 Elf32_Shdr shdr[64] /* XXX */; 355 int i,j; 356 void *entry_point; 357 int symtabindex = -1; 358 int symstrindex = -1; 359 vm_offset_t lastaddr = 0; 360 Elf_Addr ssym = 0, esym = 0; 361 Elf_Dyn *dp; 362 363 eh = (Elf32_Ehdr *)kstart; 364 ssym = esym = 0; 365 entry_point = (void*)eh->e_entry; 366 memcpy(phdr, (void *)(kstart + eh->e_phoff ), 367 eh->e_phnum * sizeof(phdr[0])); 368 369 /* Determine lastaddr. */ 370 for (i = 0; i < eh->e_phnum; i++) { 371 if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr 372 + phdr[i].p_memsz)) 373 lastaddr = phdr[i].p_vaddr - KERNVIRTADDR + 374 curaddr + phdr[i].p_memsz; 375 } 376 377 /* Save the symbol tables, as there're about to be scratched. */ 378 memcpy(shdr, (void *)(kstart + eh->e_shoff), 379 sizeof(*shdr) * eh->e_shnum); 380 if (eh->e_shnum * eh->e_shentsize != 0 && 381 eh->e_shoff != 0) { 382 for (i = 0; i < eh->e_shnum; i++) { 383 if (shdr[i].sh_type == SHT_SYMTAB) { 384 for (j = 0; j < eh->e_phnum; j++) { 385 if (phdr[j].p_type == PT_LOAD && 386 shdr[i].sh_offset >= 387 phdr[j].p_offset && 388 (shdr[i].sh_offset + 389 shdr[i].sh_size <= 390 phdr[j].p_offset + 391 phdr[j].p_filesz)) { 392 shdr[i].sh_offset = 0; 393 shdr[i].sh_size = 0; 394 j = eh->e_phnum; 395 } 396 } 397 if (shdr[i].sh_offset != 0 && 398 shdr[i].sh_size != 0) { 399 symtabindex = i; 400 symstrindex = shdr[i].sh_link; 401 } 402 } 403 } 404 func_end = roundup(func_end, sizeof(long)); 405 if (symtabindex >= 0 && symstrindex >= 0) { 406 ssym = lastaddr; 407 if (d) { 408 memcpy((void *)func_end, (void *)( 409 shdr[symtabindex].sh_offset + kstart), 410 shdr[symtabindex].sh_size); 411 memcpy((void *)(func_end + 412 shdr[symtabindex].sh_size), 413 (void *)(shdr[symstrindex].sh_offset + 414 kstart), shdr[symstrindex].sh_size); 415 } else { 416 lastaddr += shdr[symtabindex].sh_size; 417 lastaddr = roundup(lastaddr, 418 sizeof(shdr[symtabindex].sh_size)); 419 lastaddr += sizeof(shdr[symstrindex].sh_size); 420 lastaddr += shdr[symstrindex].sh_size; 421 lastaddr = roundup(lastaddr, 422 sizeof(shdr[symstrindex].sh_size)); 423 } 424 425 } 426 } 427 if (!d) 428 return ((void *)lastaddr); 429 430 j = eh->e_phnum; 431 for (i = 0; i < j; i++) { 432 volatile char c; 433 434 if (phdr[i].p_type != PT_LOAD) 435 continue; 436 memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr), 437 (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz); 438 /* Clean space from oversized segments, eg: bss. */ 439 if (phdr[i].p_filesz < phdr[i].p_memsz) 440 bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR + 441 curaddr + phdr[i].p_filesz), phdr[i].p_memsz - 442 phdr[i].p_filesz); 443 } 444 /* Now grab the symbol tables. */ 445 if (symtabindex >= 0 && symstrindex >= 0) { 446 *(Elf_Size *)lastaddr = 447 shdr[symtabindex].sh_size; 448 lastaddr += sizeof(shdr[symtabindex].sh_size); 449 memcpy((void*)lastaddr, 450 (void *)func_end, 451 shdr[symtabindex].sh_size); 452 lastaddr += shdr[symtabindex].sh_size; 453 lastaddr = roundup(lastaddr, 454 sizeof(shdr[symtabindex].sh_size)); 455 *(Elf_Size *)lastaddr = 456 shdr[symstrindex].sh_size; 457 lastaddr += sizeof(shdr[symstrindex].sh_size); 458 memcpy((void*)lastaddr, 459 (void*)(func_end + 460 shdr[symtabindex].sh_size), 461 shdr[symstrindex].sh_size); 462 lastaddr += shdr[symstrindex].sh_size; 463 lastaddr = roundup(lastaddr, 464 sizeof(shdr[symstrindex].sh_size)); 465 *(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER; 466 *((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR; 467 *((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR; 468 } else 469 *(Elf_Addr *)curaddr = 0; 470 /* Invalidate the instruction cache. */ 471 __asm __volatile("mcr p15, 0, %0, c7, c5, 0\n" 472 "mcr p15, 0, %0, c7, c10, 4\n" 473 : : "r" (curaddr)); 474 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" 475 "bic %0, %0, #1\n" /* MMU_ENABLE */ 476 "mcr p15, 0, %0, c1, c0, 0\n" 477 : "=r" (ssym)); 478 /* Jump to the entry point. */ 479 ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))(); 480 __asm __volatile(".globl func_end\n" 481 "func_end:"); 482 483} 484 485extern char func_end[]; 486 487 488#define PMAP_DOMAIN_KERNEL 15 /* 489 * Just define it instead of including the 490 * whole VM headers set. 491 */ 492int __hack; 493static __inline void 494setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend, 495 int write_back) 496{ 497 unsigned int *pd = (unsigned int *)pt_addr; 498 vm_paddr_t addr; 499 int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT; 500 int tmp; 501 502 bzero(pd, L1_TABLE_SIZE); 503 for (addr = physstart; addr < physend; addr += L1_S_SIZE) { 504 pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)| 505 L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr; 506 if (write_back) 507 pd[addr >> L1_S_SHIFT] |= L1_S_B; 508 } 509 /* XXX: See below */ 510 if (0xfff00000 < physstart || 0xfff00000 > physend) 511 pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)| 512 L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart; 513 __asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */ 514 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TTB */ 515 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */ 516 "mrc p15, 0, %0, c1, c0, 0\n" 517 "orr %0, %0, #1\n" /* MMU_ENABLE */ 518 "mcr p15, 0, %0, c1, c0, 0\n" 519 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */ 520 "mov r0, r0\n" 521 "sub pc, pc, #4\n" : 522 "=r" (tmp) : "r" (pd), "r" (domain)); 523 524 /* 525 * XXX: This is the most stupid workaround I've ever wrote. 526 * For some reason, the KB9202 won't boot the kernel unless 527 * we access an address which is not in the 528 * 0x20000000 - 0x20ffffff range. I hope I'll understand 529 * what's going on later. 530 */ 531 __hack = *(volatile int *)0xfffff21c; 532} 533 534void 535__start(void) 536{ 537 void *curaddr; 538 void *dst, *altdst; 539 char *kernel = (char *)&kernel_start; 540 int sp; 541 int pt_addr; 542 543 __asm __volatile("mov %0, pc" : 544 "=r" (curaddr)); 545 curaddr = (void*)((unsigned int)curaddr & 0xfff00000); 546#ifdef KZIP 547 if (*kernel == 0x1f && kernel[1] == 0x8b) { 548 pt_addr = (((int)&_end + KERNSIZE + 0x100) & 549 ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE; 550 551#ifdef CPU_ARM9 552 /* So that idcache_wbinv works; */ 553 if ((cpufunc_id() & 0x0000f000) == 0x00009000) 554 arm9_setup(); 555#endif 556 setup_pagetables(pt_addr, (vm_paddr_t)curaddr, 557 (vm_paddr_t)curaddr + 0x10000000, 1); 558 /* Gzipped kernel */ 559 dst = inflate_kernel(kernel, &_end); 560 kernel = (char *)&_end; 561 altdst = 4 + load_kernel((unsigned int)kernel, 562 (unsigned int)curaddr, 563 (unsigned int)&func_end , 0); 564 if (altdst > dst) 565 dst = altdst; 566 cpu_idcache_wbinv_all(); 567 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" 568 "bic %0, %0, #1\n" /* MMU_ENABLE */ 569 "mcr p15, 0, %0, c1, c0, 0\n" 570 : "=r" (pt_addr)); 571 } else 572#endif 573 dst = 4 + load_kernel((unsigned int)&kernel_start, 574 (unsigned int)curaddr, 575 (unsigned int)&func_end, 0); 576 dst = (void *)(((vm_offset_t)dst & ~3)); 577 pt_addr = ((unsigned int)dst &~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE; 578 setup_pagetables(pt_addr, (vm_paddr_t)curaddr, 579 (vm_paddr_t)curaddr + 0x10000000, 0); 580 sp = pt_addr + L1_TABLE_SIZE + 8192; 581 sp = sp &~3; 582 dst = (void *)(sp + 4); 583 memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end - 584 (unsigned int)&load_kernel); 585 do_call(dst, kernel, dst + (unsigned int)(&func_end) - 586 (unsigned int)(&load_kernel), sp); 587} 588