/* elf_trampoline.c revision 163871 */
1/*- 2 * Copyright (c) 2005 Olivier Houchard. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 15 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 16 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 17 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 18 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 19 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 20 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 22 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 */ 24 25#include <sys/cdefs.h> 26__FBSDID("$FreeBSD: head/sys/arm/arm/elf_trampoline.c 163871 2006-11-01 12:41:43Z cognet $"); 27#include <machine/asm.h> 28#include <sys/param.h> 29#include <sys/elf32.h> 30#include <sys/inflate.h> 31#include <machine/elf.h> 32#include <machine/pte.h> 33#include <machine/cpufunc.h> 34#include <machine/armreg.h> 35 36#include <stdlib.h> 37 38/* 39 * Since we are compiled outside of the normal kernel build process, we 40 * need to include opt_global.h manually. 
41 */ 42#include "opt_global.h" 43#include "opt_kernname.h" 44 45extern char kernel_start[]; 46extern char kernel_end[]; 47 48extern void *_end; 49 50void __start(void); 51 52#define GZ_HEAD 0xa 53 54#ifdef CPU_ARM7TDMI 55#define cpu_idcache_wbinv_all arm7tdmi_cache_flushID 56#elif defined(CPU_ARM8) 57#define cpu_idcache_wbinv_all arm8_cache_purgeID 58#elif defined(CPU_ARM9) 59#define cpu_idcache_wbinv_all arm9_idcache_wbinv_all 60#elif defined(CPU_ARM10) 61#define cpu_idcache_wbinv_all arm10_idcache_wbinv_all 62#elif defined(CPU_SA110) || defined(CPU_SA1110) || defined(CPU_SA1100) || \ 63 defined(CPU_IXP12X0) 64#define cpu_idcache_wbinv_all sa1_cache_purgeID 65#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \ 66 defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) || \ 67 defined(CPU_XSCALE_80219) 68#define cpu_idcache_wbinv_all xscale_cache_purgeID 69#endif 70 71 72int arm_picache_size; 73int arm_picache_line_size; 74int arm_picache_ways; 75 76int arm_pdcache_size; /* and unified */ 77int arm_pdcache_line_size = 32; 78int arm_pdcache_ways; 79 80int arm_pcache_type; 81int arm_pcache_unified; 82 83int arm_dcache_align; 84int arm_dcache_align_mask; 85 86/* Additional cache information local to this file. Log2 of some of the 87 above numbers. 
*/ 88static int arm_dcache_l2_nsets; 89static int arm_dcache_l2_assoc; 90static int arm_dcache_l2_linesize; 91 92 93int block_userspace_access = 0; 94extern int arm9_dcache_sets_inc; 95extern int arm9_dcache_sets_max; 96extern int arm9_dcache_index_max; 97extern int arm9_dcache_index_inc; 98 99static __inline void * 100memcpy(void *dst, const void *src, int len) 101{ 102 const char *s = src; 103 char *d = dst; 104 105 while (len) { 106 if (len >= 4 && !((vm_offset_t)d & 3) && 107 !((vm_offset_t)s & 3)) { 108 *(uint32_t *)d = *(uint32_t *)s; 109 s += 4; 110 d += 4; 111 len -= 4; 112 } else { 113 *d++ = *s++; 114 len--; 115 } 116 } 117 return (dst); 118} 119 120static __inline void 121bzero(void *addr, int count) 122{ 123 char *tmp = (char *)addr; 124 125 while (count > 0) { 126 if (count >= 4 && !((vm_offset_t)tmp & 3)) { 127 *(uint32_t *)tmp = 0; 128 tmp += 4; 129 count -= 4; 130 } else { 131 *tmp = 0; 132 tmp++; 133 count--; 134 } 135 } 136} 137 138static void arm9_setup(void); 139 140void 141_start(void) 142{ 143 int physaddr = KERNPHYSADDR; 144 int tmp1; 145 unsigned int sp = ((unsigned int)&_end & ~3) + 4; 146#ifdef KZIP 147 sp += KERNSIZE + 0x100; 148 sp &= ~(L1_TABLE_SIZE - 1); 149 sp += 2 * L1_TABLE_SIZE; 150#endif 151 sp += 1024 * 1024; /* Should be enough for a stack */ 152 153 __asm __volatile("adr %0, 2f\n" 154 "bic %0, %0, #0xff000000\n" 155 "and %1, %1, #0xff000000\n" 156 "orr %0, %0, %1\n" 157 "mrc p15, 0, %1, c1, c0, 0\n" 158 "bic %1, %1, #1\n" /* Disable MMU */ 159 "orr %1, %1, #(4 | 8)\n" /* Add DC enable, 160 WBUF enable */ 161 "orr %1, %1, #0x1000\n" /* Add IC enable */ 162 "orr %1, %1, #(0x800)\n" /* BPRD enable */ 163 164 "mcr p15, 0, %1, c1, c0, 0\n" 165 "nop\n" 166 "nop\n" 167 "nop\n" 168 "mov pc, %0\n" 169 "2: nop\n" 170 "mov sp, %2\n" 171 : "=r" (tmp1), "+r" (physaddr), "+r" (sp)); 172#ifndef KZIP 173#ifdef CPU_ARM9 174 /* So that idcache_wbinv works; */ 175 if ((cpufunc_id() & 0x0000f000) == 0x00009000) 176 arm9_setup(); 177#endif 178 
cpu_idcache_wbinv_all(); 179#endif 180 __start(); 181} 182 183static void 184get_cachetype_cp15() 185{ 186 u_int ctype, isize, dsize; 187 u_int multiplier; 188 189 __asm __volatile("mrc p15, 0, %0, c0, c0, 1" 190 : "=r" (ctype)); 191 192 /* 193 * ...and thus spake the ARM ARM: 194 * 195 * If an <opcode2> value corresponding to an unimplemented or 196 * reserved ID register is encountered, the System Control 197 * processor returns the value of the main ID register. 198 */ 199 if (ctype == cpufunc_id()) 200 goto out; 201 202 if ((ctype & CPU_CT_S) == 0) 203 arm_pcache_unified = 1; 204 205 /* 206 * If you want to know how this code works, go read the ARM ARM. 207 */ 208 209 arm_pcache_type = CPU_CT_CTYPE(ctype); 210 if (arm_pcache_unified == 0) { 211 isize = CPU_CT_ISIZE(ctype); 212 multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2; 213 arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3); 214 if (CPU_CT_xSIZE_ASSOC(isize) == 0) { 215 if (isize & CPU_CT_xSIZE_M) 216 arm_picache_line_size = 0; /* not present */ 217 else 218 arm_picache_ways = 1; 219 } else { 220 arm_picache_ways = multiplier << 221 (CPU_CT_xSIZE_ASSOC(isize) - 1); 222 } 223 arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8); 224 } 225 226 dsize = CPU_CT_DSIZE(ctype); 227 multiplier = (dsize & CPU_CT_xSIZE_M) ? 
3 : 2; 228 arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3); 229 if (CPU_CT_xSIZE_ASSOC(dsize) == 0) { 230 if (dsize & CPU_CT_xSIZE_M) 231 arm_pdcache_line_size = 0; /* not present */ 232 else 233 arm_pdcache_ways = 1; 234 } else { 235 arm_pdcache_ways = multiplier << 236 (CPU_CT_xSIZE_ASSOC(dsize) - 1); 237 } 238 arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8); 239 240 arm_dcache_align = arm_pdcache_line_size; 241 242 arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2; 243 arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3; 244 arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) - 245 CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize); 246 out: 247 arm_dcache_align_mask = arm_dcache_align - 1; 248} 249 250static void 251arm9_setup(void) 252{ 253 254 get_cachetype_cp15(); 255 arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize; 256 arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize + 257 arm_dcache_l2_nsets)) - arm9_dcache_sets_inc; 258 arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc); 259 arm9_dcache_index_max = 0U - arm9_dcache_index_inc; 260} 261 262 263#ifdef KZIP 264static unsigned char *orig_input, *i_input, *i_output; 265 266 267static u_int memcnt; /* Memory allocated: blocks */ 268static size_t memtot; /* Memory allocated: bytes */ 269/* 270 * Library functions required by inflate(). 271 */ 272 273#define MEMSIZ 0x8000 274 275/* 276 * Allocate memory block. 277 */ 278unsigned char * 279kzipmalloc(int size) 280{ 281 void *ptr; 282 static u_char mem[MEMSIZ]; 283 284 if (memtot + size > MEMSIZ) 285 return NULL; 286 ptr = mem + memtot; 287 memtot += size; 288 memcnt++; 289 return ptr; 290} 291 292/* 293 * Free allocated memory block. 
294 */ 295void 296kzipfree(void *ptr) 297{ 298 memcnt--; 299 if (!memcnt) 300 memtot = 0; 301} 302 303void 304putstr(char *dummy) 305{ 306} 307 308static int 309input(void *dummy) 310{ 311 if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) { 312 return (GZ_EOF); 313 } 314 return *i_input++; 315} 316 317static int 318output(void *dummy, unsigned char *ptr, unsigned long len) 319{ 320 321 322 memcpy(i_output, ptr, len); 323 i_output += len; 324 return (0); 325} 326 327static void * 328inflate_kernel(void *kernel, void *startaddr) 329{ 330 struct inflate infl; 331 char slide[GZ_WSIZE]; 332 333 orig_input = kernel; 334 memcnt = memtot = 0; 335 i_input = (char *)kernel + GZ_HEAD; 336 if (((char *)kernel)[3] & 0x18) { 337 while (*i_input) 338 i_input++; 339 i_input++; 340 } 341 i_output = startaddr; 342 bzero(&infl, sizeof(infl)); 343 infl.gz_input = input; 344 infl.gz_output = output; 345 infl.gz_slide = slide; 346 inflate(&infl); 347 return ((char *)(((vm_offset_t)i_output & ~3) + 4)); 348} 349 350#endif 351 352void * 353load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end, 354 int d) 355{ 356 Elf32_Ehdr *eh; 357 Elf32_Phdr phdr[64] /* XXX */, *php; 358 Elf32_Shdr shdr[64] /* XXX */; 359 int i,j; 360 void *entry_point; 361 int symtabindex = -1; 362 int symstrindex = -1; 363 vm_offset_t lastaddr = 0; 364 Elf_Addr ssym = 0, esym = 0; 365 Elf_Dyn *dp; 366 367 eh = (Elf32_Ehdr *)kstart; 368 ssym = esym = 0; 369 entry_point = (void*)eh->e_entry; 370 memcpy(phdr, (void *)(kstart + eh->e_phoff ), 371 eh->e_phnum * sizeof(phdr[0])); 372 373 /* Determine lastaddr. */ 374 for (i = 0; i < eh->e_phnum; i++) { 375 if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr 376 + phdr[i].p_memsz)) 377 lastaddr = phdr[i].p_vaddr - KERNVIRTADDR + 378 curaddr + phdr[i].p_memsz; 379 } 380 381 /* Save the symbol tables, as there're about to be scratched. 
*/ 382 memcpy(shdr, (void *)(kstart + eh->e_shoff), 383 sizeof(*shdr) * eh->e_shnum); 384 if (eh->e_shnum * eh->e_shentsize != 0 && 385 eh->e_shoff != 0) { 386 for (i = 0; i < eh->e_shnum; i++) { 387 if (shdr[i].sh_type == SHT_SYMTAB) { 388 for (j = 0; j < eh->e_phnum; j++) { 389 if (phdr[j].p_type == PT_LOAD && 390 shdr[i].sh_offset >= 391 phdr[j].p_offset && 392 (shdr[i].sh_offset + 393 shdr[i].sh_size <= 394 phdr[j].p_offset + 395 phdr[j].p_filesz)) { 396 shdr[i].sh_offset = 0; 397 shdr[i].sh_size = 0; 398 j = eh->e_phnum; 399 } 400 } 401 if (shdr[i].sh_offset != 0 && 402 shdr[i].sh_size != 0) { 403 symtabindex = i; 404 symstrindex = shdr[i].sh_link; 405 } 406 } 407 } 408 func_end = roundup(func_end, sizeof(long)); 409 if (symtabindex >= 0 && symstrindex >= 0) { 410 ssym = lastaddr; 411 if (d) { 412 memcpy((void *)func_end, (void *)( 413 shdr[symtabindex].sh_offset + kstart), 414 shdr[symtabindex].sh_size); 415 memcpy((void *)(func_end + 416 shdr[symtabindex].sh_size), 417 (void *)(shdr[symstrindex].sh_offset + 418 kstart), shdr[symstrindex].sh_size); 419 } else { 420 lastaddr += shdr[symtabindex].sh_size; 421 lastaddr = roundup(lastaddr, 422 sizeof(shdr[symtabindex].sh_size)); 423 lastaddr += sizeof(shdr[symstrindex].sh_size); 424 lastaddr += shdr[symstrindex].sh_size; 425 lastaddr = roundup(lastaddr, 426 sizeof(shdr[symstrindex].sh_size)); 427 } 428 429 } 430 } 431 if (!d) 432 return ((void *)lastaddr); 433 434 j = eh->e_phnum; 435 for (i = 0; i < j; i++) { 436 volatile char c; 437 438 if (phdr[i].p_type != PT_LOAD) 439 continue; 440 memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr), 441 (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz); 442 /* Clean space from oversized segments, eg: bss. */ 443 if (phdr[i].p_filesz < phdr[i].p_memsz) 444 bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR + 445 curaddr + phdr[i].p_filesz), phdr[i].p_memsz - 446 phdr[i].p_filesz); 447 } 448 /* Now grab the symbol tables. 
*/ 449 if (symtabindex >= 0 && symstrindex >= 0) { 450 *(Elf_Size *)lastaddr = 451 shdr[symtabindex].sh_size; 452 lastaddr += sizeof(shdr[symtabindex].sh_size); 453 memcpy((void*)lastaddr, 454 (void *)func_end, 455 shdr[symtabindex].sh_size); 456 lastaddr += shdr[symtabindex].sh_size; 457 lastaddr = roundup(lastaddr, 458 sizeof(shdr[symtabindex].sh_size)); 459 *(Elf_Size *)lastaddr = 460 shdr[symstrindex].sh_size; 461 lastaddr += sizeof(shdr[symstrindex].sh_size); 462 memcpy((void*)lastaddr, 463 (void*)(func_end + 464 shdr[symtabindex].sh_size), 465 shdr[symstrindex].sh_size); 466 lastaddr += shdr[symstrindex].sh_size; 467 lastaddr = roundup(lastaddr, 468 sizeof(shdr[symstrindex].sh_size)); 469 *(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER; 470 *((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR; 471 *((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR; 472 } else 473 *(Elf_Addr *)curaddr = 0; 474 /* Invalidate the instruction cache. */ 475 __asm __volatile("mcr p15, 0, %0, c7, c5, 0\n" 476 "mcr p15, 0, %0, c7, c10, 4\n" 477 : : "r" (curaddr)); 478 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" 479 "bic %0, %0, #1\n" /* MMU_ENABLE */ 480 "mcr p15, 0, %0, c1, c0, 0\n" 481 : "=r" (ssym)); 482 /* Jump to the entry point. */ 483 ((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))(); 484 __asm __volatile(".globl func_end\n" 485 "func_end:"); 486 487} 488 489extern char func_end[]; 490 491 492#define PMAP_DOMAIN_KERNEL 15 /* 493 * Just define it instead of including the 494 * whole VM headers set. 
495 */ 496int __hack; 497static __inline void 498setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend, 499 int write_back) 500{ 501 unsigned int *pd = (unsigned int *)pt_addr; 502 vm_paddr_t addr; 503 int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT; 504 int tmp; 505 506 bzero(pd, L1_TABLE_SIZE); 507 for (addr = physstart; addr < physend; addr += L1_S_SIZE) { 508 pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)| 509 L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr; 510 if (write_back) 511 pd[addr >> L1_S_SHIFT] |= L1_S_B; 512 } 513 /* XXX: See below */ 514 if (0xfff00000 < physstart || 0xfff00000 > physend) 515 pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)| 516 L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart; 517 __asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */ 518 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TTB */ 519 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */ 520 "mrc p15, 0, %0, c1, c0, 0\n" 521 "orr %0, %0, #1\n" /* MMU_ENABLE */ 522 "mcr p15, 0, %0, c1, c0, 0\n" 523 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */ 524 "mov r0, r0\n" 525 "sub pc, pc, #4\n" : 526 "=r" (tmp) : "r" (pd), "r" (domain)); 527 528 /* 529 * XXX: This is the most stupid workaround I've ever wrote. 530 * For some reason, the KB9202 won't boot the kernel unless 531 * we access an address which is not in the 532 * 0x20000000 - 0x20ffffff range. I hope I'll understand 533 * what's going on later. 
534 */ 535 __hack = *(volatile int *)0xfffff21c; 536} 537 538void 539__start(void) 540{ 541 void *curaddr; 542 void *dst, *altdst; 543 char *kernel = (char *)&kernel_start; 544 int sp; 545 int pt_addr; 546 547 __asm __volatile("mov %0, pc" : 548 "=r" (curaddr)); 549 curaddr = (void*)((unsigned int)curaddr & 0xfff00000); 550#ifdef KZIP 551 if (*kernel == 0x1f && kernel[1] == 0x8b) { 552 pt_addr = (((int)&_end + KERNSIZE + 0x100) & 553 ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE; 554 555#ifdef CPU_ARM9 556 /* So that idcache_wbinv works; */ 557 if ((cpufunc_id() & 0x0000f000) == 0x00009000) 558 arm9_setup(); 559#endif 560 setup_pagetables(pt_addr, (vm_paddr_t)curaddr, 561 (vm_paddr_t)curaddr + 0x10000000, 1); 562 /* Gzipped kernel */ 563 dst = inflate_kernel(kernel, &_end); 564 kernel = (char *)&_end; 565 altdst = 4 + load_kernel((unsigned int)kernel, 566 (unsigned int)curaddr, 567 (unsigned int)&func_end , 0); 568 if (altdst > dst) 569 dst = altdst; 570 cpu_idcache_wbinv_all(); 571 __asm __volatile("mrc p15, 0, %0, c1, c0, 0\n" 572 "bic %0, %0, #1\n" /* MMU_ENABLE */ 573 "mcr p15, 0, %0, c1, c0, 0\n" 574 : "=r" (pt_addr)); 575 } else 576#endif 577 dst = 4 + load_kernel((unsigned int)&kernel_start, 578 (unsigned int)curaddr, 579 (unsigned int)&func_end, 0); 580 dst = (void *)(((vm_offset_t)dst & ~3)); 581 pt_addr = ((unsigned int)dst &~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE; 582 setup_pagetables(pt_addr, (vm_paddr_t)curaddr, 583 (vm_paddr_t)curaddr + 0x10000000, 0); 584 sp = pt_addr + L1_TABLE_SIZE + 8192; 585 sp = sp &~3; 586 dst = (void *)(sp + 4); 587 memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end - 588 (unsigned int)&load_kernel); 589 do_call(dst, kernel, dst + (unsigned int)(&func_end) - 590 (unsigned int)(&load_kernel), sp); 591} 592