# SPDX-License-Identifier: GPL-2.0+
# Copyright (c) 2016 Google, Inc
# Written by Simon Glass <sjg@chromium.org>
#
# Handle various things related to ELF images
#

from collections import namedtuple, OrderedDict
import io
import os
import re
import shutil
import struct
import tempfile

from u_boot_pylib import command
from u_boot_pylib import tools
from u_boot_pylib import tout

# elftools is optional: functions which need it check ELF_TOOLS and raise
# ValueError if it is not available
ELF_TOOLS = True
try:
    from elftools.elf.elffile import ELFFile
    from elftools.elf.elffile import ELFError
    from elftools.elf.sections import SymbolTableSection
except ImportError:  # pragma: no cover
    ELF_TOOLS = False

# BSYM in little endian, keep in sync with include/binman_sym.h
BINMAN_SYM_MAGIC_VALUE = 0x4d595342

# Information about an ELF symbol:
# section (str): Name of the section containing this symbol
# address (int): Address of the symbol (its value)
# size (int): Size of the symbol in bytes
# weak (bool): True if the symbol is weak
# offset (int or None): Offset of the symbol's data in the ELF file, or None if
#   not known
Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak', 'offset'])

# Information about an ELF file:
#    data: Extracted program contents of ELF file (this would be loaded by an
#           ELF loader when reading this file)
#    load: Load address of code
#    entry: Entry address of code
#    memsize: Number of bytes in memory occupied by loading this ELF file
ElfInfo = namedtuple('ElfInfo', ['data', 'load', 'entry', 'memsize'])


def GetSymbols(fname, patterns):
    """Get the symbols from an ELF file, using 'objdump -t'

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for, each a string, or
            None (or an empty list) to return all symbols. The patterns are
            matched against the whole objdump output line

    Returns:
        OrderedDict, sorted by symbol address:
            key: Name of symbol
            value: Symbol object; its offset member is always None
    """
    stdout = tools.run('objdump', '-t', fname)
    lines = stdout.splitlines()
    if patterns:
        re_syms = re.compile('|'.join(patterns))
    else:
        re_syms = None
    syms = {}
    syms_started = False
    for line in lines:
        # Skip blank lines and everything before the 'SYMBOL TABLE' header
        if not line or not syms_started:
            if 'SYMBOL TABLE' in line:
                syms_started = True
            line = None  # Otherwise code coverage complains about 'continue'
            continue
        if re_syms and not re_syms.search(line):
            continue

        # Each line is: <hex value> <7-char flags> <section> <size> [<name>]
        space_pos = line.find(' ')
        value, rest = line[:space_pos], line[space_pos + 1:]
        flags = rest[:7]
        parts = rest[7:].split()
        section, size = parts[:2]
        if len(parts) > 2:
            # A '.hidden' marker may appear before the symbol name
            name = parts[2] if parts[2] != '.hidden' else parts[3]
            syms[name] = Symbol(section, int(value, 16), int(size, 16),
                                flags[1] == 'w', None)

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))


def _GetFileOffset(elf, addr):
    """Get the file offset for an address

    Args:
        elf (ELFFile): ELF file to check
        addr (int): Address to search for

    Returns:
        int: Offset of that address in the ELF file, or None if the address
            is not within the file-backed part of any PT_LOAD segment
    """
    for seg in elf.iter_segments():
        if seg.header['p_type'] != 'PT_LOAD':
            continue
        seg_start = seg['p_vaddr']
        seg_end = seg_start + seg['p_filesz']
        if seg_start <= addr < seg_end:
            return addr - seg_start + seg['p_offset']
    return None


def GetFileOffset(fname, addr):
    """Get the file offset for an address

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        int: Offset of that address in the ELF file, or None if not valid

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with open(fname, 'rb') as fd:
        elf = ELFFile(fd)
        return _GetFileOffset(elf, addr)


def GetSymbolFromAddress(fname, addr):
    """Get the symbol at a particular address

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        str: Symbol name, or None if no symbol at that address

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with open(fname, 'rb') as fd:
        # NOTE(review): the parsed ELF is not used by the lookup below; the
        # parse is kept so that a file which elftools rejects still raises
        # here, as it always has
        ELFFile(fd)
        syms = GetSymbols(fname, None)
    for name, sym in syms.items():
        if sym.address == addr:
            return name
    return None


def GetSymbolFileOffset(fname, patterns):
    """Get the symbols from an ELF file, including their file offsets

    Unlike GetSymbols() this uses elftools rather than objdump, and the
    patterns are matched against the symbol name only.

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for, each a string, or
            None (or an empty list) to return all symbols

    Returns:
        OrderedDict, sorted by symbol address:
            key: Name of symbol
            value: Symbol object. The section member is the name of the
                symbol-table section in which the symbol was found; offset
                is the file offset of the symbol's address, or None if the
                address is not in a loadable segment

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")

    syms = {}
    # No patterns means 'match everything', as with GetSymbols()
    re_syms = re.compile('|'.join(patterns)) if patterns else None
    with open(fname, 'rb') as fd:
        elf = ELFFile(fd)

        for section in elf.iter_sections():
            if isinstance(section, SymbolTableSection):
                for symbol in section.iter_symbols():
                    if not re_syms or re_syms.search(symbol.name):
                        addr = symbol.entry['st_value']
                        syms[symbol.name] = Symbol(
                            section.name, addr, symbol.entry['st_size'],
                            symbol.entry['st_info']['bind'] == 'STB_WEAK',
                            _GetFileOffset(elf, addr))

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))


def GetSymbolAddress(fname, sym_name):
    """Get a value of a symbol from an ELF file

    Args:
        fname: Filename of the ELF file to read
        sym_name (str): Name of the symbol to look up. This is also used as
            the regex pattern passed to GetSymbols()

    Returns:
        Symbol value (as an integer) or None if not found
    """
    syms = GetSymbols(fname, [sym_name])
    sym = syms.get(sym_name)
    if not sym:
        return None
    return sym.address


def GetPackString(sym, msg):
    """Get the struct.pack/unpack string to use with a given symbol

    Args:
        sym (Symbol): Symbol to check. Only the size member is checked
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        str: struct string to use, e.g. '<I'

    Raises:
        ValueError: Symbol has an unexpected size
    """
    if sym.size == 4:
        return '<I'
    elif sym.size == 8:
        return '<Q'
    else:
        raise ValueError('%s has size %d: only 4 and 8 are supported' %
                         (msg, sym.size))


def GetSymbolOffset(elf_fname, sym_name, base_sym=None):
    """Read the offset of a symbol compared to base symbol

    This is useful for obtaining the value of a single symbol relative to the
    base of a binary blob.

    Args:
        elf_fname: Filename of the ELF file to read
        sym_name (str): Name of symbol to read
        base_sym (str): Base symbol to use to calculate the offset (or None to
            use '__image_copy_start')

    Returns:
        int: Offset of the symbol relative to the base symbol

    Raises:
        KeyError: Either symbol is not present in the ELF file
    """
    if not base_sym:
        base_sym = '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    syms = GetSymbols(fname, [base_sym, sym_name])
    base = syms[base_sym].address
    val = syms[sym_name].address
    return val - base


def LookupAndWriteSymbols(elf_fname, entry, section, is_elf=False,
                          base_sym=None):
    """Replace all symbols in an entry with their correct values

    The entry contents is updated so that values for referenced symbols will be
    visible at run time. This is done by finding out the symbols offsets in the
    entry (using the ELF file) and replacing them with values from binman's data
    structures.

    Args:
        elf_fname: Filename of ELF image containing the symbol information for
            entry
        entry: Entry to process
        section: Section which can be used to lookup symbol values
        is_elf (bool): True if the entry contents is the ELF file itself, so
            that symbols are written at their ELF file offset; False if it is
            a binary blob, where the offset is relative to base_sym
        base_sym: Base symbol marking the start of the image (None to use
            '__image_copy_start')

    Returns:
        int: Number of symbols written

    Raises:
        ValueError: elftools is needed but not available, a symbol lies
            outside the entry contents, a symbol is not in a loadable segment
            of the ELF file, or a symbol has an unsupported size
    """
    if not base_sym:
        base_sym = '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    syms = GetSymbols(fname, ['image', 'binman'])
    if is_elf:
        if not ELF_TOOLS:
            msg = ("Section '%s': entry '%s'" %
                   (section.GetPath(), entry.GetPath()))
            raise ValueError(f'{msg}: Cannot write symbols to an ELF file without Python elftools')
        # GetSymbols() does not provide file offsets, so add them here
        new_syms = {}
        with open(fname, 'rb') as fd:
            elf = ELFFile(fd)
            for name, sym in syms.items():
                offset = _GetFileOffset(elf, sym.address)
                new_syms[name] = Symbol(sym.section, sym.address, sym.size,
                                        sym.weak, offset)
            syms = new_syms

    if not syms:
        tout.debug('LookupAndWriteSymbols: no syms')
        return 0
    base = syms.get(base_sym)
    if not base and not is_elf:
        tout.debug('LookupAndWriteSymbols: no base')
        return 0
    base_addr = 0 if is_elf else base.address
    count = 0
    for name, sym in syms.items():
        if name.startswith('_binman'):
            msg = ("Section '%s': Symbol '%s'\n in entry '%s'" %
                   (section.GetPath(), name, entry.GetPath()))
            if is_elf:
                # For ELF files, use the file offset
                offset = sym.offset
                if offset is None:
                    # Without this check the comparison below would fail
                    # with an unhelpful TypeError
                    raise ValueError(
                        '%s: symbol address %x is not in a loadable segment '
                        'of the ELF file' % (msg, sym.address))
            else:
                # For blobs use the offset of the symbol, calculated by
                # subtracting the base address which by definition is at the
                # start
                offset = sym.address - base_addr
            if offset < 0 or offset + sym.size > entry.contents_size:
                raise ValueError('%s has offset %x (size %x) but the contents '
                                 'size is %x' % (entry.GetPath(), offset,
                                                 sym.size,
                                                 entry.contents_size))
            pack_string = GetPackString(sym, msg)
            if name == '_binman_sym_magic':
                value = BINMAN_SYM_MAGIC_VALUE
            else:
                # Look up the symbol in our entry tables.
                value = section.GetImage().LookupImageSymbol(name, sym.weak,
                                                             msg, base_addr)
            if value is None:
                # Write -1 for a missing value; this needs the signed form of
                # the pack string
                value = -1
                pack_string = pack_string.lower()
            value_bytes = struct.pack(pack_string, value)
            tout.debug('%s:\n insert %s, offset %x, value %x, length %d' %
                       (msg, name, offset, value, len(value_bytes)))
            entry.data = (entry.data[:offset] + value_bytes +
                          entry.data[offset + sym.size:])
            count += 1
    if count:
        tout.detail(
            f"Section '{section.GetPath()}': entry '{entry.GetPath()}' : {count} symbols")
    return count


def GetSymbolValue(sym, data, msg):
    """Get the value of a symbol

    This can only be used on symbols with an integer value.

    Args:
        sym (Symbol): Symbol to check
        data (bytes): Data for the ELF file - the symbol data appears at offset
            sym.offset
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        int: Value of the symbol

    Raises:
        ValueError: Symbol has an unexpected size
    """
    pack_string = GetPackString(sym, msg)
    value = struct.unpack(pack_string, data[sym.offset:sym.offset + sym.size])
    return value[0]


def MakeElf(elf_fname, text, data):
    """Make an elf file with the given data in a single section

    The output file has a several section including '.text' and '.data',
    containing the info provided in arguments.

    The intermediate files are written to a temporary directory, which is
    removed afterwards even if the build fails.

    Args:
        elf_fname: Output filename
        text: Text (code) to put in the file's .text section
        data: Data to put in the file's .data section
    """
    outdir = tempfile.mkdtemp(prefix='binman.elf.')
    try:
        s_file = os.path.join(outdir, 'elf.S')

        # Split the text into two parts so that we can make the entry point two
        # bytes after the start of the text section
        text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]]
        text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]]
        data_bytes = ['\t.byte\t%#x' % byte for byte in data]
        with open(s_file, 'w') as fd:
            print('''/* Auto-generated C program to produce an ELF file for testing */

.section .text
.code32
.globl _start
.type _start, @function
%s
_start:
%s
.ident "comment"

.comm fred,8,4

.section .empty
.globl _empty
_empty:
.byte 1

.globl ernie
.data
.type ernie, @object
.size ernie, 4
ernie:
%s
''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)),
                  file=fd)
        lds_file = os.path.join(outdir, 'elf.lds')

        # Use a linker script to set the alignment and text address.
        with open(lds_file, 'w') as fd:
            print('''/* Auto-generated linker script to produce an ELF file for testing */

PHDRS
{
    text PT_LOAD ;
    data PT_LOAD ;
    empty PT_LOAD FLAGS ( 6 ) ;
    note PT_NOTE ;
}

SECTIONS
{
    . = 0xfef20000;
    ENTRY(_start)
    .text . : SUBALIGN(0)
    {
        *(.text)
    } :text
    .data : {
        *(.data)
    } :data
    _bss_start = .;
    .empty : {
        *(.empty)
    } :empty
    /DISCARD/ : {
        *(.note.gnu.property)
    }
    .note : {
        *(.comment)
    } :note
    .bss _bss_start (OVERLAY) : {
        *(.bss)
    }
}
''', file=fd)
        # -static: Avoid requiring any shared libraries
        # -nostdlib: Don't link with C library
        # -Wl,--build-id=none: Don't generate a build ID, so that we just get the
        #   text section at the start
        # -m32: Build for 32-bit x86
        # -T...: Specifies the link script, which sets the start address
        cc, args = tools.get_target_compile_tool('cc')
        args += ['-static', '-nostdlib', '-Wl,--build-id=none', '-m32', '-T',
                 lds_file, '-o', elf_fname, s_file]
        command.output(cc, *args)
    finally:
        shutil.rmtree(outdir)


def DecodeElf(data, location):
    """Decode an ELF file and return information about it

    Args:
        data: Data from ELF file
        location: Start address of data to return. Segment contents which lie
            below this address are not copied, so are left as zero bytes

    Returns:
        ElfInfo object containing information about the decoded ELF file

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    file_size = len(data)
    with io.BytesIO(data) as fd:
        elf = ELFFile(fd)
        data_start = 0xffffffff
        data_end = 0
        mem_end = 0
        virt_to_phys = 0

        # First pass: work out the physical address range which is covered by
        # the loadable segments
        for i in range(elf.num_segments()):
            segment = elf.get_segment(i)
            if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
                skipped = 1  # To make code-coverage see this line
                continue
            start = segment['p_paddr']
            mend = start + segment['p_memsz']
            rend = start + segment['p_filesz']
            data_start = min(data_start, start)
            data_end = max(data_end, rend)
            mem_end = max(mem_end, mend)
            if not virt_to_phys:
                virt_to_phys = segment['p_paddr'] - segment['p_vaddr']

        # Second pass: copy the contents of each segment into place
        output = bytearray(data_end - data_start)
        for i in range(elf.num_segments()):
            segment = elf.get_segment(i)
            if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
                skipped = 1  # To make code-coverage see this line
                continue
            start = segment['p_paddr']
            offset = 0
            if start < location:
                offset = location - start
                start = location
            # A legal ELF file can have a program header with non-zero length
            # but zero-length file size and a non-zero offset which, added
            # together, are greater than input->size (i.e. the total file size).
            #  So we need to not even test in the case that p_filesz is zero.
            # Note: All of this code is commented out since we don't have a test
            # case for it.
            #if not size:
                #continue
            #end = segment['p_offset'] + segment['p_filesz']
            #if end > file_size:
                #raise ValueError('Underflow copying out the segment. File has %#x bytes left, segment end is %#x\n',
                                 #file_size, end)

            # Size the destination slice to match the data actually copied.
            # If the slice were p_filesz bytes long while the data had
            # 'offset' bytes trimmed from its start, the assignment would
            # silently shrink the bytearray and shift later contents.
            seg_data = segment.data()[offset:]
            pos = start - data_start
            output[pos:pos + len(seg_data)] = seg_data
    return ElfInfo(output, data_start, elf.header['e_entry'] + virt_to_phys,
                   mem_end - data_start)


def UpdateFile(infile, outfile, start_sym, end_sym, insert):
    """Write a copy of an ELF file with data inserted between two symbols

    The region between the file offsets of start_sym and end_sym is replaced
    with the insert data, padded with zero bytes up to the size of the region,
    so the output file has the same size as the input.

    Args:
        infile (str): Filename of the input ELF file
        outfile (str): Filename of the output file to write
        start_sym (str): Name of the symbol marking the start of the region
        end_sym (str): Name of the symbol marking the end of the region
        insert (bytes): Data to insert at start_sym

    Raises:
        ValueError: elftools is not available, the two symbols were not both
            found, or the data is too large for the region
    """
    tout.notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" %
                (outfile, len(insert), len(insert), start_sym, end_sym))
    syms = GetSymbolFileOffset(infile, [start_sym, end_sym])
    if len(syms) != 2:
        raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" %
                         (start_sym, end_sym, len(syms),
                          ','.join(syms.keys())))

    size = syms[end_sym].offset - syms[start_sym].offset
    if len(insert) > size:
        raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" %
                         (infile, len(insert), len(insert), size, size))

    data = tools.read_file(infile)
    newdata = data[:syms[start_sym].offset]
    newdata += insert + tools.get_bytes(0, size - len(insert))
    newdata += data[syms[end_sym].offset:]
    tools.write_file(outfile, newdata)
    tout.info('Written to offset %#x' % syms[start_sym].offset)


def read_loadable_segments(data):
    """Read segments from an ELF file

    Args:
        data (bytes): Contents of file

    Returns:
        tuple:
            list of segments, each:
                int: Segment number (0 = first)
                int: Start address of segment in memory
                bytes: Contents of segment
            int: entry address for image

    Raises:
        ValueError: elftools is not available, or the data could not be
            parsed as an ELF file
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as inf:
        try:
            elf = ELFFile(inf)
        except ELFError as err:
            raise ValueError(err) from err
        entry = elf.header['e_entry']
        segments = []
        for i in range(elf.num_segments()):
            segment = elf.get_segment(i)
            if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
                skipped = 1  # To make code-coverage see this line
                continue
            start = segment['p_offset']
            rend = start + segment['p_filesz']
            segments.append((i, segment['p_paddr'], data[start:rend]))
    return segments, entry


def is_valid(data):
    """Check if some binary data is a valid ELF file

    Args:
        data (bytes): Bytes to check

    Returns:
        bool: True if a valid Elf file, False if not

    Raises:
        ValueError: elftools is not available
    """
    # Check this up front: ELFError is not defined when elftools is missing,
    # so the 'except' clause below would fail with NameError
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    try:
        DecodeElf(data, 0)
        return True
    except ELFError:
        return False