1# SPDX-License-Identifier: GPL-2.0+
2# Copyright (c) 2016 Google, Inc
3# Written by Simon Glass <sjg@chromium.org>
4#
5# Handle various things related to ELF images
6#
7
8from collections import namedtuple, OrderedDict
9import io
10import os
11import re
12import shutil
13import struct
14import tempfile
15
16from u_boot_pylib import command
17from u_boot_pylib import tools
18from u_boot_pylib import tout
19
# The elftools package is optional. ELF_TOOLS records whether it could be
# imported; functions which need it check this flag before using it.
ELF_TOOLS = True
try:
    from elftools.elf.elffile import ELFFile
    from elftools.elf.elffile import ELFError
    from elftools.elf.sections import SymbolTableSection
except ImportError:  # pragma: no cover
    # Only a failed import means 'not available'; anything else (e.g.
    # KeyboardInterrupt) must not be swallowed here
    ELF_TOOLS = False
27
# Magic value for binman symbols: the bytes 'BSYM' read as a little-endian
# 32-bit integer. Keep in sync with include/binman_sym.h
BINMAN_SYM_MAGIC_VALUE = 0x4d595342

# Information about an ELF symbol:
#   section (str): Name of the section containing this symbol
#   address (int): Address of the symbol (its value)
#   size (int): Size of the symbol in bytes
#   weak (bool): True if the symbol is weak
#   offset (int or None): Offset of the symbol's data in the ELF file, or None
#       if not known
Symbol = namedtuple('Symbol', 'section address size weak offset')

# Information about an ELF file:
#   data: Extracted program contents of the ELF file (this is what would be
#       loaded by an ELF loader when reading this file)
#   load: Load address of code
#   entry: Entry address of code
#   memsize: Number of bytes in memory occupied by loading this ELF file
ElfInfo = namedtuple('ElfInfo', 'data load entry memsize')
47
48
def GetSymbols(fname, patterns):
    """Read the symbol table of an ELF file using 'objdump -t'

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns, each a string. Only objdump output
            lines matching at least one pattern are used. If this is empty or
            None, every symbol line is used

    Returns:
        OrderedDict, sorted by symbol address:
          key: Name of symbol
          value: Symbol object for that symbol (its offset member is None)
    """
    stdout = tools.run('objdump', '-t', fname)
    matcher = re.compile('|'.join(patterns)) if patterns else None

    found = {}
    in_table = False
    for line in stdout.splitlines():
        if not in_table:
            # Ignore everything up to and including the table header
            in_table = 'SYMBOL TABLE' in line
            continue
        if not line:
            continue
        if matcher and not matcher.search(line):
            continue

        # Each line is: <value> <7 flag characters> <section> <size> <name>
        split_at = line.find(' ')
        value = line[:split_at]
        rest = line[split_at + 1:]
        flags = rest[:7]
        fields = rest[7:].split()
        section, size = fields[:2]
        if len(fields) <= 2:
            # No name on this line, so there is nothing to record
            continue
        # A '.hidden' marker may sit between the size and the name
        name = fields[3] if fields[2] == '.hidden' else fields[2]
        found[name] = Symbol(section, int(value, 16), int(size, 16),
                             flags[1] == 'w', None)

    # Sort dict by address
    return OrderedDict(sorted(found.items(),
                              key=lambda item: item[1].address))
90
def _GetFileOffset(elf, addr):
    """Convert an address into an offset within the ELF file

    Only PT_LOAD segments are considered, and only the part of each segment
    which is present in the file (p_filesz bytes from p_vaddr).

    Args:
        elf (ELFFile): ELF file to check
        addr (int): Address to search for

    Returns:
        int: Offset of that address in the ELF file, or None if no loadable
            segment holds it
    """
    for seg in elf.iter_segments():
        if seg.header['p_type'] != 'PT_LOAD':
            continue
        vaddr = seg['p_vaddr']
        if vaddr <= addr < vaddr + seg['p_filesz']:
            return addr - vaddr + seg['p_offset']
    return None
106
def GetFileOffset(fname, addr):
    """Convert an address into an offset within a named ELF file

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        int: Offset of that address in the ELF file, or None if not valid

    Raises:
        ValueError: elftools is not available
    """
    if ELF_TOOLS:
        with open(fname, 'rb') as inf:
            return _GetFileOffset(ELFFile(inf), addr)
    raise ValueError("Python: No module named 'elftools'")
122
def GetSymbolFromAddress(fname, addr):
    """Find the name of the symbol located at a given address

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        str: Name of the first symbol (in address order) whose address equals
            addr, or None if no symbol is at that address

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with open(fname, 'rb') as inf:
        # The ELFFile object is not used further; the symbols come from
        # GetSymbols(). NOTE(review): presumably constructing it rejects files
        # which are not valid ELF - confirm before removing
        ELFFile(inf)
        symbols = GetSymbols(fname, None)
    return next((name for name, sym in symbols.items()
                 if sym.address == addr), None)
141
def GetSymbolFileOffset(fname, patterns):
    """Get symbols from an ELF file, including their offsets within the file

    The file is read with elftools and every symbol-table section is scanned.

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for in the symbol names,
            each a string. If this is empty or None, all symbols are returned

    Returns:
        OrderedDict, sorted by symbol address:
          key: Name of symbol
          value: Symbol object. Its section member is the name of the
              symbol-table section the symbol was read from; its offset
              member is the file offset of the symbol's address, or None if
              that address is not within the file data of a loadable segment

    Raises:
        ValueError: elftools is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")

    # Match GetSymbols(): no patterns means no filtering
    re_syms = re.compile('|'.join(patterns)) if patterns else None

    syms = {}
    with open(fname, 'rb') as fd:
        elf = ELFFile(fd)
        for section in elf.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            for symbol in section.iter_symbols():
                if re_syms and not re_syms.search(symbol.name):
                    continue
                addr = symbol.entry['st_value']
                syms[symbol.name] = Symbol(
                    section.name, addr, symbol.entry['st_size'],
                    symbol.entry['st_info']['bind'] == 'STB_WEAK',
                    _GetFileOffset(elf, addr))

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
174
def GetSymbolAddress(fname, sym_name):
    """Get the address (value) of a symbol from an ELF file

    Args:
        fname: Filename of the ELF file to read
        sym_name (str): Exact name of the symbol to look up

    Returns:
        Symbol value (as an integer) or None if not found
    """
    # GetSymbols() treats its patterns as regexes. Escape the name so that one
    # containing regex metacharacters is still matched literally
    sym = GetSymbols(fname, [re.escape(sym_name)]).get(sym_name)
    if sym is None:
        return None
    return sym.address
190
def GetPackString(sym, msg):
    """Choose the struct.pack/unpack format for a symbol, based on its size

    Args:
        sym (Symbol): Symbol to check. Only the size member is checked
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        str: struct string to use: '<I' for a 4-byte symbol, '<Q' for an
            8-byte symbol

    Raises:
        ValueError: Symbol has an unexpected size
    """
    pack_string = {4: '<I', 8: '<Q'}.get(sym.size)
    if pack_string is None:
        raise ValueError('%s has size %d: only 4 and 8 are supported' %
                         (msg, sym.size))
    return pack_string
212
def GetSymbolOffset(elf_fname, sym_name, base_sym=None):
    """Read the offset of a symbol compared to a base symbol

    This is useful for obtaining the value of a single symbol relative to the
    base of a binary blob.

    Args:
        elf_fname: Filename of the ELF file to read
        sym_name (str): Name of symbol to read
        base_sym (str): Base symbol to use to calculate the offset (or None to
            use '__image_copy_start')

    Returns:
        int: Offset of the symbol relative to the base symbol

    Raises:
        KeyError: Either symbol is not present in the ELF file
    """
    if not base_sym:
        base_sym = '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    # GetSymbols() treats its patterns as regexes. Escape the names so that
    # any regex metacharacters in them are matched literally
    syms = GetSymbols(fname, [re.escape(base_sym), re.escape(sym_name)])
    base = syms[base_sym].address
    val = syms[sym_name].address
    return val - base
235
def LookupAndWriteSymbols(elf_fname, entry, section, is_elf=False,
                          base_sym=None):
    """Write the values of binman symbols into the contents of an entry

    The ELF file is used to find where each symbol whose name starts with
    '_binman' lives in the entry contents. The value for each one is looked up
    in the image (or is the magic value, for '_binman_sym_magic') and packed
    into entry.data at that position, so that it is visible at run time.

    Args:
        elf_fname: Filename of ELF image containing the symbol information for
            entry
        entry: Entry to process
        section: Section which can be used to lookup symbol values
        is_elf (bool): True if the entry contents is an ELF file, so each
            symbol is written at its file offset; False if it is a blob, so
            each symbol is written at its address relative to the base symbol
        base_sym: Base symbol marking the start of the image (None to use
            '__image_copy_start')

    Returns:
        int: Number of symbols written

    Raises:
        ValueError: is_elf is set but elftools is not available; or a symbol
            lies outside the entry contents; or a symbol has an unsupported
            size
    """
    base_sym = base_sym or '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    syms = GetSymbols(fname, ['image', 'binman'])
    if is_elf:
        if not ELF_TOOLS:
            msg = ("Section '%s': entry '%s'" %
                   (section.GetPath(), entry.GetPath()))
            raise ValueError(f'{msg}: Cannot write symbols to an ELF file without Python elftools')
        # Fill in the file offset of every symbol
        with open(fname, 'rb') as inf:
            elf = ELFFile(inf)
            syms = {
                name: sym._replace(offset=_GetFileOffset(elf, sym.address))
                for name, sym in syms.items()
            }

    if not syms:
        tout.debug('LookupAndWriteSymbols: no syms')
        return 0
    base = syms.get(base_sym)
    if not (base or is_elf):
        tout.debug('LookupAndWriteSymbols: no base')
        return 0
    base_addr = 0 if is_elf else base.address

    count = 0
    for name, sym in syms.items():
        if not name.startswith('_binman'):
            continue
        msg = ("Section '%s': Symbol '%s'\n   in entry '%s'" %
               (section.GetPath(), name, entry.GetPath()))
        if is_elf:
            # For ELF files, use the file offset
            offset = sym.offset
        else:
            # For blobs the base symbol is by definition at the start, so
            # the symbol's position is its distance from the base address
            offset = sym.address - base.address
            if offset < 0 or offset + sym.size > entry.contents_size:
                raise ValueError('%s has offset %x (size %x) but the contents '
                                 'size is %x' % (entry.GetPath(), offset,
                                                 sym.size,
                                                 entry.contents_size))
        pack_string = GetPackString(sym, msg)
        if name == '_binman_sym_magic':
            value = BINMAN_SYM_MAGIC_VALUE
        else:
            # Look up the symbol in our entry tables.
            image = section.GetImage()
            value = image.LookupImageSymbol(name, sym.weak, msg, base_addr)
        if value is None:
            # No value available: write -1, which needs the signed format
            value = -1
            pack_string = pack_string.lower()
        value_bytes = struct.pack(pack_string, value)
        tout.debug('%s:\n   insert %s, offset %x, value %x, length %d' %
                   (msg, name, offset, value, len(value_bytes)))
        entry.data = (entry.data[:offset] + value_bytes +
                      entry.data[offset + sym.size:])
        count += 1

    if count:
        tout.detail(
            f"Section '{section.GetPath()}': entry '{entry.GetPath()}' : {count} symbols")
    return count
319
def GetSymbolValue(sym, data, msg):
    """Read the integer value of a symbol from ELF-file data

    This can only be used on symbols with an integer value.

    Args:
        sym (Symbol): Symbol to check
        data (bytes): Data for the ELF file - the symbol data appears at offset
            sym.offset
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        int: Value of the symbol

    Raises:
        ValueError: Symbol has an unexpected size
    """
    fmt = GetPackString(sym, msg)
    start = sym.offset
    (value,) = struct.unpack(fmt, data[start:start + sym.size])
    return value
341
def MakeElf(elf_fname, text, data):
    """Make an ELF file containing the given text and data, for testing

    An assembly file and a linker script are written to a temporary directory
    and built with the target compiler. The output file has several sections
    including '.text' and '.data', containing the info provided in arguments.
    The entry point (_start) is two bytes after the start of the text.

    The temporary directory is removed afterwards, even if the build fails.

    Args:
        elf_fname: Output filename
        text: Text (code) to put in the file's .text section
        data: Data to put in the file's .data section
    """
    outdir = tempfile.mkdtemp(prefix='binman.elf.')
    try:
        s_file = os.path.join(outdir, 'elf.S')

        # Split the text into two parts so that we can make the entry point
        # two bytes after the start of the text section
        text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]]
        text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]]
        data_bytes = ['\t.byte\t%#x' % byte for byte in data]
        with open(s_file, 'w') as fd:
            print('''/* Auto-generated C program to produce an ELF file for testing */

.section .text
.code32
.globl _start
.type _start, @function
%s
_start:
%s
.ident "comment"

.comm fred,8,4

.section .empty
.globl _empty
_empty:
.byte 1

.globl ernie
.data
.type ernie, @object
.size ernie, 4
ernie:
%s
''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)),
            file=fd)
        lds_file = os.path.join(outdir, 'elf.lds')

        # Use a linker script to set the alignment and text address.
        with open(lds_file, 'w') as fd:
            print('''/* Auto-generated linker script to produce an ELF file for testing */

PHDRS
{
    text PT_LOAD ;
    data PT_LOAD ;
    empty PT_LOAD FLAGS ( 6 ) ;
    note PT_NOTE ;
}

SECTIONS
{
    . = 0xfef20000;
    ENTRY(_start)
    .text . : SUBALIGN(0)
    {
        *(.text)
    } :text
    .data : {
        *(.data)
    } :data
    _bss_start = .;
    .empty : {
        *(.empty)
    } :empty
    /DISCARD/ : {
        *(.note.gnu.property)
    }
    .note : {
        *(.comment)
    } :note
    .bss _bss_start  (OVERLAY) : {
        *(.bss)
    }
}
''', file=fd)
        # -static: Avoid requiring any shared libraries
        # -nostdlib: Don't link with C library
        # -Wl,--build-id=none: Don't generate a build ID, so that we just get
        #   the text section at the start
        # -m32: Build for 32-bit x86
        # -T...: Specifies the link script, which sets the start address
        cc, args = tools.get_target_compile_tool('cc')
        args += ['-static', '-nostdlib', '-Wl,--build-id=none', '-m32', '-T',
                 lds_file, '-o', elf_fname, s_file]
        command.output(cc, *args)
    finally:
        # Don't leave the temporary files behind if something went wrong
        shutil.rmtree(outdir)
439
def DecodeElf(data, location):
    """Decode an ELF file and return information about it

    The file contents of every PT_LOAD segment with a non-zero memory size are
    copied into a single buffer, each at its physical address (p_paddr)
    relative to the lowest physical address of those segments.

    Args:
        data: Data from ELF file
        location: Start address of data to return. A segment which starts
            below this has its leading bytes (those below location) dropped

    Returns:
        ElfInfo object containing information about the decoded ELF file:
            data: bytearray holding the segment contents
            load: Lowest physical address of the loadable segments
            entry: Entry address from the ELF header, adjusted by the
                physical-minus-virtual address difference of the segments
            memsize: Bytes from the load address to the end of the highest
                segment in memory

    Raises:
        ValueError: elftools is not available, or the file has no loadable
            segments
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as fd:
        elf = ELFFile(fd)

        # Only segments which are loaded and occupy memory are of interest
        loadable = [seg for seg in elf.iter_segments()
                    if seg['p_type'] == 'PT_LOAD' and seg['p_memsz']]
        if not loadable:
            raise ValueError('ELF file has no loadable segments')

        # Work out the range covered. Taking the minimum over the segments
        # (rather than starting from a fixed 32-bit maximum) keeps this
        # correct for files which load above 4GiB
        data_start = min(seg['p_paddr'] for seg in loadable)
        data_end = max(seg['p_paddr'] + seg['p_filesz'] for seg in loadable)
        mem_end = max(seg['p_paddr'] + seg['p_memsz'] for seg in loadable)

        # Use the first non-zero physical/virtual difference, if any
        virt_to_phys = 0
        for seg in loadable:
            virt_to_phys = seg['p_paddr'] - seg['p_vaddr']
            if virt_to_phys:
                break

        output = bytearray(data_end - data_start)
        for seg in loadable:
            start = seg['p_paddr']
            offset = 0
            if start < location:
                offset = location - start
                start = location
            # A legal ELF file can have a program header with a zero file size
            # and an offset beyond the end of the file. No check of the
            # segment against the size of the file is done here.
            size = seg['p_filesz']
            # NOTE(review): when offset is non-zero the data assigned is
            # shorter than the slice it replaces, so output shrinks by that
            # amount - confirm this is intended
            pos = start - data_start
            output[pos:pos + size] = seg.data()[offset:]
    return ElfInfo(output, data_start, elf.header['e_entry'] + virt_to_phys,
                   mem_end - data_start)
502
def UpdateFile(infile, outfile, start_sym, end_sym, insert):
    """Write a copy of an ELF file with data inserted between two symbols

    The part of the file between the file offsets of start_sym and end_sym is
    replaced with insert, padded to the same size with the result of
    tools.get_bytes(0, ...) (presumably zero bytes - see that function). The
    rest of the file is copied unchanged.

    Args:
        infile (str): Filename of the ELF file to read
        outfile (str): Filename of the file to write
        start_sym (str): Pattern for the symbol marking the start of the region
        end_sym (str): Pattern for the symbol marking the end of the region
        insert (bytes): Data to place at the start of the region

    Raises:
        ValueError: The patterns did not match exactly two symbols, or insert
            is larger than the region
    """
    insert_len = len(insert)
    tout.notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" %
                (outfile, insert_len, insert_len, start_sym, end_sym))
    syms = GetSymbolFileOffset(infile, [start_sym, end_sym])
    num_syms = len(syms)
    if num_syms != 2:
        raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" %
                         (start_sym, end_sym, num_syms, ','.join(syms)))

    end_offset = syms[end_sym].offset
    start_offset = syms[start_sym].offset
    size = end_offset - start_offset
    if insert_len > size:
        raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" %
                         (infile, insert_len, insert_len, size, size))

    data = tools.read_file(infile)
    padded = insert + tools.get_bytes(0, size - insert_len)
    newdata = data[:start_offset] + padded + data[end_offset:]
    tools.write_file(outfile, newdata)
    tout.info('Written to offset %#x' % start_offset)
523
def read_loadable_segments(data):
    """Extract the loadable segments and entry address from an ELF file

    A segment is included if it is of type PT_LOAD and has a non-zero size in
    memory.

    Args:
        data (bytes): Contents of file

    Returns:
        tuple:
            list of segments, each:
                int: Segment number (0 = first)
                int: Start address of segment in memory
                bytes: Contents of segment
            int: entry address for image

    Raises:
        ValueError: elftools is not available, or the data could not be
            parsed as an ELF file
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as inf:
        try:
            elf = ELFFile(inf)
        except ELFError as err:
            raise ValueError(err)
        entry = elf.header['e_entry']
        segments = []
        for seg_num in range(elf.num_segments()):
            segment = elf.get_segment(seg_num)
            if segment['p_type'] == 'PT_LOAD' and segment['p_memsz']:
                # The contents are the p_filesz bytes at p_offset in the file
                first = segment['p_offset']
                last = first + segment['p_filesz']
                segments.append((seg_num, segment['p_paddr'],
                                 data[first:last]))
    return segments, entry
559
def is_valid(data):
    """Check if some binary data is a valid ELF file

    The data is decoded with DecodeElf(). Only an ELFError from that is taken
    to mean 'not valid'; any other exception it raises is passed on.

    Args:
        data (bytes): Bytes to check

    Returns:
        bool: True if a valid ELF file, False if not

    Raises:
        ValueError: elftools is not available
    """
    # Check this first: without elftools the ELFError name used below does
    # not exist, so the except clause would fail with a NameError
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    try:
        DecodeElf(data, 0)
    except ELFError:
        return False
    return True
574