1#!/usr/bin/env python
2#-
3# Copyright (c) 2010 Gleb Kurtsou
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions
8# are met:
9# 1. Redistributions of source code must retain the above copyright
10#    notice, this list of conditions and the following disclaimer.
11# 2. Redistributions in binary form must reproduce the above copyright
12#    notice, this list of conditions and the following disclaimer in the
13#    documentation and/or other materials provided with the distribution.
14#
15# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25# SUCH DAMAGE.
26#
27
28from __future__ import print_function
29import os
30import sys
31import re
32import optparse
33
class Config(object):
    """Global settings holder; values are read from the class itself.

    The attributes fall into three groups: options controlled by the user,
    names of the external tools, and debugging switches.  The two filters
    start out as ``None`` and only become usable after ``Config.init()``.
    """

    version = '0.1'

    # -- controlled by user --
    verbose = 0
    dump = False
    no_dump = False
    version_filter = None
    symbol_filter = None
    alias_prefixes = []

    # -- misc opts: external tool names --
    objdump = 'objdump'
    dwarfdump = 'dwarfdump'

    # -- debug: cache switches and warning categories --
    cmpcache_enabled = True
    dwarfcache_enabled = True
    w_alias = True
    w_cached = False
    w_symbol = True

    class FileConfig(object):
        """Settings kept separately for each of the two compared files."""

        filename = None
        out = sys.stdout

        def init(self, outname):
            """Send output to the file *outname*.

            An empty/``None`` name or ``'-'`` leaves ``out`` untouched, i.e.
            pointing at the stream captured in the class attribute.
            """
            if not outname or outname == '-':
                return
            self.out = open(outname, "w")

    origfile = FileConfig()
    newfile = FileConfig()

    # Regular expressions for symbols excluded by default.
    exclude_sym_default = [
        '^__bss_start$',
        '^_edata$',
        '^_end$',
        '^_fini$',
        '^_init$',
    ]

    @classmethod
    def init(cls):
        """Install fresh, empty filters for versions and symbols."""
        cls.version_filter = StrFilter()
        cls.symbol_filter = StrFilter()
75
class App(object):
    """Namespace for application-wide run state."""

    # Starts at 0; presumably the process exit status -- the code that
    # updates and consumes it is not part of this section.
    result_code = 0
78
def warn(cond, msg):
    """Write ``"WARN: " + msg`` to stderr, but only when *cond* is true."""
    if not cond:
        return
    text = "WARN: " + msg
    print(text, file=sys.stderr)
82
83# {{{ misc
84
class StrFilter(object):
    """Include/exclude filter driven by lists of regular expressions.

    Patterns are appended to ``include`` / ``exclude`` as strings and must
    be turned into regex objects with ``compile()`` before ``match()`` is
    called.
    """

    def __init__(self):
        self.exclude = []
        self.include = []

    def compile(self):
        """Compile the current pattern strings into regex objects."""
        self.re_exclude = [re.compile(pattern) for pattern in self.exclude]
        self.re_include = [re.compile(pattern) for pattern in self.include]

    def match(self, s):
        """Return True when *s* passes the filter.

        With include patterns present, *s* has to match at least one of
        them; in every case it must match none of the exclude patterns.
        Patterns are applied with ``re.match`` (anchored at the start).
        """
        if len(self.re_include):
            if not any(rx.match(s) for rx in self.re_include):
                return False
        return not any(rx.match(s) for rx in self.re_exclude)
107
class Cache(object):
    """Dictionary-backed object cache that keeps hit/miss counters.

    A disabled cache stores nothing and reports every lookup as a miss.
    """

    class CacheStats(object):
        """Hit and miss counters; may be shared by several caches."""

        def __init__(self):
            self.hit = 0
            self.miss = 0

        def show(self, name):
            """Return a one-line summary of the counters labelled *name*."""
            lookups = self.hit + self.miss
            ratio = '%f' % (self.hit/float(lookups)) if lookups != 0 else '(undef)'
            summary = (name, self.hit, self.miss, ratio)
            return '%s cache stats: hit: %d; miss: %d; ratio: %s' % summary

    def __init__(self, enabled=True, stats=None):
        self.enabled = enabled
        self.items = {}
        # Use the caller's counters when given, otherwise private ones.
        self.stats = Cache.CacheStats() if stats is None else stats

    def get(self, id):
        """Return the object stored under *id*, or None on a miss."""
        if not self.enabled or id not in self.items:
            self.stats.miss += 1
            return None
        self.stats.hit += 1
        return self.items[id]

    def put(self, id, obj):
        """Store *obj* under *id*; does nothing when the cache is disabled.

        Overwriting an entry with a different object is allowed and only
        reported through the ``w_cached`` warning category.
        """
        if not self.enabled:
            return
        if id in self.items and obj is not self.items[id]:
            details = (id, self.items[id], obj)
            warn(Config.w_cached,
                    "Item is already cached: %d (%s, %s)" % details)
        self.items[id] = obj

    def replace(self, id, obj):
        """Overwrite the existing entry *id* with *obj*.

        The entry is asserted to exist; a disabled cache ignores the call.
        """
        if not self.enabled:
            return
        assert id in self.items
        self.items[id] = obj
153
class ListDiff(object):
    """Set-based difference between an original and a new collection.

    Exposes ``orig`` and ``new`` as sets together with the elements they
    share (``common``), those only in *new* (``added``) and those only in
    *orig* (``removed``).
    """

    def __init__(self, orig, new):
        before, after = set(orig), set(new)
        shared = before & after
        self.orig = before
        self.new = after
        self.common = shared
        self.added = after - shared
        self.removed = before - shared
161
class PrettyPrinter(object):
    """Drives the ``_pp`` / ``_pp_ex`` methods of definition objects.

    ``run()`` produces the short form of an object; extended forms
    requested through ``run_nested()`` are collected on ``stack``.
    """

    def __init__(self):
        self.stack = []

    def run_nested(self, obj):
        """Collect the extended representation of *obj* on the stack."""
        self.stack.append(obj._pp_ex(self))

    def run(self, obj):
        """Pretty-print *obj*, remember the result and return it."""
        text = obj._pp(self)
        self._result = text
        return text

    def nested(self):
        """Return the collected extended forms, de-duplicated and sorted."""
        unique = set(self.stack)
        return sorted(unique)

    def result(self):
        """Return what the most recently completed ``run()`` produced."""
        return self._result
179
180# }}}
181
182#{{{ symbols and version maps
183
class Symbol(object):
    """A versioned symbol of a library.

    ``definition`` starts as None and is filled in later, once the debug
    information for the symbol has been resolved.
    """

    def __init__(self, name, offset, version, lib):
        self.definition = None
        self.lib = lib
        self.version = version
        self.offset = offset
        self.name = name

    @property
    def name_ver(self):
        """The symbol name and its version joined as ``name@version``."""
        return '@'.join((self.name, self.version))

    def __repr__(self):
        fields = (self.name, self.offset, self.version)
        return "Symbol(%s, 0x%x, %s)" % fields
198
class CommonSymbol(object):
    """Pairs the original and the new variant of one symbol.

    Both symbols must agree in name and version; the pair then exposes
    that shared ``name`` and ``version``.

    Raises:
        RuntimeError: if the two symbols differ in name or version.
    """

    def __init__(self, origsym, newsym):
        if origsym.name != newsym.name or origsym.version != newsym.version:
            # Interpolate the symbols into the message; passing them as a
            # second exception argument would leave the "%s" unexpanded.
            raise RuntimeError("Symbols have different names: %s" %
                    ([origsym, newsym],))
        self.origsym = origsym
        self.newsym = newsym
        self.name = newsym.name
        self.version = newsym.version

    def __repr__(self):
        return "CommonSymbol(%s, %s)" % (self.name, self.version)
211
class SymbolAlias(object):
    """A prefixed local symbol that stands in for another symbol.

    ``alias`` keeps the full prefixed name, ``name`` is the same name with
    the prefix removed.  The alias is asserted to start with the prefix.
    """

    def __init__(self, alias, prefix, offset):
        assert alias.startswith(prefix)
        skip = len(prefix)
        self.offset = offset
        self.name = alias[skip:]
        self.alias = alias

    def __repr__(self):
        fields = (self.alias, self.offset)
        return "SymbolAlias(%s, 0x%x)" % fields
221
222
class VersionMap(object):
    """The symbols that belong to one named version, keyed by symbol name."""

    def __init__(self, name):
        self.name = name
        self.symbols = {}

    def append(self, symbol):
        """Register *symbol*; a second symbol of the same name is an error.

        Raises:
            ValueError: if a symbol with that name is already present.
        """
        key = symbol.name
        if key in self.symbols:
            raise ValueError("Symbol is already defined %s@%s" %
                    (key, self.name))
        self.symbols[key] = symbol

    def names(self):
        """Return the names of all registered symbols."""
        return self.symbols.keys()

    def __repr__(self):
        registered = self.symbols.values()
        return repr(registered)
239
240# }}}
241
242# {{{ types and definitions
243
class Def(object):
    """Base class of every type/definition node.

    Besides ``id`` and ``name`` a node carries arbitrary keyword
    attributes in ``attrs``; they can be read like ordinary attributes
    through ``__getattr__``.  Subclasses that merely wrap another type
    set ``_is_alias`` and expose the wrapped node as ``type``.
    """

    _is_alias = False

    def __init__(self, id, name, **kwargs):
        self.id = id
        self.name = name
        self.attrs = kwargs

    def __getattr__(self, attr):
        # Reached only when normal attribute lookup fails: fall back to
        # the keyword attributes given at construction time.
        if attr in self.attrs:
            return self.attrs[attr]
        raise AttributeError('%s in %s' % (attr, str(self)))

    def _name_opt(self, default=''):
        """Return the name, or *default* when the name is empty/None."""
        return self.name or default

    def _alias(self):
        """Follow alias nodes down to the first non-alias definition."""
        if not self._is_alias:
            return self
        return self.type._alias()

    def __cmp__(self, other):
        """Python 2 comparison hook for two definitions.

        Aliases are resolved first.  Anything without ``_alias`` compares
        as different (1).  Nodes of different classes are deliberately
        reported as equal (0); the stricter RuntimeError is disabled.
        Nodes of the same class are compared by ``attrs``, and the outcome
        is memoized in ``Dwarf.cmpcache`` when both ids are non-zero.
        """
        # TODO assert 'self' and 'other' belong to different libraries
        lhs = self._alias()
        try:
            rhs = other._alias()
        except AttributeError:
            return 1
        if cmp(lhs.__class__, rhs.__class__) != 0:
            return 0
        if lhs.id == 0 or rhs.id == 0:
            # Id 0 marks a node that must not be cached.
            return cmp(lhs.attrs, rhs.attrs)
        # Combine both ids into a single cache key.
        key = (long(lhs.id) << 32) + rhs.id
        known = Dwarf.cmpcache.get(key)
        if known != None:
            return known
        outcome = cmp(lhs.attrs, rhs.attrs)
        if key != 0:
            Dwarf.cmpcache.put(key, outcome)
        return outcome

    def __repr__(self):
        parts = []
        if hasattr(self, 'name'):
            parts.append("name=%s" % self.name)
        for (key, val) in self.attrs.items():
            # Nested definitions are abbreviated instead of being expanded.
            if isinstance(val, Def):
                val = val.__class__.__name__ + '(...)'
            parts.append("%s=%s" % (key, val))
        return '%s(%s)' % (self.__class__.__name__, ', '.join(parts))

    def _mapval(self, param, vals):
        """Look *param* up in the dict *vals*.

        Raises:
            NotImplementedError: if *param* is not a key of *vals*.
        """
        if param in vals:
            return vals[param]
        raise NotImplementedError("Invalid value '%s': %s" %
                (param, str(self)))

    def _pp_ex(self, pp):
        """Extended pretty print; subclasses that support it override this."""
        raise NotImplementedError('Extended pretty print not implemeted: %s' %
                str(self))

    def _pp(self, pp):
        """Pretty print; subclasses override this."""
        raise NotImplementedError('Pretty print not implemeted: %s' % str(self))
315
class AnonymousDef(Def):
    """A definition that is always created without a name."""

    def __init__(self, id, **kwargs):
        # The name slot of Def is fixed to None for anonymous nodes.
        super(AnonymousDef, self).__init__(id, None, **kwargs)
319
class Void(AnonymousDef):
    """The ``void`` type; every ``Void()`` call yields the same object."""

    _instance = None

    def __new__(cls, *args, **kwargs):
        # Allocate once, then keep handing out the stored instance.
        existing = cls._instance
        if not existing:
            existing = super(Void, cls).__new__(cls, *args, **kwargs)
            cls._instance = existing
        return existing

    def __init__(self):
        # Void always uses id 0.
        AnonymousDef.__init__(self, 0)

    def _pp(self, pp):
        return "void"
334
class VarArgs(AnonymousDef):
    """Marker for a variable argument list; printed as an ellipsis."""

    def _pp(self, pp):
        return "..."
338
class PointerDef(AnonymousDef):
    """Pointer to the type held in the ``type`` attribute."""

    def _pp(self, pp):
        # Printed as the pointee immediately followed by '*'.
        pointee = pp.run(self.type)
        return "%s*" % (pointee,)
343
class BaseTypeDef(Def):
    """A base (scalar) type described by ``encoding`` and ``byte_size``."""

    # Encodings that are printed as fixed-width integer types.
    inttypes = ['DW_ATE_signed', 'DW_ATE_unsigned', 'DW_ATE_unsigned_char']

    def _pp(self, pp):
        """Return a C-like spelling of the type.

        Raises:
            NotImplementedError: for an encoding/size that is not handled.
        """
        encoding = self.encoding
        if encoding in self.inttypes:
            # Only DW_ATE_signed is signed; the others get the 'u' prefix.
            prefix = 'u' if encoding != 'DW_ATE_signed' else ''
            width = int(self.byte_size, 0) * 8
            return '%sint%s_t' % (prefix, width)
        if encoding == 'DW_ATE_signed_char' and int(self.byte_size, 0) == 1:
            return 'char'
        if encoding == 'DW_ATE_boolean' and int(self.byte_size, 0) == 1:
            return 'bool'
        if encoding == 'DW_ATE_float':
            float_names = {
                16: 'long double',
                8: 'double',
                4: 'float',
            }
            return self._mapval(int(self.byte_size, 0), float_names)
        raise NotImplementedError('Invalid encoding: %s' % self)
362
class TypeAliasDef(Def):
    """A typedef: a named alias for the type in the ``type`` attribute."""

    _is_alias = True

    def _pp(self, pp):
        target = self._alias()
        # Push the typedef name down onto an unnamed target so that the
        # target prints as 'T(<typedef name>)'.
        if self.name and not target.name:
            target.name = 'T(%s)' % self.name
        # The printed form is that of the aliased type, modifiers included.
        return self.type._pp(pp)
372
class EnumerationTypeDef(Def):
    """An enumeration type; unnamed enums are printed as 'UNKNOWN'."""

    def _pp(self, pp):
        label = self._name_opt('UNKNOWN')
        return 'enum ' + label
376
class ConstTypeDef(AnonymousDef):
    """``const`` qualifier around the type in the ``type`` attribute."""

    _is_alias = True

    def _pp(self, pp):
        qualified = self.type._pp(pp)
        return 'const ' + qualified
381
class VolatileTypeDef(AnonymousDef):
    """``volatile`` qualifier around the type in the ``type`` attribute."""

    _is_alias = True

    def _pp(self, pp):
        qualified = self.type._pp(pp)
        return 'volatile ' + qualified
386
class RestrictTypeDef(AnonymousDef):
    """``restrict`` qualifier around the type in the ``type`` attribute."""

    _is_alias = True

    def _pp(self, pp):
        qualified = self.type._pp(pp)
        return 'restrict ' + qualified
391
class ArrayDef(AnonymousDef):
    """Array of ``type``; exactly one subrange (dimension) is supported."""

    def _pp(self, pp):
        element = pp.run(self.type)
        assert len(self.subranges) == 1
        bound = self.subranges[0].upper_bound
        try:
            length = int(bound) + 1
        except ValueError:
            # The bound carries a parenthesised annotation; strip it and
            # parse what is left.
            length = int(re.sub(r'\(.+\)', '', bound)) + 1
        return '%s[%s]' % (element, length)
402
class ArraySubrangeDef(AnonymousDef):
    """One dimension of an array; adds no behaviour of its own."""
405
class FunctionDef(Def):
    """A function with a ``result`` type and a list of ``params``."""

    def _pp(self, pp):
        """Return the function printed as a C-like prototype."""
        rettype = pp.run(self.result)
        # An empty parameter list is spelled "void".
        if self.params:
            arglist = ', '.join([pp.run(p) for p in self.params])
        else:
            arglist = "void"
        return "%s %s(%s);" % (rettype, self.name, arglist)
414
class FunctionTypeDef(Def):
    """A function type with a ``result`` type and a list of ``params``."""

    def _pp(self, pp):
        """Return the type printed as ``F(name, result, (params))``."""
        rettype = pp.run(self.result)
        # An empty parameter list is spelled "void".
        if self.params:
            arglist = ', '.join([pp.run(p) for p in self.params])
        else:
            arglist = "void"
        return "F(%s, %s, (%s))" % (self._name_opt(), rettype, arglist)
423
class ParameterDef(Def):
    """A formal parameter: a type plus an optional name."""

    def _pp(self, pp):
        typename = pp.run(self.type)
        label = self._name_opt()
        return "%s %s" % (typename, label)
428
class VariableDef(Def):
    """A variable: a type plus an optional name."""

    def _pp(self, pp):
        typename = pp.run(self.type)
        label = self._name_opt()
        return "%s %s" % (typename, label)
433
434# TODO
class StructForwardDef(Def):
    """Placeholder for forward-declared structs; no behaviour yet."""
437
class IncompleteDef(Def):
    """Stand-in for a definition that is still being built."""

    def update(self, complete, cache=None):
        """Link this stand-in with its finished definition *complete*.

        Both objects get a reference to each other.  When *cache* is given
        and still holds a stand-in under this id, that entry is replaced
        by *complete*.
        """
        complete.incomplete, self.complete = self, complete
        if cache is None:
            return
        entry = cache.get(self.id)
        if isinstance(entry, IncompleteDef):
            cache.replace(self.id, complete)
446
class StructIncompleteDef(IncompleteDef):
    """Stand-in for a struct; printed by name only."""

    def _pp(self, pp):
        label = (self.name,)
        return "struct %s" % label
450
class UnionIncompleteDef(IncompleteDef):
    """Stand-in for a union; printed by name only."""

    def _pp(self, pp):
        label = (self.name,)
        return "union %s" % label
454
class StructDef(Def):
    """A struct with its list of ``members``."""

    def _pp_ex(self, pp, suffix=';'):
        """Return the full definition, member list included."""
        body = ' '.join([pp.run(member) for member in self.members])
        return "struct %s { %s }%s" % (self._name_opt(), body, suffix)

    def _pp(self, pp):
        """Return the short form used where the struct is referenced.

        A named struct is printed by name and its full definition is
        queued on the printer; an unnamed struct is expanded in place.
        """
        if not self.name:
            return self._pp_ex(pp, suffix='')
        pp.run_nested(self)
        return "struct %s" % (self.name,)
466
class UnionDef(Def):
    """A union with its list of ``members``."""

    def _pp_ex(self, pp, suffix=';'):
        """Return the full definition, member list included."""
        body = ' '.join([pp.run(member) for member in self.members])
        return "union %s { %s }%s" % (self._name_opt(), body, suffix)

    def _pp(self, pp):
        """Return the short form used where the union is referenced.

        A named union is printed by name and its full definition is
        queued on the printer; an unnamed union is expanded in place.
        """
        if not self.name:
            return self._pp_ex(pp, suffix='')
        pp.run_nested(self)
        return "union %s" % (self.name,)
478
class MemberDef(Def):
    """A struct/union member, optionally a bit field (``bit_size``)."""

    def _pp(self, pp):
        typename = pp.run(self.type)
        # Bit fields get a ':<bits>' suffix after the member name.
        width = ":%s" % self.bit_size if self.bit_size else ""
        return "%s %s%s;" % (typename, self._name_opt(), width)
487
class Dwarf(object):
    """Builds ``Def`` objects out of parsed dwarfdump tags.

    A tag ``DW_TAG_<kind>`` is handled by the method ``build_<kind>``.
    ``build()`` selects that method by name and memoizes the result in
    the cache of the tag's compilation unit.
    """

    # Cache of comparison results, used by Def.__cmp__.
    cmpcache = Cache(enabled=Config.cmpcache_enabled)

    def __init__(self, dump):
        self.dump = dump

    def _build_optarg_type(self, praw):
        """Resolve the optional 'type' argument of *praw*.

        Returns the referenced definition, or the Void singleton when the
        tag has no type argument.
        """
        void = Void()
        deftype = praw.optarg('type', void)
        if deftype is not void:
            deftype = self.buildref(praw.unit, deftype)
        return deftype

    def build_subprogram(self, raw):
        if raw.optname is None:
            # Unnamed subprograms get a synthetic name based on low_pc.
            raw.setname('SUBPROGRAM_NONAME_' + raw.arg('low_pc'))
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionDef(raw.id, raw.name, params=params, result=result)

    def build_variable(self, raw):
        deftype = self._build_optarg_type(raw)
        return VariableDef(raw.id, raw.optname, type=deftype)

    def build_subroutine_type(self, raw):
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionTypeDef(raw.id, raw.optname, params=params,
                result=result)

    def build_formal_parameter(self, raw):
        deftype = self._build_optarg_type(raw)
        return ParameterDef(raw.id, raw.optname, type=deftype)

    def build_pointer_type(self, raw):
        deftype = self._build_optarg_type(raw)
        return PointerDef(raw.id, type=deftype)

    def build_member(self, raw):
        deftype = self.buildref(raw.unit, raw.arg('type'))
        return MemberDef(raw.id, raw.name, type=deftype,
                bit_size=raw.optarg('bit_size', None))

    def build_structure_type(self, raw):
        """Build a StructDef, guarding against re-entry for the same id.

        A stand-in is registered before the members are built.  If one is
        already registered for this id (e.g. the struct refers to itself
        through a member), that stand-in is returned instead of building
        the struct again.
        """
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            return incomplete
        incomplete = StructIncompleteDef(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [self.build(x) for x in raw.nested]
        # NOTE: a struct without byte_size is still emitted as a StructDef
        # (byte_size=None); StructForwardDef is not used yet.  Building one
        # here only to discard it required a name and raised KeyError for
        # unnamed structs.
        byte_size = raw.optarg('byte_size', None)
        obj = StructDef(raw.id, raw.optname, members=members,
                byte_size=byte_size)
        incomplete.update(obj, cache=raw.unit.cache)
        return obj

    def build_union_type(self, raw):
        """Build a UnionDef, guarding against re-entry for the same id.

        Works like build_structure_type(), except that the stand-in and
        the finished union are linked directly and the unit cache is left
        untouched.
        """
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            return incomplete
        incomplete = UnionIncompleteDef(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [self.build(x) for x in raw.nested]
        byte_size = raw.optarg('byte_size', None)
        obj = UnionDef(raw.id, raw.optname, members=members,
                byte_size=byte_size)
        obj.incomplete = incomplete
        incomplete.complete = obj
        return obj

    def build_typedef(self, raw):
        deftype = self._build_optarg_type(raw)
        return TypeAliasDef(raw.id, raw.name, type=deftype)

    def build_const_type(self, raw):
        deftype = self._build_optarg_type(raw)
        return ConstTypeDef(raw.id, type=deftype)

    def build_volatile_type(self, raw):
        deftype = self._build_optarg_type(raw)
        return VolatileTypeDef(raw.id, type=deftype)

    def build_restrict_type(self, raw):
        deftype = self._build_optarg_type(raw)
        return RestrictTypeDef(raw.id, type=deftype)

    def build_enumeration_type(self, raw):
        # TODO handle DW_TAG_enumerator ???
        return EnumerationTypeDef(raw.id, name=raw.optname,
                byte_size=raw.arg('byte_size'))

    def build_base_type(self, raw):
        return BaseTypeDef(raw.id, raw.optname,
                byte_size=raw.arg('byte_size'), encoding=raw.arg('encoding'))

    def build_array_type(self, raw):
        deftype = self.buildref(raw.unit, raw.arg('type'))
        subranges = [self.build(x) for x in raw.nested]
        return ArrayDef(raw.id, type=deftype, subranges=subranges)

    def build_subrange_type(self, raw):
        deftype = self.buildref(raw.unit, raw.arg('type'))
        return ArraySubrangeDef(raw.id, type=deftype,
                upper_bound=raw.optarg('upper_bound', 0))

    def build_unspecified_parameters(self, raw):
        return VarArgs(raw.id)

    def _get_id(self, id):
        """Convert a tag reference to an integer id.

        Accepts a plain decimal number or a number wrapped in angle
        brackets (parsed with base auto-detection, e.g. ``<0x2d>``).

        Raises:
            ValueError: if *id* has neither form.
        """
        try:
            return int(id)
        except ValueError:
            if id.startswith('<') and id.endswith('>'):
                return int(id[1:-1], 0)
            raise ValueError("Invalid dwarf id: %s" % id)

    def build(self, raw):
        """Return the definition for the tag *raw*, building it on demand.

        Raises:
            AttributeError: if there is no builder for the tag's kind.
        """
        obj = raw.unit.cache.get(raw.id)
        if obj is not None:
            return obj
        builder_name = raw.tag.replace('DW_TAG_', 'build_')
        try:
            builder = getattr(self, builder_name)
        except AttributeError:
            raise AttributeError("Unknown dwarf tag: %s" % raw)
        obj = builder(raw)
        raw.unit.cache.put(obj.id, obj)
        return obj

    def buildref(self, unit, id):
        """Build the definition of the tag that *id* refers to in *unit*."""
        raw = unit.tags[self._get_id(id)]
        return self.build(raw)
627
628# }}}
629
class Shlib(object):
    """One shared library: its versioned symbols and their definitions.

    ``versions`` maps a version name to a VersionMap, ``alias_syms`` maps
    an un-prefixed name to its SymbolAlias, and ``local_offsetmap`` maps
    an offset to the names of the local symbols found there.
    """

    def __init__(self, libfile):
        self.libfile = libfile
        self.versions = {}
        self.alias_syms = {}
        # Stays empty unless alias prefixes are configured; defined here
        # so that parse_dwarfdump() can always consult it.
        self.local_offsetmap = {}

    def parse_objdump(self):
        """Collect dynamic symbols and, if configured, alias symbols."""
        objdump = ObjdumpParser(self.libfile)
        objdump.run()
        for p in objdump.dynamic_symbols:
            vername = p['ver']
            # Strip the parentheses that may surround the version name.
            if vername.startswith('(') and vername.endswith(')'):
                vername = vername[1:-1]
            if not Config.version_filter.match(vername):
                continue
            if not Config.symbol_filter.match(p['symbol']):
                continue
            sym = Symbol(p['symbol'], p['offset'], vername, self)
            if vername not in self.versions:
                self.versions[vername] = VersionMap(vername)
            self.versions[vername].append(sym)
        if Config.alias_prefixes:
            self.local_offsetmap = objdump.local_offsetmap
            for p in objdump.local_symbols:
                for prefix in Config.alias_prefixes:
                    if not p['symbol'].startswith(prefix):
                        continue
                    alias = SymbolAlias(p['symbol'], prefix, p['offset'])
                    if alias.name in self.alias_syms:
                        prevalias = self.alias_syms[alias.name]
                        if alias.name != prevalias.name or \
                                alias.offset != prevalias.offset:
                            warn(Config.w_alias, "Symbol alias is " \
                                    "already defined: %s: %s at %08x -- %s at %08x" % \
                                    (alias.alias, alias.name,  alias.offset,
                                            prevalias.name, prevalias.offset))
                    # The most recently seen alias wins.
                    self.alias_syms[alias.name] = alias

    def parse_dwarfdump(self):
        """Attach a definition to every symbol found in the debug info."""
        dwarfdump = DwarfdumpParser(self.libfile)

        def lookup(sym):
            """Return the debug tag for *sym*, or None if there is none.

            The symbol's own offset is tried first.  Failing that, the
            local names at that offset are tried (longest first) and the
            first one with a known alias decides the result.
            """
            raw = dwarfdump.offsetmap.get(sym.offset)
            if raw is not None:
                return raw
            localnames = self.local_offsetmap.get(sym.offset)
            if localnames is None:
                return None
            localnames.sort(key=lambda x: -len(x))
            for localname in localnames:
                alias = self.alias_syms.get(localname)
                if alias is None:
                    continue
                return dwarfdump.offsetmap.get(alias.offset)
            return None

        dwarfdump.run()
        dwarf = Dwarf(dwarfdump)
        for ver in self.versions.values():
            for sym in ver.symbols.values():
                raw = lookup(sym)
                if not raw:
                    warn(Config.w_symbol, "Symbol %s (%s) not found at offset 0x%x" % \
                            (sym.name_ver, self.libfile, sym.offset))
                    continue
                if Config.verbose >= 3:
                    print("Parsing symbol %s (%s)" % (sym.name_ver, self.libfile))
                sym.definition = dwarf.build(raw)

    def parse(self):
        """Parse the library; exits with status 1 if the file is missing."""
        if not os.path.isfile(self.libfile):
            print("No such file: %s" % self.libfile, file=sys.stderr)
            sys.exit(1)
        self.parse_objdump()
        self.parse_dwarfdump()
706
707# {{{ parsers
708
class Parser(object):
    """Runs a command and feeds its output, line by line, to a handler.

    ``self.parser`` is the current handler; it starts as ``parse_begin``
    and may be reassigned by a handler while the output is consumed.
    """

    def __init__(self, proc):
        self.proc = proc
        self.parser = self.parse_begin

    def run(self):
        """Run the command and dispatch every non-blank, stripped line.

        Exits with status 2 when closing the pipe reports a failure.
        """
        pipe = os.popen(self.proc, 'r')
        # readline() yields '' only at end of output.
        for chunk in iter(pipe.readline, ''):
            text = chunk.strip()
            if text:
                # Looked up on every line: handlers may switch parsers.
                self.parser(text)
        if pipe.close():
            print("Execution failed: %s" % self.proc, file=sys.stderr)
            sys.exit(2)

    def parse_begin(self, line):
        """Default handler: echo the line."""
        print(line)
730
class ObjdumpParser(Parser):
    """Parses the symbol tables printed by ``objdump -wtT``.

    Symbols of the dynamic table end up in ``dynamic_symbols``; symbols
    of the regular table end up in ``local_symbols`` and are indexed by
    offset in ``local_offsetmap``.  Symbols at offset 0 are dropped.
    """

    # Raw strings keep the regex escapes (\w, \s) out of Python's own
    # string-escape processing.
    re_header = re.compile(r'(?P<table>\w*)\s*SYMBOL TABLE:')

    re_local_symbol = re.compile(
            r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+'
            r'(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*'
            r'(?P<symbol>[^\s]*)')
    re_lame_symbol = re.compile(
            r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+\*[A-Z]+\*')

    re_dynamic_symbol = re.compile(
            r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+'
            r'(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*'
            r'(?P<ver>[^\s]*)\s*(?P<symbol>[^\s]*)')

    def __init__(self, libfile):
        Parser.__init__(self, "%s -wtT %s" % (Config.objdump, libfile))
        self.dynamic_symbols = []
        self.local_symbols = []
        self.local_offsetmap = {}

    def parse_begin(self, line):
        """Initial handler: wait for the first symbol table header."""
        self.parse_header(line)

    def add_symbol(self, table, symbol, offsetmap=None):
        """Append the parsed *symbol* dict to *table*.

        The 'offset' entry is converted from a hex string to an int in
        place.  Symbols at offset 0 are ignored.  When *offsetmap* is
        given, the symbol name is also recorded under its offset.
        """
        offset = int(symbol['offset'], 16)
        symbol['offset'] = offset
        if offset == 0:
            return
        table.append(symbol)
        # Compare against None: an empty dict is a valid map to fill.
        if offsetmap is not None:
            offsetmap.setdefault(offset, []).append(symbol['symbol'])

    def parse_header(self, line):
        """Switch the line handler if *line* is a symbol table header.

        Returns True when the line was a header, False otherwise.

        Raises:
            ValueError: for a table that is neither the dynamic nor the
                unnamed (regular) one.
        """
        m = self.re_header.match(line)
        if not m:
            return False
        table = m.group('table')
        if table == "DYNAMIC":
            self.parser = self.parse_dynamic
        elif table == '':
            self.parser = self.parse_local
        else:
            raise ValueError("Invalid symbol table: %s" % table)
        return True

    def parse_local(self, line):
        """Handle a line of the regular symbol table.

        Unparsable lines are skipped; symbols whose name contains '@' are
        not recorded.
        """
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_local_symbol.match(line)
        if not m:
            return
        p = m.groupdict()
        if p['symbol'] and p['symbol'].find('@') == -1:
            self.add_symbol(self.local_symbols, p, self.local_offsetmap)

    def parse_dynamic(self, line):
        """Handle a line of the dynamic symbol table.

        Only entries carrying both a version and a symbol name are kept.

        Raises:
            ValueError: if the line cannot be parsed.
        """
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_dynamic_symbol.match(line)
        if not m:
            raise ValueError("Invalid symbol definition: %s" % line)
        p = m.groupdict()
        if p['symbol'] and p['ver']:
            self.add_symbol(self.dynamic_symbols, p)
798
class DwarfdumpParser(Parser):
    """Parses the ``.debug_info`` dump printed by ``dwarfdump -di``.

    Every tag is stored in the ``tags`` dict of its compilation unit and
    linked into its parent's ``nested`` list.  Tags that carry an address
    are additionally indexed in ``offsetmap``.
    """

    # Hit/miss counters shared by the tag caches of all units.
    tagcache_stats = Cache.CacheStats()

    class Unit(object):
        """Per compilation unit state: caches and the tags by id."""

        def __init__(self):
            self.cache = Cache(enabled=Config.dwarfcache_enabled,
                    stats=DwarfdumpParser.tagcache_stats)
            self.incomplete = Cache()
            self.tags = {}

    class Tag(object):
        """One parsed tag: level, id, tag name, arguments, nested tags."""

        def __init__(self, unit, data):
            self.unit = unit
            self.id = int(data['id'], 0)
            self.level = int(data['level'])
            self.tag = data['tag']
            self.args = {}
            self.nested = []

        @property
        def name(self):
            """The DW_AT_name argument; KeyError if the tag has none."""
            return self.arg('name')

        @property
        def optname(self):
            """The DW_AT_name argument, or None if the tag has none."""
            return self.optarg('name', None)

        def setname(self, name):
            self.args['DW_AT_name'] = name

        def arg(self, a):
            """Return argument ``DW_AT_<a>``.

            Raises:
                KeyError: if the tag has no such argument.
            """
            name = 'DW_AT_' + a
            try:
                return self.args[name]
            except KeyError:
                raise KeyError("Argument '%s' not found in %s: %s" %
                        (name, self, self.args))

        def optarg(self, a, default):
            """Return argument ``DW_AT_<a>``, or *default* if missing."""
            try:
                return self.arg(a)
            except KeyError:
                return default

        def __repr__(self):
            return "Tag(%d, %d, %s)" % (self.level, self.id, self.tag)

    # Raw strings keep the regex escapes (\d, \w, \+) out of Python's own
    # string-escape processing.
    re_header = re.compile(
            r'<(?P<level>\d+)>'
            r'<(?P<id>[0xX0-9a-fA-F]+(?:\+(0[xX])?[0-9a-fA-F]+)?)>'
            r'<(?P<tag>\w+)>')
    re_argname = re.compile(r'(?P<arg>\w+)<')
    re_argunknown = re.compile(r'<Unknown AT value \w+><[^<>]+>')

    # Tags that are not linked into their parent's nested list.
    skip_tags = set([
        'DW_TAG_lexical_block',
        'DW_TAG_inlined_subroutine',
        'DW_TAG_label',
        'DW_TAG_variable',
        ])

    # Skipped tags that are still indexed by offset when they carry
    # DW_AT_external.
    external_tags = set([
        'DW_TAG_variable',
        ])

    def __init__(self, libfile):
        Parser.__init__(self, "%s -di %s" % (Config.dwarfdump, libfile))
        self.current_unit = None
        self.offsetmap = {}
        self.stack = []

    def parse_begin(self, line):
        """Initial handler: the first line must be '.debug_info'."""
        if line == '.debug_info':
            self.parser = self.parse_debuginfo
        else:
            raise ValueError("Invalid dwarfdump header: %s" % line)

    def parse_argvalue(self, args):
        """Split one ``<...>`` value off the front of *args*.

        Brackets may be nested.  Returns ``(rest, value)`` where *value*
        is the text between the outermost brackets.
        """
        assert args.startswith('<')
        i = 1
        cnt = 1
        while i < len(args) and args[i]:
            if args[i] == '<':
                cnt += 1
            elif args[i] == '>':
                cnt -= 1
                if cnt == 0:
                    break
            i = i + 1
        value = args[1:i]
        args = args[i+1:]
        return (args, value)

    def parse_arg(self, tag, args):
        """Parse one argument off *args* into ``tag.args``.

        An argument with a single value is stored as that string, one
        with several values as a list.  "Unknown AT value" entries are
        skipped.  Returns the unparsed remainder of *args*.

        Raises:
            ValueError: if the text does not start with an argument.
        """
        m = self.re_argname.match(args)
        if not m:
            m = self.re_argunknown.match(args)
            if not m:
                raise ValueError("Invalid dwarfdump: couldn't parse arguments: %s" %
                        args)
            args = args[len(m.group(0)):].lstrip()
            return args
        argname = m.group('arg')
        args = args[len(argname):]
        value = []
        while len(args) > 0 and args.startswith('<'):
            (args, v) = self.parse_argvalue(args)
            value.append(v)
        args = args.lstrip()
        if len(value) == 1:
            value = value[0]
        tag.args[argname] = value
        return args

    def parse_debuginfo(self, line):
        """Handle one tag line of the debug info dump.

        Raises:
            ValueError: if the line is not a tag line, or if two indexed
                tags share the same offset.
        """
        m = self.re_header.match(line)
        if not m:
            raise ValueError("Invalid dwarfdump: %s" % line)
        if m.group('level') == '0':
            # A level 0 tag opens a new compilation unit.
            self.current_unit = DwarfdumpParser.Unit()
            return
        tag = DwarfdumpParser.Tag(self.current_unit, m.groupdict())
        args = line[len(m.group(0)):].lstrip()
        while args:
            args = self.parse_arg(tag, args)
        tag.unit.tags[tag.id] = tag
        def parse_offset(tag):
            # Address of the tag: low_pc, else a DW_OP_addr location.
            if 'DW_AT_low_pc' in tag.args:
                return int(tag.args['DW_AT_low_pc'], 16)
            elif 'DW_AT_location' in tag.args:
                location = tag.args['DW_AT_location']
                if location.startswith('DW_OP_addr'):
                    return int(location.replace('DW_OP_addr', ''), 16)
            return None
        offset = parse_offset(tag)
        if offset is not None and \
                (tag.tag not in DwarfdumpParser.skip_tags or \
                ('DW_AT_external' in tag.args and \
                tag.tag in DwarfdumpParser.external_tags)):
            if offset in self.offsetmap:
                raise ValueError("Dwarf dump parse error: " +
                        "symbol is already defined at offset 0x%x" % offset)
            self.offsetmap[offset] = tag
        # Unwind the stack to the parent of this tag (the closest entry
        # with a smaller level) and link the tag into it.
        if len(self.stack) > 0:
            prev = self.stack.pop()
            while prev.level >= tag.level and len(self.stack) > 0:
                prev = self.stack.pop()
            if prev.level < tag.level:
                assert prev.level == tag.level - 1
                # TODO check DW_AT_sibling ???
                if tag.tag not in DwarfdumpParser.skip_tags:
                    prev.nested.append(tag)
                self.stack.append(prev)
        self.stack.append(tag)
        assert len(self.stack) == tag.level
952
953# }}}
954
def list_str(l):
    """Return the items of l as strings, sorted and joined with ', '.

    The items are converted with str() before sorting, so the order is the
    order of their string forms.
    """
    return ', '.join(sorted(str(item) for item in l))
959
def names_ver_str(vername, names):
    """Format names as a sorted, comma separated list of 'name@vername'."""
    versioned = []
    for name in names:
        versioned.append(name + "@" + vername)
    return list_str(versioned)
962
def common_symbols(origlib, newlib):
    """Pair up the symbols both libraries define under the same version.

    Along the way the versions and symbols that were added or removed are
    printed to stdout (the complete version lists only when
    Config.verbose >= 1).

    Returns a list with one VersionMap per version present in both
    libraries, each holding a CommonSymbol for every symbol name found in
    both.
    """
    verdiff = ListDiff(origlib.versions.keys(), newlib.versions.keys())
    if Config.verbose >= 1:
        print('Original versions:   ', list_str(verdiff.orig))
        print('New versions:        ', list_str(verdiff.new))
    for vername in verdiff.added:
        print('Added version:       ', vername)
        print('    Added symbols:   ',
                names_ver_str(vername, newlib.versions[vername].names()))
    for vername in verdiff.removed:
        print('Removed version:     ', vername)
        print('    Removed symbols: ',
                names_ver_str(vername, origlib.versions[vername].names()))

    result = []
    # One formatted entry per version; printed only after every common
    # version has been examined.
    added_lines = []
    removed_lines = []
    for vername in verdiff.common:
        origver = origlib.versions[vername]
        newver = newlib.versions[vername]
        namediff = ListDiff(origver.names(), newver.names())
        if namediff.added:
            added_lines.append(names_ver_str(vername, namediff.added))
        if namediff.removed:
            removed_lines.append(names_ver_str(vername, namediff.removed))
        commonver = VersionMap(vername)
        result.append(commonver)
        for name in namediff.common:
            commonver.append(
                    CommonSymbol(origver.symbols[name], newver.symbols[name]))

    for (title, lines) in (('Added symbols:', added_lines),
            ('Removed symbols:', removed_lines)):
        if lines:
            print(title)
            for entry in lines:
                print('    ', entry)
    return result
1001
def cmp_symbols(commonver):
    """Compare the two definitions of every common symbol and report.

    commonver is an iterable of version maps (the value returned by
    common_symbols); every symbol in them carries an origsym and a newsym.

    A symbol whose definitions differ, or for which either definition is
    None, sets App.result_code to 1.  Such symbols are always reported on
    stdout; matching symbols are reported only when Config.verbose >= 1.
    The definitions are pretty-printed to the output stream of each library
    when Config.dump is set, or on a mismatch unless Config.no_dump is set.
    """
    for ver in commonver:
        # sorted() instead of an in-place .sort(): names() may hand back a
        # dict view, which has no sort() method, and sorted() never reorders
        # a container owned by the version map.  dump_symbols iterates the
        # names the same way.
        for symname in sorted(ver.names()):
            sym = ver.symbols[symname]
            missing = sym.origsym.definition is None or sym.newsym.definition is None
            match = not missing and sym.origsym.definition == sym.newsym.definition
            if not match:
                App.result_code = 1
            if Config.verbose >= 1 or not match:
                if missing:
                    # Nothing to compare or dump without both definitions.
                    print('%s: missing definition' %
                            (sym.origsym.name_ver,))
                    continue
                print('%s: definitions %smatch' %
                        (sym.origsym.name_ver, "" if match else "mis"))
                if Config.dump or (not match and not Config.no_dump):
                    for (xsym, xconf) in [(sym.origsym, Config.origfile),
                            (sym.newsym, Config.newfile)]:
                        xout = xconf.out
                        if not xsym.definition:
                            print('\n// Definition not found: %s %s' %
                                    (xsym.name_ver, xsym.lib.libfile), file=xout)
                            continue
                        print('\n// Definitions mismatch: %s %s' %
                                (xsym.name_ver, xsym.lib.libfile), file=xout)
                        pp = PrettyPrinter()
                        pp.run(xsym.definition)
                        # Nested definitions first, then the symbol itself.
                        for i in pp.nested():
                            print(i, file=xout)
                        print(pp.result(), file=xout)
1035
def dump_symbols(commonver):
    """Pretty-print the definitions of the common symbols, version by version.

    For every version map in commonver two dumps are produced, one written
    to Config.origfile.out and one to Config.newfile.out.  A symbol is
    dumped only when both of its definitions are present; otherwise a
    warning is issued through warn() and the symbol is left out of both
    dumps.
    """
    class SymbolDump(object):
        """Collects the dump of one version for one output file."""
        def __init__(self, io_conf, version):
            self.io_conf = io_conf
            self.version = version
            self.pp = PrettyPrinter()
            self.res = []
        def run(self, sym):
            text = self.pp.run(sym.definition)
            self.res.append('/* %s@%s */ %s' % (sym.name, sym.version, text))
        def finish(self):
            out = self.io_conf.out
            print('\n// Symbol dump: version %s, library %s' %
                    (self.version.name, self.io_conf.filename), file=out)
            # Nested definitions go first, then a blank line, then one line
            # per dumped symbol.
            for line in self.pp.nested():
                print(line, file=out)
            print('', file=out)
            for line in self.res:
                print(line, file=out)

    for ver in commonver:
        names = sorted(ver.names())
        dumps = (SymbolDump(Config.origfile, ver),
                SymbolDump(Config.newfile, ver))
        for symname in names:
            sym = ver.symbols[symname]
            if sym.origsym.definition and sym.newsym.definition:
                dumps[0].run(sym.origsym)
                dumps[1].run(sym.newsym)
            else:
                # XXX
                warn(Config.w_symbol, 'Missing symbol definition: %s@%s' %
                        (symname, ver.name))
        for dump in dumps:
            dump.finish()
1068
if __name__ == '__main__':
    # Command line entry point: parse the options into Config, load both
    # libraries, report the differences and exit with App.result_code.
    Config.init()
    parser = optparse.OptionParser(usage="usage: %prog origlib newlib",
            version="%prog " + Config.version)
    parser.add_option('-v', '--verbose', action='count',
            help="verbose mode, may be specified several times")
    parser.add_option('--alias-prefix', action='append',
            help="name prefix to try for symbol alias lookup", metavar="STR")
    parser.add_option('--dump', action='store_true',
            help="dump symbol definitions")
    parser.add_option('--no-dump', action='store_true',
            help="disable dump for mismatched symbols")
    # Options that take a single value (output files and tool paths).
    for (opt_flag, opt_help, opt_meta) in (
            ('--out-orig', "result output file for original library",
                "ORIGFILE"),
            ('--out-new', "result output file for new library", "NEWFILE"),
            ('--dwarfdump', "path to dwarfdump executable", "DWARFDUMP"),
            ('--objdump', "path to objdump executable", "OBJDUMP")):
        parser.add_option(opt_flag, action='store', help=opt_help,
                metavar=opt_meta)
    # --exclude-ver, --include-ver, --exclude-sym, --include-sym
    for filter_kind in ('ver', 'sym'):
        for filter_verb in ('exclude', 'include'):
            parser.add_option('--%s-%s' % (filter_verb, filter_kind),
                    action='append', metavar="RE")
    parser.add_option('--no-exclude-sym-default', action='store_true',
            help="don't exclude special symbols like _init, _end, __bss_start")
    # Each warning gets an enabling and a disabling flag sharing one dest;
    # the dest stays None when neither flag is given.
    for warn_name in ('alias', 'cached', 'symbol'):
        parser.add_option("--w-" + warn_name,
                action="store_true", dest="w_" + warn_name)
        parser.add_option("--w-no-" + warn_name,
                action="store_false", dest="w_" + warn_name)
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        sys.exit(-1)
    for tool_name in ('dwarfdump', 'objdump'):
        tool_path = getattr(opts, tool_name)
        if tool_path:
            setattr(Config, tool_name, tool_path)
    if opts.out_orig:
        Config.origfile.init(opts.out_orig)
    if opts.out_new:
        Config.newfile.init(opts.out_new)
    # --dump is handled after --no-dump, so it wins when both are given.
    if opts.no_dump:
        Config.dump, Config.no_dump = False, True
    if opts.dump:
        Config.dump, Config.no_dump = True, False
        Config.verbose = 1
    if opts.verbose:
        Config.verbose = opts.verbose
    if opts.alias_prefix:
        Config.alias_prefixes = opts.alias_prefix
        # Longest prefixes first.
        Config.alias_prefixes.sort(key=len, reverse=True)
    for (opt_suffix, name_filter) in (('_sym', Config.symbol_filter),
            ('_ver', Config.version_filter)):
        for filter_verb in ('exclude', 'include'):
            patterns = getattr(opts, filter_verb + opt_suffix)
            if patterns:
                getattr(name_filter, filter_verb).extend(patterns)
    if not opts.no_exclude_sym_default:
        Config.symbol_filter.exclude.extend(Config.exclude_sym_default)
    Config.version_filter.compile()
    Config.symbol_filter.compile()
    # Only warnings chosen explicitly on the command line override Config.
    for warn_attr in ('w_alias', 'w_cached', 'w_symbol'):
        warn_setting = getattr(opts, warn_attr, None)
        if warn_setting is not None:
            setattr(Config, warn_attr, warn_setting)

    Config.origfile.filename = args[0]
    Config.newfile.filename = args[1]

    origlib = Shlib(Config.origfile.filename)
    origlib.parse()
    newlib = Shlib(Config.newfile.filename)
    newlib.parse()

    commonver = common_symbols(origlib, newlib)
    if Config.dump:
        dump_symbols(commonver)
    cmp_symbols(commonver)
    if Config.verbose >= 4:
        print(Dwarf.cmpcache.stats.show('Cmp'))
        print(DwarfdumpParser.tagcache_stats.show('Dwarf tag'))

    sys.exit(App.result_code)
1157