1#!/usr/bin/env python
2#-
3# Copyright (c) 2010 Gleb Kurtsou
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions
8# are met:
9# 1. Redistributions of source code must retain the above copyright
10#    notice, this list of conditions and the following disclaimer.
11# 2. Redistributions in binary form must reproduce the above copyright
12#    notice, this list of conditions and the following disclaimer in the
13#    documentation and/or other materials provided with the distribution.
14#
15# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25# SUCH DAMAGE.
26#
27# $FreeBSD$
28
29import os
30import sys
31import re
32import optparse
33
class Config(object):
    """Process-wide settings, kept as class attributes.

    The class is used as a plain namespace.  Config.init() must be called
    before the version/symbol filters are used, because they start as None.
    """
    version = '0.1'

    # --- controlled by user ---
    verbose = 0
    dump = False
    no_dump = False
    version_filter = None   # replaced by a StrFilter in init()
    symbol_filter = None    # replaced by a StrFilter in init()
    alias_prefixes = []

    # --- misc opts ---
    objdump = 'objdump'
    dwarfdump = 'dwarfdump'

    # --- debug ---
    cmpcache_enabled = True
    dwarfcache_enabled = True
    w_alias = True
    w_cached = False
    w_symbol = True

    class FileConfig(object):
        """Settings for one input library: a file name and an output stream."""
        filename = None
        out = sys.stdout

        def init(self, outname):
            """Open outname for writing as this file's output stream.

            An empty/None name or '-' leaves the class-level default
            stream in place.
            """
            if not outname or outname == '-':
                return
            self.out = open(outname, "w")

    origfile = FileConfig()
    newfile = FileConfig()

    @classmethod
    def init(cls):
        """Install fresh, empty version and symbol filters."""
        cls.version_filter, cls.symbol_filter = StrFilter(), StrFilter()
67
class App(object):
    # Holder for the process result code; starts at 0.
    # NOTE(review): nothing in the visible part of the file modifies it --
    # presumably it is raised when differences are detected; confirm.
    result_code = 0
70
def warn(cond, msg):
    """Write "WARN: <msg>" followed by a newline to stderr if cond is true.

    cond -- any value; the warning is emitted only when it is truthy.
    msg  -- warning text (a string; it is concatenated to the prefix).
    """
    if cond:
        # sys.stderr.write() instead of the ``print >>`` statement so the
        # function behaves the same on Python 2 and Python 3.
        sys.stderr.write("WARN: " + msg + "\n")
74
75# {{{ misc
76
class StrFilter(object):
    """Include/exclude filter based on regular expressions.

    Patterns are collected as strings in ``include`` and ``exclude``;
    compile() turns them into regex objects used by match().  Patterns are
    applied with re.match(), i.e. anchored at the start of the string.
    """

    def __init__(self):
        self.exclude = []
        self.include = []
        # Compiled forms.  Initialised here so that match() is usable (and
        # accepts everything) even if compile() has not been called yet.
        self.re_exclude = []
        self.re_include = []

    def compile(self):
        """(Re)build the compiled pattern lists from include/exclude."""
        self.re_exclude = [re.compile(x) for x in self.exclude]
        self.re_include = [re.compile(x) for x in self.include]

    def match(self, s):
        """Return True if s passes the filter.

        With at least one include pattern, s must match one of them.
        In all cases s must not match any exclude pattern.
        """
        if self.re_include and \
                not any(r.match(s) for r in self.re_include):
            return False
        return not any(r.match(s) for r in self.re_exclude)
99
class Cache(object):
    """Dictionary-backed cache with hit/miss accounting.

    A disabled cache stores nothing and reports every get() as a miss.
    """

    class CacheStats(object):
        """Hit/miss counters; one instance may be shared by several caches."""

        def __init__(self):
            self.hit = 0
            self.miss = 0

        def show(self, name):
            """Return a one-line summary of the counters labelled with name."""
            total = self.hit + self.miss
            if total == 0:
                ratio = '(undef)'
            else:
                ratio = '%f' % (self.hit/float(total))
            return '%s cache stats: hit: %d; miss: %d; ratio: %s' % \
                    (name, self.hit, self.miss, ratio)

    def __init__(self, enabled=True, stats=None):
        """Create an empty cache.

        enabled -- when false, put()/replace() are no-ops and get() misses.
        stats   -- CacheStats object to update; a private one is created
                   when omitted.
        """
        self.enabled = enabled
        self.items = {}
        if stats is None:
            stats = Cache.CacheStats()
        self.stats = stats

    def get(self, id):
        """Return the object cached under id, or None on a miss."""
        # ``in`` replaces dict.has_key(), which does not exist on Python 3.
        if self.enabled and id in self.items:
            self.stats.hit += 1
            return self.items[id]
        self.stats.miss += 1
        return None

    def put(self, id, obj):
        """Store obj under id, overwriting any previous entry.

        Overwriting with a different object is reported through warn(),
        controlled by Config.w_cached.
        """
        if not self.enabled:
            return
        if id in self.items and obj is not self.items[id]:
            #raise ValueError("Item is already cached: %d (%s, %s)" %
            #        (id, self.items[id], obj))
            warn(Config.w_cached, "Item is already cached: %d (%s, %s)" % \
                    (id, self.items[id], obj))
        self.items[id] = obj

    def replace(self, id, obj):
        """Replace the entry for id, which must already be cached."""
        if self.enabled:
            assert id in self.items
            self.items[id] = obj
145
class ListDiff(object):
    """Set-based difference of two iterables.

    Attributes (all sets): orig, new, common (in both), added (only in
    new) and removed (only in orig).
    """

    def __init__(self, orig, new):
        before, after = set(orig), set(new)
        self.orig = before
        self.new = after
        self.common = before.intersection(after)
        self.added = after.difference(self.common)
        self.removed = before.difference(self.common)
153
class PrettyPrinter(object):
    """Drives the _pp()/_pp_ex() pretty-print hooks of definition objects.

    run() renders an object through its _pp() hook; run_nested() collects
    extended renderings (_pp_ex()) that nested() later returns.
    """

    def __init__(self):
        self.stack = []
        # Defined up front so result() is safe to call before run().
        self._result = None

    def run_nested(self, obj):
        """Record the extended rendering of obj for nested()."""
        ex = obj._pp_ex(self)
        self.stack.append(ex)

    def run(self, obj):
        """Render obj via obj._pp(self), remember and return the result."""
        self._result = obj._pp(self)
        return self._result

    def nested(self):
        """Return the recorded extended renderings, de-duplicated and sorted."""
        return sorted(set(self.stack))

    def result(self):
        """Return the value of the most recent run(), or None if never run."""
        return self._result
171
172# }}}
173
174#{{{ symbols and version maps
175
class Symbol(object):
    """A versioned symbol of a library.

    name, offset, version and lib are stored as given; definition starts
    as None and is assigned later by the code that parses debug info.
    """

    def __init__(self, name, offset, version, lib):
        self.name, self.offset = name, offset
        self.version, self.lib = version, lib
        self.definition = None

    @property
    def name_ver(self):
        """The symbol name and version joined as "name@version"."""
        return '@'.join((self.name, self.version))

    def __repr__(self):
        fields = (self.name, self.offset, self.version)
        return "Symbol(%s, 0x%x, %s)" % fields
190
class CommonSymbol(object):
    """Pairs the original and the new instance of the same symbol.

    Raises RuntimeError when the two symbols differ in name or version.
    """

    def __init__(self, origsym, newsym):
        if origsym.name != newsym.name or origsym.version != newsym.version:
            # The message has to be %-formatted here: passing the list as a
            # second exception argument left a literal "%s" in the output.
            raise RuntimeError("Symbols have different names: %s" %
                    ([origsym, newsym],))
        self.origsym = origsym
        self.newsym = newsym
        self.name = newsym.name
        self.version = newsym.version

    def __repr__(self):
        return "CommonSymbol(%s, %s)" % (self.name, self.version)
203
class SymbolAlias(object):
    """A prefixed local symbol treated as an alias of the unprefixed name.

    alias  -- the full symbol name, which must begin with prefix.
    name   -- the alias with the prefix stripped.
    offset -- the symbol offset, stored as given.
    """

    def __init__(self, alias, prefix, offset):
        assert alias.startswith(prefix)
        stripped = alias[len(prefix):]
        self.alias, self.name, self.offset = alias, stripped, offset

    def __repr__(self):
        fields = (self.alias, self.offset)
        return "SymbolAlias(%s, 0x%x)" % fields
213
214
class VersionMap(object):
    """The symbols belonging to one version name, keyed by symbol name."""

    def __init__(self, name):
        self.name = name
        self.symbols = {}

    def append(self, symbol):
        """Add symbol; raise ValueError if its name is already present."""
        # ``in`` replaces dict.has_key(), which was removed in Python 3.
        if symbol.name in self.symbols:
            raise ValueError("Symbol is already defined %s@%s" %
                    (symbol.name, self.name))
        self.symbols[symbol.name] = symbol

    def names(self):
        """Return the symbol names as a new list (callers sort it in place)."""
        return list(self.symbols)

    def __repr__(self):
        # list() keeps the "[...]" form on Python 3, where values() is a view.
        return repr(list(self.symbols.values()))
231
232# }}}
233
234# {{{ types and definitions
235
class Def(object):
    """Base class of all type/function definitions.

    Every definition has an ``id``, a ``name`` (may be None) and a
    dictionary of extra attributes, ``attrs``, whose entries are also
    readable as ordinary instance attributes through __getattr__.
    """
    # Subclasses that merely wrap another definition (stored in the
    # ``type`` attribute) set this to True; see _alias().
    _is_alias = False

    def __init__(self, id, name, **kwargs):
        self.id = id
        self.name = name
        self.attrs = kwargs

    def __getattr__(self, attr):
        """Fall back to the ``attrs`` dictionary for unknown attributes."""
        # __getattr__ runs only when normal lookup fails.  If ``attrs``
        # itself is missing (object created without __init__), looking it
        # up below would re-enter this method without bound.
        if attr == 'attrs':
            raise AttributeError(attr)
        if attr not in self.attrs:
            raise AttributeError('%s in %s' % (attr, str(self)))
        return self.attrs[attr]

    def _name_opt(self, default=''):
        """Return the name, or default when the name is empty/None."""
        if not self.name:
            return default
        return self.name

    def _alias(self):
        """Follow alias definitions down to the first non-alias one."""
        if self._is_alias:
            return self.type._alias()
        return self

    def __cmp__(self, other):
        """Python 2 comparison hook: compare two definitions structurally.

        Aliases are resolved on both sides first.  Anything without an
        _alias() method compares as different (1).  Definitions of
        different classes compare as equal (0) -- see the commented-out
        raise below.  Results for pairs with non-zero ids are memoised
        in Dwarf.cmpcache.
        """
        # TODO assert 'self' and 'other' belong to different libraries
        #print 'cmp defs: %s, %s' % (self, other)
        a = self._alias()
        try:
            b = other._alias()
        except AttributeError:
            return 1
        r = cmp(a.__class__, b.__class__)
        if r == 0:
            if a.id != 0 and b.id != 0:
                # Combine both ids into a single cache key.
                ind = (long(a.id) << 32) + b.id
                r = Dwarf.cmpcache.get(ind)
                if r != None:
                    return r
            else:
                # Key 0 means "do not cache this comparison".
                ind = 0
            r = cmp(a.attrs, b.attrs)
            if ind != 0:
                Dwarf.cmpcache.put(ind, r)
        else:
            r = 0
            #raise RuntimeError('Comparing different classes: %s, %s' %
            #        (a.__class__.__name__, b.__class__.__name__))
        return r

    def __repr__(self):
        p = []
        if hasattr(self, 'name'):
            p.append("name=%s" % self.name)
        for (k, v) in self.attrs.items():
            if isinstance(v, Def):
                # Do not recurse into nested definitions.
                v = v.__class__.__name__ + '(...)'
            p.append("%s=%s" % (k, v))
        return self.__class__.__name__ + '(' + ', '.join(p) + ')'

    def _mapval(self, param, vals):
        """Return vals[param]; raise NotImplementedError if it is missing."""
        if param not in vals:
            raise NotImplementedError("Invalid value '%s': %s" %
                    (param, str(self)))
        return vals[param]

    def _pp_ex(self, pp):
        """Extended pretty-print hook; subclasses override it."""
        raise NotImplementedError('Extended pretty print not implemeted: %s' %
                str(self))

    def _pp(self, pp):
        """Pretty-print hook; subclasses override it."""
        raise NotImplementedError('Pretty print not implemeted: %s' % str(self))
307
class AnonymousDef(Def):
    """A definition that never carries a name (its name is always None)."""

    def __init__(self, id, **kwargs):
        super(AnonymousDef, self).__init__(id, None, **kwargs)
311
class Void(AnonymousDef):
    """Singleton definition of ``void``; always has id 0."""
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Identity test rather than truthiness: only "no instance created
        # yet" should trigger creation.  Constructor arguments are accepted
        # for signature compatibility but not passed on, because
        # object.__new__() rejects extra arguments.
        if cls._instance is None:
            cls._instance = super(Void, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        AnonymousDef.__init__(self, 0)

    def _pp(self, pp):
        return "void"
326
class VarArgs(AnonymousDef):
    """Marker for a variable argument list; pretty-printed as "..."."""
    def _pp(self, pp):
        return "..."
330
class PointerDef(AnonymousDef):
    """Pointer to the definition stored in the ``type`` attribute."""

    def _pp(self, pp):
        # Render the pointee, then append the pointer marker.
        return "%s*" % (pp.run(self.type),)
335
class BaseTypeDef(Def):
    """A base (scalar) type described by ``encoding`` and ``byte_size``."""
    inttypes = ['DW_ATE_signed', 'DW_ATE_unsigned', 'DW_ATE_unsigned_char']

    def _pp(self, pp):
        """Render the type by encoding and size rather than by its name.

        Raises NotImplementedError for encodings not handled here.
        """
        encoding = self.encoding
        if encoding in self.inttypes:
            # Integers become [u]int<bits>_t.
            if encoding == 'DW_ATE_signed':
                prefix = ''
            else:
                prefix = 'u'
            return '%sint%s_t' % (prefix, int(self.byte_size) * 8)
        if encoding == 'DW_ATE_signed_char' and int(self.byte_size) == 1:
            return 'char'
        if encoding == 'DW_ATE_float':
            # byte_size is looked up as a string key.
            float_names = {
                '16': 'long double',
                '8': 'double',
                '4': 'float',
            }
            return self._mapval(self.byte_size, float_names)
        raise NotImplementedError('Invalid encoding: %s' % self)
352
class TypeAliasDef(Def):
    """A typedef: a named alias of the definition in ``type``."""
    _is_alias = True

    def _pp(self, pp):
        target = self._alias()
        # Push the typedef name down to the aliased definition when that
        # definition has no name of its own.
        if self.name:
            if not target.name:
                target.name = 'T(%s)' % self.name
        # Return the underlying type together with its modifiers.
        return self.type._pp(pp)
362
class EnumerationTypeDef(Def):
    """Enumeration type; pretty-printed by name only."""
    def _pp(self, pp):
        # Unnamed enumerations are rendered as "enum UNKNOWN".
        return 'enum ' + self._name_opt('UNKNOWN')
366
class ConstTypeDef(AnonymousDef):
    """``const`` qualifier wrapped around the definition in ``type``."""
    # Marked as an alias, so Def._alias() looks through it to ``type``.
    _is_alias = True
    def _pp(self, pp):
        return 'const ' + self.type._pp(pp)
371
class VolatileTypeDef(AnonymousDef):
    """``volatile`` qualifier wrapped around the definition in ``type``."""
    # Marked as an alias, so Def._alias() looks through it to ``type``.
    _is_alias = True
    def _pp(self, pp):
        return 'volatile ' + self.type._pp(pp)
376
class ArrayDef(AnonymousDef):
    """Array of ``type`` with exactly one entry in ``subranges``."""

    def _pp(self, pp):
        elem = pp.run(self.type)
        assert len(self.subranges) == 1
        bound = self.subranges[0].upper_bound
        try:
            count = int(bound) + 1
        except ValueError:
            # The bound is not a plain integer string; drop a
            # parenthesised part and retry.
            count = int(re.sub(r'\(.+\)', '', bound)) + 1
        return '%s[%s]' % (elem, count)
387
class ArraySubrangeDef(AnonymousDef):
    # Plain attribute holder: adds no behaviour of its own.  ArrayDef._pp()
    # reads the ``upper_bound`` attribute of its subrange.
    pass
390
class FunctionDef(Def):
    """Function with ``result`` and ``params``; rendered as a prototype."""

    def _pp(self, pp):
        ret = pp.run(self.result)
        if self.params:
            arglist = ', '.join(pp.run(p) for p in self.params)
        else:
            # An empty parameter list is printed as "void".
            arglist = "void"
        return "%s %s(%s);" % (ret, self.name, arglist)
399
class FunctionTypeDef(Def):
    """Function type, rendered as F(name, result, (params))."""

    def _pp(self, pp):
        ret = pp.run(self.result)
        if self.params:
            arglist = ', '.join(pp.run(p) for p in self.params)
        else:
            # An empty parameter list is printed as "void".
            arglist = "void"
        return "F(%s, %s, (%s))" % (self._name_opt(), ret, arglist)
408
class ParameterDef(Def):
    """Function parameter, rendered as "<type> <name>"."""

    def _pp(self, pp):
        # The name part is empty for unnamed parameters.
        return "%s %s" % (pp.run(self.type), self._name_opt())
413
414# TODO
class StructForwardDef(Def):
    # Placeholder class with no behaviour of its own: it defines no
    # pretty-print hooks, so the Def defaults (which raise
    # NotImplementedError) apply.
    pass
417
class IncompleteDef(Def):
    """Stand-in for a definition that is still being built."""

    def update(self, complete, cache=None):
        """Link this placeholder with its finished definition.

        complete -- the finished definition; it receives an ``incomplete``
                    back-reference and is stored here as ``complete``.
        cache    -- optional cache; if it currently holds a placeholder
                    under this id, that entry is replaced by ``complete``.
        """
        self.complete = complete
        complete.incomplete = self
        if cache is None:
            return
        cached = cache.get(self.id)
        # isinstance() already rejects None.  Avoiding ``cached != None``
        # matters because Def objects define __cmp__, so that comparison
        # would be routed through the structural comparison code.
        if isinstance(cached, IncompleteDef):
            cache.replace(self.id, complete)
426
class StructIncompleteDef(IncompleteDef):
    """Placeholder for a struct; rendered by name only."""
    def _pp(self, pp):
        return "struct %s" % (self.name,)
430
class UnionIncompleteDef(IncompleteDef):
    """Placeholder for a union; rendered by name only."""
    def _pp(self, pp):
        return "union %s" % (self.name,)
434
class StructDef(Def):
    """Structure definition with a list of ``members``."""

    def _pp_ex(self, pp, suffix=';'):
        """Render the full definition including all members."""
        body = ' '.join([pp.run(member) for member in self.members])
        return "struct %s { %s }%s" % (self._name_opt(), body, suffix)

    def _pp(self, pp):
        """Render a reference to the struct.

        Anonymous structs are expanded in place.  Named ones are printed
        by name and their full definition is queued on the printer.
        """
        if not self.name:
            return self._pp_ex(pp, suffix='')
        pp.run_nested(self)
        return "struct %s" % (self.name,)
446
class UnionDef(Def):
    """Union definition with a list of ``members``."""

    def _pp_ex(self, pp, suffix=';'):
        """Render the full definition including all members."""
        body = ' '.join([pp.run(member) for member in self.members])
        return "union %s { %s }%s" % (self._name_opt(), body, suffix)

    def _pp(self, pp):
        """Render a reference to the union.

        Anonymous unions are expanded in place.  Named ones are printed
        by name and their full definition is queued on the printer.
        """
        if not self.name:
            return self._pp_ex(pp, suffix='')
        pp.run_nested(self)
        return "union %s" % (self.name,)
458
class MemberDef(Def):
    """Struct/union member with a ``type`` and an optional ``bit_size``."""

    def _pp(self, pp):
        rendered = pp.run(self.type)
        width = self.bit_size
        # Bit-fields get a ":<bits>" suffix after the member name.
        bits = ":%s" % width if width else ""
        return "%s %s%s;" % (rendered, self._name_opt(), bits)
467
class Dwarf(object):
    """Builds Def objects out of parsed dwarfdump tags.

    build() dispatches on the tag name: a tag "DW_TAG_xxx" is handled by
    the method "build_xxx".  Built objects are cached per unit in
    raw.unit.cache.
    """

    # Cache of Def comparison results, used by Def.__cmp__.
    cmpcache = Cache(enabled=Config.cmpcache_enabled)

    def __init__(self, dump):
        self.dump = dump

    def _build_optarg_type(self, praw):
        """Build the definition referenced by DW_AT_type, or Void()."""
        ref = praw.optarg('type', None)
        if ref is None:
            # No type attribute at all.
            return Void()
        return self.buildref(praw.unit, ref)

    def build_subprogram(self, raw):
        if raw.optname is None:
            # Unnamed subprograms get a synthetic name based on low_pc.
            raw.setname('SUBPROGRAM_NONAME_' + raw.arg('low_pc'))
        params = [ self.build(x) for x in raw.nested ]
        result = self._build_optarg_type(raw)
        return FunctionDef(raw.id, raw.name, params=params, result=result)

    def build_subroutine_type(self, raw):
        params = [ self.build(x) for x in raw.nested ]
        result = self._build_optarg_type(raw)
        return FunctionTypeDef(raw.id, raw.optname, params=params, result=result)

    def build_formal_parameter(self, raw):
        type = self._build_optarg_type(raw)
        return ParameterDef(raw.id, raw.optname, type=type)

    def build_pointer_type(self, raw):
        type = self._build_optarg_type(raw)
        return PointerDef(raw.id, type=type)

    def build_member(self, raw):
        type = self.buildref(raw.unit, raw.arg('type'))
        return MemberDef(raw.id, raw.name, type=type,
                bit_size=raw.optarg('bit_size', None))

    def build_structure_type(self, raw):
        """Build a StructDef, returning a placeholder on re-entry.

        While the members are being built a StructIncompleteDef is
        registered for this id; a nested request for the same struct (a
        self-referencing member) receives that placeholder.
        """
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            return incomplete
        incomplete = StructIncompleteDef(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [ self.build(x) for x in raw.nested ]
        byte_size = raw.optarg('byte_size', None)
        # TODO: structures without byte_size are forward declarations.
        # The StructForwardDef previously constructed here was overwritten
        # immediately (and raised KeyError for unnamed structures), so
        # such structures are built as a StructDef with byte_size=None.
        obj = StructDef(raw.id, raw.optname, members=members,
                byte_size=byte_size)
        incomplete.update(obj, cache=raw.unit.cache)
        return obj

    def build_union_type(self, raw):
        """Build a UnionDef, returning a placeholder on re-entry."""
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            return incomplete
        incomplete = UnionIncompleteDef(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [ self.build(x) for x in raw.nested ]
        byte_size = raw.optarg('byte_size', None)
        obj = UnionDef(raw.id, raw.optname, members=members,
                byte_size=byte_size)
        # Linked by hand; unlike build_structure_type() the unit cache is
        # not updated here.
        obj.incomplete = incomplete
        incomplete.complete = obj
        return obj

    def build_typedef(self, raw):
        type = self._build_optarg_type(raw)
        return TypeAliasDef(raw.id, raw.name, type=type)

    def build_const_type(self, raw):
        type = self._build_optarg_type(raw)
        return ConstTypeDef(raw.id, type=type)

    def build_volatile_type(self, raw):
        type = self._build_optarg_type(raw)
        return VolatileTypeDef(raw.id, type=type)

    def build_enumeration_type(self, raw):
        # TODO handle DW_TAG_enumerator ???
        return EnumerationTypeDef(raw.id, name=raw.optname,
                byte_size=raw.arg('byte_size'))

    def build_base_type(self, raw):
        return BaseTypeDef(raw.id, raw.optname,
                byte_size=raw.arg('byte_size'), encoding=raw.arg('encoding'))

    def build_array_type(self, raw):
        type = self.buildref(raw.unit, raw.arg('type'))
        subranges = [ self.build(x) for x in raw.nested ]
        return ArrayDef(raw.id, type=type, subranges=subranges)

    def build_subrange_type(self, raw):
        type = self.buildref(raw.unit, raw.arg('type'))
        return ArraySubrangeDef(raw.id, type=type,
                upper_bound=raw.optarg('upper_bound', 0))

    def build_unspecified_parameters(self, raw):
        return VarArgs(raw.id)

    def _get_id(self, id):
        """Convert a reference ("123" or "<123>") to an integer id.

        Raises ValueError for any other string.
        """
        try:
            return int(id)
        except ValueError:
            if id.startswith('<') and id.endswith('>'):
                return int(id[1:-1])
            raise ValueError("Invalid dwarf id: %s" % id)

    def build(self, raw):
        """Return the Def for tag raw, building and caching it if needed.

        Raises AttributeError when there is no builder for the tag.
        """
        obj = raw.unit.cache.get(raw.id)
        # Identity test: ``obj != None`` would go through Def.__cmp__.
        if obj is not None:
            return obj
        builder_name = raw.tag.replace('DW_TAG_', 'build_')
        try:
            builder = getattr(self, builder_name)
        except AttributeError:
            raise AttributeError("Unknown dwarf tag: %s" % raw)
        obj = builder(raw)
        raw.unit.cache.put(obj.id, obj)
        return obj

    def buildref(self, unit, id):
        """Build the definition of the tag that reference id points to."""
        raw = unit.tags[self._get_id(id)]
        return self.build(raw)
599
600# }}}
601
class Shlib(object):
    """A shared library: its versioned symbols and their definitions.

    versions        -- version name -> VersionMap of exported symbols.
    alias_syms      -- unprefixed name -> SymbolAlias (local aliases).
    local_offsetmap -- offset -> list of local symbol names; filled by
                       parse_objdump() when alias prefixes are configured.
    """

    def __init__(self, libfile):
        self.libfile = libfile
        self.versions = {}
        self.alias_syms = {}
        # Always defined, so the lookup in parse_dwarfdump() needs no
        # catch-all exception handler when aliases are not in use.
        self.local_offsetmap = {}

    def parse_objdump(self):
        """Collect versioned dynamic symbols and, optionally, aliases."""
        objdump = ObjdumpParser(self.libfile)
        objdump.run()
        for p in objdump.dynamic_symbols:
            vername = p['ver']
            # Version names may be wrapped in parentheses; strip them.
            if vername.startswith('(') and vername.endswith(')'):
                vername = vername[1:-1]
            if not Config.version_filter.match(vername):
                continue
            if not Config.symbol_filter.match(p['symbol']):
                continue
            sym = Symbol(p['symbol'], p['offset'], vername, self)
            if vername not in self.versions:
                self.versions[vername] = VersionMap(vername)
            self.versions[vername].append(sym)
        if not Config.alias_prefixes:
            return
        self.local_offsetmap = objdump.local_offsetmap
        for p in objdump.local_symbols:
            for prefix in Config.alias_prefixes:
                if not p['symbol'].startswith(prefix):
                    continue
                alias = SymbolAlias(p['symbol'], prefix, p['offset'])
                prevalias = self.alias_syms.get(alias.name)
                if prevalias is not None and \
                        (alias.name != prevalias.name or
                        alias.offset != prevalias.offset):
                    warn(Config.w_alias, "Symbol alias is " \
                            "already defined: %s: %s at %08x -- %s at %08x" % \
                            (alias.alias, alias.name,  alias.offset,
                                    prevalias.name, prevalias.offset))
                # The most recently seen alias wins.
                self.alias_syms[alias.name] = alias

    def parse_dwarfdump(self):
        """Attach a definition built from debug info to every symbol."""
        dwarfdump = DwarfdumpParser(self.libfile)

        def lookup(sym):
            """Return the tag for sym, directly or via an alias; else None."""
            try:
                return dwarfdump.offsetmap[sym.offset]
            except KeyError:
                pass
            # Fall back to local names at the same offset, longest first,
            # and use the first one that is a known alias.
            localnames = self.local_offsetmap.get(sym.offset, [])
            for localname in sorted(localnames, key=len, reverse=True):
                alias = self.alias_syms.get(localname)
                if alias is None:
                    continue
                return dwarfdump.offsetmap.get(alias.offset)
            return None

        dwarfdump.run()
        dwarf = Dwarf(dwarfdump)
        for ver in self.versions.values():
            for sym in ver.symbols.values():
                raw = lookup(sym)
                if not raw:
                    warn(Config.w_symbol, "Symbol %s (%s) not found at offset 0x%x" % \
                            (sym.name_ver, self.libfile, sym.offset))
                    continue
                if Config.verbose >= 3:
                    sys.stdout.write("Parsing symbol %s (%s)\n" %
                            (sym.name_ver, self.libfile))
                sym.definition = dwarf.build(raw)

    def parse(self):
        """Parse the library; exit with status 1 if the file is missing."""
        if not os.path.isfile(self.libfile):
            sys.stderr.write("No such file: %s\n" % self.libfile)
            sys.exit(1)
        self.parse_objdump()
        self.parse_dwarfdump()
678
679# {{{ parsers
680
class Parser(object):
    """Runs a command and feeds its output, line by line, to a handler.

    ``self.parser`` is the current line handler.  It starts as
    parse_begin, and handlers may reassign it to switch state.
    """

    def __init__(self, proc):
        # proc is a command line string executed through os.popen(),
        # i.e. interpreted by the shell.
        self.proc = proc
        self.parser = self.parse_begin

    def run(self):
        """Run the command and dispatch each non-blank, stripped line.

        Exits the process with status 2 if the command reports failure.
        """
        fd = os.popen(self.proc, 'r')
        err = None
        try:
            for line in fd:
                line = line.strip()
                if line:
                    # Looked up on every line: handlers may replace it.
                    self.parser(line)
        finally:
            # Close the pipe even when a handler raises, so the child
            # process is always reaped.
            err = fd.close()
        if err:
            sys.stderr.write("Execution failed: %s\n" % self.proc)
            sys.exit(2)

    def parse_begin(self, line):
        """Default handler: echo the line."""
        print(line)
702
class ObjdumpParser(Parser):
    """Parses the symbol tables printed by ``objdump -wtT``.

    After run():
      dynamic_symbols -- list of dicts for versioned dynamic symbols.
      local_symbols   -- list of dicts for unversioned local symbols.
      local_offsetmap -- offset -> list of local symbol names.
    Symbols at offset 0 are ignored.
    """

    # Raw strings: the patterns are unchanged, but "\w", "\s" and "\*"
    # are invalid escape sequences in ordinary string literals.
    re_header = re.compile(r'(?P<table>\w*)\s*SYMBOL TABLE:')

    re_local_symbol = re.compile(r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*(?P<symbol>[^\s]*)')
    re_lame_symbol = re.compile(r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+\*[A-Z]+\*')

    re_dynamic_symbol = re.compile(r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*(?P<ver>[^\s]*)\s*(?P<symbol>[^\s]*)')

    def __init__(self, libfile):
        Parser.__init__(self, "%s -wtT %s" % (Config.objdump, libfile))
        self.dynamic_symbols = []
        self.local_symbols = []
        self.local_offsetmap = {}

    def parse_begin(self, line):
        """Initial state: wait for a "SYMBOL TABLE:" header."""
        self.parse_header(line)

    def add_symbol(self, table, symbol, offsetmap = None):
        """Append symbol to table unless its offset is zero.

        The 'offset' entry of symbol is converted from a hex string to an
        integer in place.  When offsetmap is given, the symbol name is
        also recorded under its offset.
        """
        offset = int(symbol['offset'], 16)
        symbol['offset'] = offset
        if offset == 0:
            return
        table.append(symbol)
        if offsetmap is not None:
            offsetmap.setdefault(offset, []).append(symbol['symbol'])

    def parse_header(self, line):
        """Switch state on a table header; return True if line was one.

        Raises ValueError for a table other than DYNAMIC or the unnamed
        one.
        """
        m = self.re_header.match(line)
        if not m:
            return False
        table = m.group('table')
        if table == "DYNAMIC":
            self.parser = self.parse_dynamic
        elif table == '':
            self.parser = self.parse_local
        else:
            raise ValueError("Invalid symbol table: %s" % table)
        return True

    def parse_local(self, line):
        """Handle a line of the local symbol table."""
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_local_symbol.match(line)
        if not m:
            # Unparsable lines are skipped silently in this table.
            return
            #raise ValueError("Invalid symbol definition: %s" % line)
        p = m.groupdict()
        # Keep only named symbols that carry no '@version' suffix.
        if p['symbol'] and p['symbol'].find('@') == -1:
            self.add_symbol(self.local_symbols, p, self.local_offsetmap)

    def parse_dynamic(self, line):
        """Handle a line of the dynamic symbol table.

        Raises ValueError for a line that cannot be parsed.
        """
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_dynamic_symbol.match(line)
        if not m:
            raise ValueError("Invalid symbol definition: %s" % line)
        p = m.groupdict()
        # Only versioned symbols are of interest.
        if p['symbol'] and p['ver']:
            self.add_symbol(self.dynamic_symbols, p)
770
class DwarfdumpParser(Parser):
    """Parses the ``.debug_info`` listing printed by ``dwarfdump -di``.

    After run():
      offsetmap -- DW_AT_low_pc value (as int) -> Tag, for tags that are
                   not in skip_tags.
    Each Tag also records its nested tags and its compilation unit.
    """

    # Statistics shared by the tag caches of all units.
    tagcache_stats = Cache.CacheStats()

    class Unit(object):
        """State of one compilation unit (started by a level-0 tag)."""
        def __init__(self):
            self.cache = Cache(enabled=Config.dwarfcache_enabled,
                    stats=DwarfdumpParser.tagcache_stats)
            self.incomplete = Cache()
            self.tags = {}

    class Tag(object):
        """One parsed tag: level, id, tag name, attributes, children."""
        def __init__(self, unit, data):
            self.unit = unit
            self.id = int(data['id'])
            self.level = int(data['level'])
            self.tag = data['tag']
            self.args = {}
            self.nested = []

        @property
        def name(self):
            """DW_AT_name; raises KeyError when absent."""
            return self.arg('name')

        @property
        def optname(self):
            """DW_AT_name, or None when absent."""
            return self.optarg('name', None)

        def setname(self, name):
            self.args['DW_AT_name'] = name

        def arg(self, a):
            """Return attribute DW_AT_<a>; raise KeyError if missing."""
            name = 'DW_AT_' + a
            try:
                return self.args[name]
            except KeyError:
                raise KeyError("Argument '%s' not found in %s: %s" %
                        (name, self, self.args))

        def optarg(self, a, default):
            """Return attribute DW_AT_<a>, or default if missing."""
            try:
                return self.arg(a)
            except KeyError:
                return default

        def __repr__(self):
            return "Tag(%d, %d, %s)" % (self.level, self.id, self.tag)

    # Raw strings: same patterns, without invalid escape sequences in
    # ordinary string literals.
    re_header = re.compile(r'<(?P<level>\d+)><(?P<id>\d+\+*\d*)><(?P<tag>\w+)>')
    re_argname = re.compile(r'(?P<arg>\w+)<')
    re_argunknown = re.compile(r'<Unknown AT value \w+><[^<>]+>')

    # Tags that are neither attached to their parent nor entered into
    # offsetmap.
    skip_tags = set([
        'DW_TAG_lexical_block',
        'DW_TAG_inlined_subroutine',
        'DW_TAG_label',
        'DW_TAG_variable',
        ])

    def __init__(self, libfile):
        Parser.__init__(self, "%s -di %s" % (Config.dwarfdump, libfile))
        self.current_unit = None
        self.offsetmap = {}
        self.stack = []

    def parse_begin(self, line):
        """Initial state: the first line must be '.debug_info'."""
        if line == '.debug_info':
            self.parser = self.parse_debuginfo
        else:
            raise ValueError("Invalid dwarfdump header: %s" % line)

    def parse_argvalue(self, args):
        """Split one leading "<...>" value off args.

        Angle brackets may nest.  Returns (rest, value), where value is
        the text between the outermost pair of brackets.
        """
        assert args.startswith('<')
        i = 1
        cnt = 1     # current bracket nesting depth
        while i < len(args) and args[i]:
            if args[i] == '<':
                cnt += 1
            elif args[i] == '>':
                cnt -= 1
                if cnt == 0:
                    break
            i = i + 1
        value = args[1:i]
        args = args[i+1:]
        return (args, value)

    def parse_arg(self, tag, args):
        """Parse one attribute at the start of args into tag.args.

        An attribute with a single value is stored as a string; with any
        other number of values, as a list.  "<Unknown AT value ...>"
        entries are skipped.  Returns the unparsed remainder and raises
        ValueError on unrecognised input.
        """
        m = self.re_argname.match(args)
        if not m:
            m = self.re_argunknown.match(args)
            if not m:
                raise ValueError("Invalid dwarfdump: couldn't parse arguments: %s" %
                        args)
            args = args[len(m.group(0)):].lstrip()
            return args
        argname = m.group('arg')
        args = args[len(argname):]
        value = []
        while len(args) > 0 and args.startswith('<'):
            (args, v) = self.parse_argvalue(args)
            value.append(v)
        args = args.lstrip()
        if len(value) == 1:
            value = value[0]
        tag.args[argname] = value
        return args

    def parse_debuginfo(self, line):
        """Handle one tag line of the .debug_info listing.

        A level-0 tag starts a new Unit.  Any other tag is parsed,
        registered in its unit, entered into offsetmap when it has a
        DW_AT_low_pc, and attached to its parent using the stack of
        currently open tags.
        """
        m = self.re_header.match(line)
        if not m:
            raise ValueError("Invalid dwarfdump: %s" % line)
        if m.group('level') == '0':
            self.current_unit = DwarfdumpParser.Unit()
            return
        tag = DwarfdumpParser.Tag(self.current_unit, m.groupdict())
        args = line[len(m.group(0)):].lstrip()
        while args:
            args = self.parse_arg(tag, args)
        tag.unit.tags[tag.id] = tag
        # ``in`` replaces dict.has_key(), which was removed in Python 3.
        if 'DW_AT_low_pc' in tag.args and \
                tag.tag not in DwarfdumpParser.skip_tags:
            offset = int(tag.args['DW_AT_low_pc'], 16)
            if offset in self.offsetmap:
                raise ValueError("Dwarf dump parse error: " +
                        "symbol is aleady defined at offset 0x%x" % offset)
            self.offsetmap[offset] = tag
        if len(self.stack) > 0:
            # Pop tags at the same or a deeper level; what remains on top
            # (if anything) is the parent of this tag.
            prev = self.stack.pop()
            while prev.level >= tag.level and len(self.stack) > 0:
                prev = self.stack.pop()
            if prev.level < tag.level:
                assert prev.level == tag.level - 1
                # TODO check DW_AT_sibling ???
                if tag.tag not in DwarfdumpParser.skip_tags:
                    prev.nested.append(tag)
                self.stack.append(prev)
        self.stack.append(tag)
        # The stack depth always equals the level of the newest tag.
        assert len(self.stack) == tag.level
910
911# }}}
912
def list_str(l):
    """Return the items of *l* as strings, sorted and joined with ', '.

    Items are converted with str() before sorting, so ordering is
    lexicographic on the string form.  The argument is not modified.
    """
    return ', '.join(sorted(str(item) for item in l))
917
def names_ver_str(vername, names):
    """Return *names* formatted as a sorted 'name@vername, ...' string."""
    qualified = []
    for name in names:
        qualified.append(name + "@" + vername)
    return list_str(qualified)
920
921def common_symbols(origlib, newlib):
922    result = []
923    verdiff = ListDiff(origlib.versions.keys(), newlib.versions.keys())
924    if Config.verbose >= 1:
925        print 'Original versions:   ', list_str(verdiff.orig)
926        print 'New versions:        ', list_str(verdiff.new)
927    for vername in verdiff.added:
928        print 'Added version:       ', vername
929        print '    Added symbols:   ', \
930                names_ver_str(vername, newlib.versions[vername].names())
931    for vername in verdiff.removed:
932        print 'Removed version:     ', vername
933        print '    Removed symbols: ', \
934                names_ver_str(vername, origlib.versions[vername].names())
935    added = []
936    removed = []
937    for vername in verdiff.common:
938        origver = origlib.versions[vername]
939        newver = newlib.versions[vername]
940        namediff = ListDiff(origver.names(), newver.names())
941        if namediff.added:
942            added.append(names_ver_str(vername, namediff.added))
943        if namediff.removed:
944            removed.append(names_ver_str(vername, namediff.removed))
945        commonver = VersionMap(vername)
946        result.append(commonver)
947        for n in namediff.common:
948            sym = CommonSymbol(origver.symbols[n], newver.symbols[n])
949            commonver.append(sym)
950    if added:
951        print 'Added symbols:'
952        for i in added:
953            print '    ', i
954    if removed:
955        print 'Removed symbols:'
956        for i in removed:
957            print '    ', i
958    return result
959
960def cmp_symbols(commonver):
961    for ver in commonver:
962        names = ver.names();
963        names.sort()
964        for symname in names:
965            sym = ver.symbols[symname]
966            match = sym.origsym.definition == sym.newsym.definition
967            if not match:
968                App.result_code = 1
969            if Config.verbose >= 1 or not match:
970                print '%s: definitions %smatch' % \
971                        (sym.origsym.name_ver, "" if match else "mis")
972                if Config.dump or (not match and not Config.no_dump):
973                    for x in [(sym.origsym, Config.origfile),
974                            (sym.newsym, Config.newfile)]:
975                        xsym = x[0]
976                        xout = x[1].out
977                        if not xsym.definition:
978                            print >> xout, '\n// Definition not found: %s %s' % \
979                                    (xsym.name_ver, xsym.lib.libfile)
980                            continue
981                        print >> xout, '\n// Definitions mismatch: %s %s' % \
982                                (xsym.name_ver, xsym.lib.libfile)
983                        pp = PrettyPrinter()
984                        pp.run(xsym.definition)
985                        for i in pp.nested():
986                            print >> xout, i
987                        print >> xout, pp.result()
988
def dump_symbols(commonver):
    """Dump pretty-printed definitions of all common symbols, per version.

    For every version, each symbol (in sorted name order) that has a
    definition in both libraries is run through a per-library
    PrettyPrinter.  Symbols missing a definition on either side are
    skipped with a warning.  The dump for the original library goes to
    Config.origfile.out and the one for the new library to
    Config.newfile.out.
    """
    def emit(io_conf, vername, printer, lines):
        # Header, then the printer's nested definitions, a blank line and
        # finally one line per dumped symbol.
        out = io_conf.out
        print >> out, '\n// Symbol dump: version %s, library %s' % \
                (vername, io_conf.filename)
        for nested in printer.nested():
            print >> out, nested
        print >> out, ''
        for entry in lines:
            print >> out, entry

    for ver in commonver:
        names = sorted(ver.names())
        orig_conf, orig_pp, orig_lines = Config.origfile, PrettyPrinter(), []
        new_conf, new_pp, new_lines = Config.newfile, PrettyPrinter(), []
        for symname in names:
            sym = ver.symbols[symname]
            if not (sym.origsym.definition and sym.newsym.definition):
                # XXX
                warn(Config.w_symbol, 'Missing symbol definition: %s@%s' % \
                        (symname, ver.name))
                continue
            for printer, lines, side in ((orig_pp, orig_lines, sym.origsym),
                    (new_pp, new_lines, sym.newsym)):
                text = printer.run(side.definition)
                lines.append('/* %s@%s */ %s' %
                        (side.name, side.version, text))
        emit(orig_conf, ver.name, orig_pp, orig_lines)
        emit(new_conf, ver.name, new_pp, new_lines)
1021
1022if __name__ == '__main__':
1023    Config.init()
1024    parser = optparse.OptionParser(usage="usage: %prog origlib newlib",
1025            version="%prog " + Config.version)
1026    parser.add_option('-v', '--verbose', action='count',
1027            help="verbose mode, may be specified several times")
1028    parser.add_option('--alias-prefix', action='append',
1029            help="name prefix to try for symbol alias lookup", metavar="STR")
1030    parser.add_option('--dump', action='store_true',
1031            help="dump symbol definitions")
1032    parser.add_option('--no-dump', action='store_true',
1033            help="disable dump for mismatched symbols")
1034    parser.add_option('--out-orig', action='store',
1035            help="result output file for original library", metavar="ORIGFILE")
1036    parser.add_option('--out-new', action='store',
1037            help="result output file for new library", metavar="NEWFILE")
1038    parser.add_option('--exclude-ver', action='append', metavar="RE")
1039    parser.add_option('--include-ver', action='append', metavar="RE")
1040    parser.add_option('--exclude-sym', action='append', metavar="RE")
1041    parser.add_option('--include-sym', action='append', metavar="RE")
1042    for opt in ['alias', 'cached', 'symbol']:
1043        parser.add_option("--w-" + opt,
1044                action="store_true", dest="w_" + opt)
1045        parser.add_option("--w-no-" + opt,
1046                action="store_false", dest="w_" + opt)
1047    (opts, args) = parser.parse_args()
1048
1049    if len(args) != 2:
1050        parser.print_help()
1051        sys.exit(-1)
1052    if opts.out_orig:
1053        Config.origfile.init(opts.out_orig)
1054    if opts.out_new:
1055        Config.newfile.init(opts.out_new)
1056    if opts.no_dump:
1057        Config.dump = False
1058        Config.no_dump = True
1059    if opts.dump:
1060        Config.dump = True
1061        Config.no_dump = False
1062        Config.verbose = 1
1063    if opts.verbose:
1064        Config.verbose = opts.verbose
1065    if opts.alias_prefix:
1066        Config.alias_prefixes = opts.alias_prefix
1067        Config.alias_prefixes.sort(key=lambda x: -len(x))
1068    for (k, v) in ({ '_sym': Config.symbol_filter,
1069            '_ver': Config.version_filter }).items():
1070        for a in [ 'exclude', 'include' ]:
1071            opt = getattr(opts, a + k)
1072            if opt:
1073                getattr(v, a).extend(opt)
1074    Config.version_filter.compile()
1075    Config.symbol_filter.compile()
1076    for w in ['w_alias', 'w_cached', 'w_symbol']:
1077        if hasattr(opts, w):
1078            v = getattr(opts, w)
1079            if v != None:
1080                setattr(Config, w, v)
1081
1082    (Config.origfile.filename, Config.newfile.filename) = (args[0], args[1])
1083
1084    origlib = Shlib(Config.origfile.filename)
1085    origlib.parse()
1086    newlib = Shlib(Config.newfile.filename)
1087    newlib.parse()
1088
1089    commonver = common_symbols(origlib, newlib)
1090    if Config.dump:
1091        dump_symbols(commonver)
1092    cmp_symbols(commonver)
1093    if Config.verbose >= 4:
1094        print Dwarf.cmpcache.stats.show('Cmp')
1095        print DwarfdumpParser.tagcache_stats.show('Dwarf tag')
1096
1097    sys.exit(App.result_code)
1098