#!/usr/bin/env python
#-
# Copyright (c) 2010 Gleb Kurtsou
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$

import os
import sys
import re
import optparse

class Config(object):
    """Program-wide settings kept as class attributes.

    The defaults below are overridden from the command line by the
    ``__main__`` section of this script.
    """
    version = '0.1'
    # controlled by user
    verbose = 0
    dump = False
    no_dump = False
    version_filter = None       # StrFilter, created by Config.init()
    symbol_filter = None        # StrFilter, created by Config.init()
    alias_prefixes = []
    # misc opts
    objdump = 'objdump'
    dwarfdump = 'dwarfdump'
    # debug
    cmpcache_enabled = True
    dwarfcache_enabled = True
    w_alias = True
    w_cached = False
    w_symbol = True

    class FileConfig(object):
        """Per-library settings: the library file name and output stream."""
        filename = None
        out = sys.stdout

        def init(self, outname):
            """Send output to file *outname* unless it is empty or '-'."""
            if outname and outname != '-':
                self.out = open(outname, "w")

    origfile = FileConfig()
    newfile = FileConfig()

    @classmethod
    def init(cls):
        """Create the (initially empty) version and symbol filters."""
        cls.version_filter = StrFilter()
        cls.symbol_filter = StrFilter()

class App(object):
    """Holds the process exit status."""
    result_code = 0

def warn(cond, msg):
    """Write "WARN: <msg>" to stderr when *cond* is true."""
    if cond:
        # A plain write() behaves the same on Python 2 and Python 3.
        sys.stderr.write("WARN: " + msg + "\n")

# {{{ misc

class StrFilter(object):
    """Include/exclude filter built from regular expression strings.

    Append patterns to ``include`` / ``exclude``, call compile(), then use
    match().
    """
    def __init__(self):
        self.exclude = []
        self.include = []

    def compile(self):
        """Compile the current pattern lists; must precede match()."""
        self.re_exclude = [re.compile(x) for x in self.exclude]
        self.re_include = [re.compile(x) for x in self.include]

    def match(self, s):
        """Return True if *s* passes the filter.

        With include patterns present *s* must match (at its start, as
        ``re.match`` does) at least one of them; it must match no exclude
        pattern.
        """
        if self.re_include and \
                not any(r.match(s) for r in self.re_include):
            return False
        return not any(r.match(s) for r in self.re_exclude)

class Cache(object):
    """Dictionary-backed cache with hit/miss accounting."""

    class CacheStats(object):
        """Hit/miss counters; may be shared between several caches."""
        def __init__(self):
            self.hit = 0
            self.miss = 0

        def show(self, name):
            """Return a one-line summary labelled with *name*."""
            total = self.hit + self.miss
            if total == 0:
                ratio = '(undef)'
            else:
                ratio = '%f' % (self.hit / float(total))
            return '%s cache stats: hit: %d; miss: %d; ratio: %s' % \
                    (name, self.hit, self.miss, ratio)

    def __init__(self, enabled=True, stats=None):
        self.enabled = enabled
        self.items = {}
        self.stats = Cache.CacheStats() if stats is None else stats

    def get(self, id):
        """Return the cached object for *id*, or None.

        A disabled cache reports every lookup as a miss.
        """
        if self.enabled and id in self.items:
            self.stats.hit += 1
            return self.items[id]
        self.stats.miss += 1
        return None

    def put(self, id, obj):
        """Store *obj* under *id*; overwriting a different object only
        produces a warning (when Config.w_cached is set)."""
        if not self.enabled:
            return
        if id in self.items and obj is not self.items[id]:
            warn(Config.w_cached, "Item is already cached: %d (%s, %s)" % \
                    (id, self.items[id], obj))
        self.items[id] = obj

    def replace(self, id, obj):
        """Overwrite the entry for *id*, which must already exist."""
        if self.enabled:
            assert id in self.items
            self.items[id] = obj

class ListDiff(object):
    """Set difference of two collections: common, added and removed items."""
    def __init__(self, orig, new):
        self.orig = set(orig)
        self.new = set(new)
        self.common = self.orig & self.new
        self.added = self.new - self.common
        self.removed = self.orig - self.common

class PrettyPrinter(object):
    """Drives the ``_pp`` / ``_pp_ex`` methods of definition objects."""
    def __init__(self):
        self.stack = []

    def run_nested(self, obj):
        """Record the extended form of *obj* for later output by nested()."""
        self.stack.append(obj._pp_ex(self))

    def run(self, obj):
        """Pretty print *obj*; the text is returned and kept for result()."""
        self._result = obj._pp(self)
        return self._result

    def nested(self):
        """Return the recorded extended forms, de-duplicated and sorted."""
        return sorted(set(self.stack))

    def result(self):
        """Return the text produced by the most recent run()."""
        return self._result

# }}}

#{{{ symbols and version maps

class Symbol(object):
    """A versioned symbol of one library; ``definition`` is filled in later."""
    def __init__(self, name, offset, version, lib):
        self.name = name
        self.offset = offset
        self.version = version
        self.lib = lib
        self.definition = None

    @property
    def name_ver(self):
        return self.name + '@' + self.version

    def __repr__(self):
        return "Symbol(%s, 0x%x, %s)" % (self.name, self.offset, self.version)

class CommonSymbol(object):
    """Pair of same-named, same-versioned symbols from both libraries.

    Raises RuntimeError if the names or versions differ.
    """
    def __init__(self, origsym, newsym):
        if origsym.name != newsym.name or origsym.version != newsym.version:
            # The symbols are interpolated into the message here; passing
            # them as a second exception argument left "%s" unformatted.
            raise RuntimeError("Symbols have different names: %s" %
                    ([origsym, newsym],))
        self.origsym = origsym
        self.newsym = newsym
        self.name = newsym.name
        self.version = newsym.version

    def __repr__(self):
        return "CommonSymbol(%s, %s)" % (self.name, self.version)

class SymbolAlias(object):
    """Local symbol *alias* whose name without *prefix* is the real name."""
    def __init__(self, alias, prefix, offset):
        assert alias.startswith(prefix)
        self.alias = alias
        self.name = alias[len(prefix):]
        self.offset = offset

    def __repr__(self):
        return "SymbolAlias(%s, 0x%x)" % (self.alias, self.offset)


class VersionMap(object):
    """Symbols of one version, keyed by symbol name."""
    def __init__(self, name):
        self.name = name
        self.symbols = {}

    def append(self, symbol):
        """Add *symbol*; raises ValueError if its name is already present."""
        if symbol.name in self.symbols:
            raise ValueError("Symbol is already defined %s@%s" %
                    (symbol.name, self.name))
        self.symbols[symbol.name] = symbol

    def names(self):
        """Return the symbol names as a new list (callers sort it)."""
        return list(self.symbols)

    def __repr__(self):
        return repr(list(self.symbols.values()))

# }}}

# {{{ types and definitions

class Def(object):
    """Base class of all type and function definitions.

    Keyword arguments given to the constructor are kept in ``attrs`` and
    are readable as ordinary attributes.  Subclasses with ``_is_alias`` set
    are transparent for comparison: their ``type`` is compared instead.
    """
    _is_alias = False

    def __init__(self, id, name, **kwargs):
        self.id = id
        self.name = name
        self.attrs = kwargs

    def __getattr__(self, attr):
        # Only called when normal attribute lookup fails.
        if attr not in self.attrs:
            raise AttributeError('%s in %s' % (attr, str(self)))
        return self.attrs[attr]

    def _name_opt(self, default=''):
        """Return the name, or *default* when the definition is unnamed."""
        if not self.name:
            return default
        return self.name

    def _alias(self):
        """Follow alias definitions down to the first non-alias one."""
        if self._is_alias:
            return self.type._alias()
        return self

    def __cmp__(self, other):
        """Compare two definitions; return 0 if equal, non-zero otherwise.

        Only ``attrs`` are compared (after resolving aliases); results for
        definitions with non-zero ids are memoized in Dwarf.cmpcache.
        """
        # TODO assert 'self' and 'other' belong to different libraries
        a = self._alias()
        try:
            b = other._alias()
        except AttributeError:
            # 'other' is not a definition at all (e.g. None or a string).
            return 1
        if a.__class__ is not b.__class__:
            # NOTE(review): definitions of different kinds are reported as
            # equal; a RuntimeError used to be raised here but had been
            # disabled by the author -- confirm this is still intended.
            return 0
        ind = 0
        if a.id != 0 and b.id != 0:
            ind = (int(a.id) << 32) + b.id
            r = Dwarf.cmpcache.get(ind)
            if r is not None:
                return r
        # Equality is all the callers need; dictionaries cannot be ordered
        # on Python 3, so no three-way result is computed.
        r = 0 if a.attrs == b.attrs else 1
        if ind != 0:
            Dwarf.cmpcache.put(ind, r)
        return r

    # Python 3 ignores __cmp__, so route the operators through it explicitly.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    # Defining __eq__ would otherwise make instances unhashable on Python 3.
    __hash__ = object.__hash__

    def __repr__(self):
        p = []
        if hasattr(self, 'name'):
            p.append("name=%s" % self.name)
        for (k, v) in self.attrs.items():
            if isinstance(v, Def):
                # Do not recurse into nested definitions.
                v = v.__class__.__name__ + '(...)'
            p.append("%s=%s" % (k, v))
        return self.__class__.__name__ + '(' + ', '.join(p) + ')'

    def _mapval(self, param, vals):
        """Return vals[param]; raise NotImplementedError for unknown keys."""
        if param not in vals:
            raise NotImplementedError("Invalid value '%s': %s" %
                    (param, str(self)))
        return vals[param]

    def _pp_ex(self, pp):
        raise NotImplementedError('Extended pretty print not implemeted: %s' %
                str(self))

    def _pp(self, pp):
        raise NotImplementedError('Pretty print not implemeted: %s' % str(self))

class AnonymousDef(Def):
    """Definition that never carries a name."""
    def __init__(self, id, **kwargs):
        Def.__init__(self, id, None, **kwargs)

class Void(AnonymousDef):
    """Singleton standing for an absent type; its id is 0."""
    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            # object.__new__ accepts no extra arguments.
            cls._instance = super(Void, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        AnonymousDef.__init__(self, 0)

    def _pp(self, pp):
        return "void"

class VarArgs(AnonymousDef):
    """Variable argument list marker; printed as "..."."""
    def _pp(self, pp):
        return "..."

class PointerDef(AnonymousDef):
    """Pointer type; printed as "<pointee>*"."""
    def _pp(self, pp):
        t = pp.run(self.type)
        return "%s*" % (t,)

class BaseTypeDef(Def):
    """Base type described by its ``encoding`` and ``byte_size`` attributes."""
    inttypes = ['DW_ATE_signed', 'DW_ATE_unsigned', 'DW_ATE_unsigned_char']

    def _pp(self, pp):
        """Return a size-explicit spelling such as "uint32_t" or "double".

        Raises NotImplementedError for encodings not handled here.
        """
        if self.encoding in self.inttypes:
            sign = '' if self.encoding == 'DW_ATE_signed' else 'u'
            bits = int(self.byte_size) * 8
            return '%sint%s_t' % (sign, bits)
        elif self.encoding == 'DW_ATE_signed_char' and int(self.byte_size) == 1:
            return 'char'
        elif self.encoding == 'DW_ATE_float':
            # byte_size is kept as the string read from the dump.
            return self._mapval(self.byte_size, {
                '16': 'long double',
                '8': 'double',
                '4': 'float',
            })
        raise NotImplementedError('Invalid encoding: %s' % self)

class TypeAliasDef(Def):
    """typedef; transparent for comparison."""
    _is_alias = True

    def _pp(self, pp):
        alias = self._alias()
        # push typedef name
        if self.name and not alias.name:
            alias.name = 'T(%s)' % self.name
        # return type with modifiers
        return self.type._pp(pp)

class EnumerationTypeDef(Def):
    """Enumeration; only its name is printed."""
    def _pp(self, pp):
        return 'enum ' + self._name_opt('UNKNOWN')

class ConstTypeDef(AnonymousDef):
    """const qualifier; transparent for comparison."""
    _is_alias = True

    def _pp(self, pp):
        return 'const ' + self.type._pp(pp)

class VolatileTypeDef(AnonymousDef):
    """volatile qualifier; transparent for comparison."""
    _is_alias = True

    def _pp(self, pp):
        return 'volatile ' + self.type._pp(pp)

class ArrayDef(AnonymousDef):
    """Array with exactly one subrange; printed as "<type>[<size>]"."""
    def _pp(self, pp):
        t = pp.run(self.type)
        assert len(self.subranges) == 1
        bound = self.subranges[0].upper_bound
        try:
            sz = int(bound) + 1
        except ValueError:
            # The bound carries a parenthesized annotation; drop it.
            sz = int(re.sub(r'\(.+\)', '', bound)) + 1
        return '%s[%s]' % (t, sz)

class ArraySubrangeDef(AnonymousDef):
    """One array dimension (``type`` and ``upper_bound`` attributes)."""
    pass

def _pp_params(pp, params):
    """Render a parameter list; an empty list is printed as "void"."""
    if not params:
        return "void"
    return ', '.join([pp.run(x) for x in params])

class FunctionDef(Def):
    """Function; printed as a prototype."""
    def _pp(self, pp):
        result = pp.run(self.result)
        params = _pp_params(pp, self.params)
        return "%s %s(%s);" % (result, self.name, params)

class FunctionTypeDef(Def):
    """Function type; printed as "F(name, result, (params))"."""
    def _pp(self, pp):
        result = pp.run(self.result)
        params = _pp_params(pp, self.params)
        return "F(%s, %s, (%s))" % (self._name_opt(), result, params)

class ParameterDef(Def):
    """Formal parameter; printed as "<type> <name>"."""
    def _pp(self, pp):
        t = pp.run(self.type)
        return "%s %s" % (t, self._name_opt())

# TODO
class StructForwardDef(Def):
    pass

class IncompleteDef(Def):
    """Placeholder handed out while the real definition is being built."""
    def update(self, complete, cache=None):
        """Link this placeholder with *complete*.

        If *cache* still holds a placeholder under this id, it is replaced
        by the complete definition.
        """
        self.complete = complete
        complete.incomplete = self
        if cache is not None:
            cached = cache.get(self.id)
            if isinstance(cached, IncompleteDef):
                cache.replace(self.id, complete)

class StructIncompleteDef(IncompleteDef):
    def _pp(self, pp):
        return "struct %s" % (self.name,)

class UnionIncompleteDef(IncompleteDef):
    def _pp(self, pp):
        return "union %s" % (self.name,)

class StructDef(Def):
    """Structure; named ones are printed by reference plus a nested body."""
    def _pp_ex(self, pp, suffix=';'):
        members = [pp.run(x) for x in self.members]
        return "struct %s { %s }%s" % \
                (self._name_opt(), ' '.join(members), suffix)

    def _pp(self, pp):
        if self.name:
            pp.run_nested(self)
            return "struct %s" % (self.name,)
        else:
            return self._pp_ex(pp, suffix='')

class UnionDef(Def):
    """Union; named ones are printed by reference plus a nested body."""
    def _pp_ex(self, pp, suffix=';'):
        members = [pp.run(x) for x in self.members]
        return "union %s { %s }%s" % \
                (self._name_opt(), ' '.join(members), suffix)

    def _pp(self, pp):
        if self.name:
            pp.run_nested(self)
            return "union %s" % (self.name,)
        else:
            return self._pp_ex(pp, suffix='')

class MemberDef(Def):
    """Struct/union member, with an optional bit-field width."""
    def _pp(self, pp):
        t = pp.run(self.type)
        if self.bit_size:
            bits = ":%s" % self.bit_size
        else:
            bits = ""
        return "%s %s%s;" % (t, self._name_opt(), bits)

class Dwarf(object):
    """Builds definition objects from parsed dwarfdump tags.

    A tag named DW_TAG_foo is handled by the method build_foo().
    """

    cmpcache = Cache(enabled=Config.cmpcache_enabled)

    def __init__(self, dump):
        self.dump = dump

    def _build_optarg_type(self, praw):
        """Build the definition referenced by the tag's optional 'type'
        argument; Void() when the tag has none."""
        type = praw.optarg('type', Void())
        if not isinstance(type, Void):
            type = self.buildref(praw.unit, type)
        return type

    def build_subprogram(self, raw):
        if raw.optname is None:
            raw.setname('SUBPROGRAM_NONAME_' + raw.arg('low_pc'))
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionDef(raw.id, raw.name, params=params, result=result)

    def build_subroutine_type(self, raw):
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionTypeDef(raw.id, raw.optname, params=params, result=result)

    def build_formal_parameter(self, raw):
        type = self._build_optarg_type(raw)
        return ParameterDef(raw.id, raw.optname, type=type)

    def build_pointer_type(self, raw):
        type = self._build_optarg_type(raw)
        return PointerDef(raw.id, type=type)

    def build_member(self, raw):
        type = self.buildref(raw.unit, raw.arg('type'))
        return MemberDef(raw.id, raw.name, type=type,
                bit_size=raw.optarg('bit_size', None))

    def _build_aggregate(self, raw, incomplete_cls, complete_cls):
        """Shared struct/union builder.

        A placeholder is registered before the members are built, so a
        member that refers back to the aggregate receives the placeholder
        instead of recursing without end.
        """
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            return incomplete
        incomplete = incomplete_cls(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [self.build(x) for x in raw.nested]
        byte_size = raw.optarg('byte_size', None)
        obj = complete_cls(raw.id, raw.optname, members=members,
                byte_size=byte_size)
        incomplete.update(obj, cache=raw.unit.cache)
        return obj

    def build_structure_type(self, raw):
        return self._build_aggregate(raw, StructIncompleteDef, StructDef)

    def build_union_type(self, raw):
        return self._build_aggregate(raw, UnionIncompleteDef, UnionDef)

    def build_typedef(self, raw):
        type = self._build_optarg_type(raw)
        return TypeAliasDef(raw.id, raw.name, type=type)

    def build_const_type(self, raw):
        type = self._build_optarg_type(raw)
        return ConstTypeDef(raw.id, type=type)

    def build_volatile_type(self, raw):
        type = self._build_optarg_type(raw)
        return VolatileTypeDef(raw.id, type=type)

    def build_enumeration_type(self, raw):
        # TODO handle DW_TAG_enumerator ???
        return EnumerationTypeDef(raw.id, name=raw.optname,
                byte_size=raw.arg('byte_size'))

    def build_base_type(self, raw):
        return BaseTypeDef(raw.id, raw.optname,
                byte_size=raw.arg('byte_size'), encoding=raw.arg('encoding'))

    def build_array_type(self, raw):
        type = self.buildref(raw.unit, raw.arg('type'))
        subranges = [self.build(x) for x in raw.nested]
        return ArrayDef(raw.id, type=type, subranges=subranges)

    def build_subrange_type(self, raw):
        type = self.buildref(raw.unit, raw.arg('type'))
        return ArraySubrangeDef(raw.id, type=type,
                upper_bound=raw.optarg('upper_bound', 0))

    def build_unspecified_parameters(self, raw):
        return VarArgs(raw.id)

    def _get_id(self, id):
        """Convert a tag reference ("123" or "<123>") to an int.

        Raises ValueError for anything else.
        """
        try:
            return int(id)
        except ValueError:
            if id.startswith('<') and id.endswith('>'):
                return int(id[1:-1])
            raise ValueError("Invalid dwarf id: %s" % id)

    def build(self, raw):
        """Return the definition for tag *raw*, building and caching it in
        the tag's unit on first use.

        Raises AttributeError for a tag without a build_* method.
        """
        obj = raw.unit.cache.get(raw.id)
        if obj is not None:
            return obj
        builder_name = raw.tag.replace('DW_TAG_', 'build_')
        try:
            builder = getattr(self, builder_name)
        except AttributeError:
            raise AttributeError("Unknown dwarf tag: %s" % raw)
        obj = builder(raw)
        raw.unit.cache.put(obj.id, obj)
        return obj

    def buildref(self, unit, id):
        """Build the definition of the tag that *id* refers to in *unit*."""
        return self.build(unit.tags[self._get_id(id)])

# }}}

def _emit(out, *args):
    """Write *args* to *out*, separated by single spaces and followed by a
    newline (the text a Python 2 ``print >> out, a, b`` statement produced);
    works unchanged on Python 2 and Python 3."""
    out.write(' '.join([str(a) for a in args]) + '\n')

class Shlib(object):
    """One shared library: its versioned symbols and their definitions."""
    def __init__(self, libfile):
        self.libfile = libfile
        self.versions = {}
        self.alias_syms = {}
        # Filled by parse_objdump() only when alias prefixes are configured.
        self.local_offsetmap = {}

    def parse_objdump(self):
        """Collect versioned dynamic symbols that pass the configured
        filters and, if alias prefixes are set, local alias symbols."""
        objdump = ObjdumpParser(self.libfile)
        objdump.run()
        for p in objdump.dynamic_symbols:
            vername = p['ver']
            if vername.startswith('(') and vername.endswith(')'):
                vername = vername[1:-1]
            if not Config.version_filter.match(vername):
                continue
            if not Config.symbol_filter.match(p['symbol']):
                continue
            sym = Symbol(p['symbol'], p['offset'], vername, self)
            if vername not in self.versions:
                self.versions[vername] = VersionMap(vername)
            self.versions[vername].append(sym)
        if Config.alias_prefixes:
            self.local_offsetmap = objdump.local_offsetmap
            for p in objdump.local_symbols:
                for prefix in Config.alias_prefixes:
                    if not p['symbol'].startswith(prefix):
                        continue
                    alias = SymbolAlias(p['symbol'], prefix, p['offset'])
                    if alias.name in self.alias_syms:
                        prevalias = self.alias_syms[alias.name]
                        if alias.name != prevalias.name or \
                                alias.offset != prevalias.offset:
                            warn(Config.w_alias, "Symbol alias is " \
                                    "already defined: %s: %s at %08x -- %s at %08x" % \
                                    (alias.alias, alias.name, alias.offset,
                                            prevalias.name, prevalias.offset))
                    self.alias_syms[alias.name] = alias

    def parse_dwarfdump(self):
        """Attach a definition to every symbol collected by parse_objdump();
        symbols without debug information only produce a warning."""
        dwarfdump = DwarfdumpParser(self.libfile)

        def lookup(sym):
            """Return the tag at the symbol's offset, else the tag of the
            first known alias among the local names at that offset (longest
            name first), else None."""
            raw = dwarfdump.offsetmap.get(sym.offset)
            if raw is not None:
                return raw
            localnames = self.local_offsetmap.get(sym.offset, [])
            localnames.sort(key=lambda x: -len(x))
            for localname in localnames:
                alias = self.alias_syms.get(localname)
                if alias is None:
                    continue
                # Only the first known alias is tried.
                return dwarfdump.offsetmap.get(alias.offset)
            return None

        dwarfdump.run()
        dwarf = Dwarf(dwarfdump)
        for ver in self.versions.values():
            for sym in ver.symbols.values():
                raw = lookup(sym)
                if raw is None:
                    warn(Config.w_symbol, "Symbol %s (%s) not found at offset 0x%x" % \
                            (sym.name_ver, self.libfile, sym.offset))
                    continue
                if Config.verbose >= 3:
                    _emit(sys.stdout, "Parsing symbol %s (%s)" %
                            (sym.name_ver, self.libfile))
                sym.definition = dwarf.build(raw)

    def parse(self):
        """Parse the library; exits with status 1 if the file is missing."""
        if not os.path.isfile(self.libfile):
            _emit(sys.stderr, "No such file: %s" % self.libfile)
            sys.exit(1)
        self.parse_objdump()
        self.parse_dwarfdump()

# {{{ parsers

class Parser(object):
    """Line-oriented parser of an external command's output.

    ``self.parser`` is the handler for the next line; handlers switch
    state by reassigning it.
    """
    def __init__(self, proc):
        self.proc = proc
        self.parser = self.parse_begin

    def run(self):
        """Run the command and pass each non-empty, stripped output line to
        the current handler; exits with status 2 if the command fails."""
        # NOTE: self.proc is run through the shell; file names are not quoted.
        fd = os.popen(self.proc, 'r')
        try:
            for line in fd:
                line = line.strip()
                if line:
                    self.parser(line)
        finally:
            # Close the pipe even when a handler raises.
            err = fd.close()
        if err:
            _emit(sys.stderr, "Execution failed: %s" % self.proc)
            sys.exit(2)

    def parse_begin(self, line):
        print(line)

class ObjdumpParser(Parser):
    """Parses the symbol tables printed by "objdump -wtT"."""

    re_header = re.compile(r'(?P<table>\w*)\s*SYMBOL TABLE:')

    re_local_symbol = re.compile(
        r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+'
        r'(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*'
        r'(?P<symbol>[^\s]*)')
    re_lame_symbol = re.compile(
        r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+\*[A-Z]+\*')

    re_dynamic_symbol = re.compile(
        r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+'
        r'(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*'
        r'(?P<ver>[^\s]*)\s*(?P<symbol>[^\s]*)')

    def __init__(self, libfile):
        Parser.__init__(self, "%s -wtT %s" % (Config.objdump, libfile))
        self.dynamic_symbols = []
        self.local_symbols = []
        self.local_offsetmap = {}

    def parse_begin(self, line):
        self.parse_header(line)

    def add_symbol(self, table, symbol, offsetmap = None):
        """Append *symbol* to *table* with its offset converted from hex to
        int; symbols at offset 0 are dropped.  If *offsetmap* is given the
        symbol name is also recorded under its offset."""
        offset = int(symbol['offset'], 16)
        symbol['offset'] = offset
        if offset == 0:
            return
        table.append(symbol)
        if offsetmap is not None:
            offsetmap.setdefault(offset, []).append(symbol['symbol'])

    def parse_header(self, line):
        """Switch state on a "SYMBOL TABLE:" header; return True if *line*
        was one.  Raises ValueError for an unknown table name."""
        m = self.re_header.match(line)
        if not m:
            return False
        table = m.group('table')
        if table == "DYNAMIC":
            self.parser = self.parse_dynamic
        elif table == '':
            self.parser = self.parse_local
        else:
            raise ValueError("Invalid symbol table: %s" % table)
        return True

    def parse_local(self, line):
        """Record a local symbol; unparsable lines are ignored."""
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_local_symbol.match(line)
        if not m:
            return
        p = m.groupdict()
        if p['symbol'] and p['symbol'].find('@') == -1:
            self.add_symbol(self.local_symbols, p, self.local_offsetmap)

    def parse_dynamic(self, line):
        """Record a dynamic symbol that has both a name and a version;
        raises ValueError for an unparsable line."""
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_dynamic_symbol.match(line)
        if not m:
            raise ValueError("Invalid symbol definition: %s" % line)
        p = m.groupdict()
        if p['symbol'] and p['ver']:
            self.add_symbol(self.dynamic_symbols, p)

class DwarfdumpParser(Parser):
    """Parses the .debug_info section printed by "dwarfdump -di"."""

    tagcache_stats = Cache.CacheStats()

    class Unit(object):
        """Tags of one level-0 entry, with the caches used while building."""
        def __init__(self):
            self.cache = Cache(enabled=Config.dwarfcache_enabled,
                    stats=DwarfdumpParser.tagcache_stats)
            self.incomplete = Cache()
            self.tags = {}

    class Tag(object):
        """One parsed tag: level, id, tag name, arguments and nested tags."""
        def __init__(self, unit, data):
            self.unit = unit
            self.id = int(data['id'])
            self.level = int(data['level'])
            self.tag = data['tag']
            self.args = {}
            self.nested = []

        @property
        def name(self):
            return self.arg('name')

        @property
        def optname(self):
            return self.optarg('name', None)

        def setname(self, name):
            self.args['DW_AT_name'] = name

        def arg(self, a):
            """Return argument DW_AT_<a>; raises KeyError if absent."""
            name = 'DW_AT_' + a
            try:
                return self.args[name]
            except KeyError:
                raise KeyError("Argument '%s' not found in %s: %s" %
                        (name, self, self.args))

        def optarg(self, a, default):
            """Return argument DW_AT_<a>, or *default* if absent."""
            try:
                return self.arg(a)
            except KeyError:
                return default

        def __repr__(self):
            return "Tag(%d, %d, %s)" % (self.level, self.id, self.tag)

    re_header = re.compile(r'<(?P<level>\d+)><(?P<id>\d+\+*\d*)><(?P<tag>\w+)>')
    re_argname = re.compile(r'(?P<arg>\w+)<')
    re_argunknown = re.compile(r'<Unknown AT value \w+><[^<>]+>')

    skip_tags = set([
        'DW_TAG_lexical_block',
        'DW_TAG_inlined_subroutine',
        'DW_TAG_label',
        'DW_TAG_variable',
        ])

    def __init__(self, libfile):
        Parser.__init__(self, "%s -di %s" % (Config.dwarfdump, libfile))
        self.current_unit = None
        self.offsetmap = {}
        self.stack = []

    def parse_begin(self, line):
        if line == '.debug_info':
            self.parser = self.parse_debuginfo
        else:
            raise ValueError("Invalid dwarfdump header: %s" % line)

    def parse_argvalue(self, args):
        """Split the leading "<...>" group (brackets may nest) off *args*;
        return (remaining text, group content)."""
        assert args.startswith('<')
        i = 1
        cnt = 1         # bracket nesting depth
        while i < len(args) and args[i]:
            if args[i] == '<':
                cnt += 1
            elif args[i] == '>':
                cnt -= 1
                if cnt == 0:
                    break
            i = i + 1
        value = args[1:i]
        args = args[i+1:]
        return (args, value)

    def parse_arg(self, tag, args):
        """Parse one "NAME<value>..." argument from the start of *args* into
        tag.args and return the remaining text.

        A single value is stored as a string, several as a list.  Unknown
        attributes are skipped.  Raises ValueError if nothing matches.
        """
        m = self.re_argname.match(args)
        if not m:
            m = self.re_argunknown.match(args)
            if not m:
                raise ValueError("Invalid dwarfdump: couldn't parse arguments: %s" %
                        args)
            return args[len(m.group(0)):].lstrip()
        argname = m.group('arg')
        args = args[len(argname):]
        value = []
        while len(args) > 0 and args.startswith('<'):
            (args, v) = self.parse_argvalue(args)
            value.append(v)
        args = args.lstrip()
        if len(value) == 1:
            value = value[0]
        tag.args[argname] = value
        return args

    def parse_debuginfo(self, line):
        """Parse one tag line, register it in the current unit and link it
        to its parent tag.

        A level-0 line starts a new unit.  Raises ValueError for an
        unparsable line or when two non-skipped tags share a low_pc offset.
        """
        m = self.re_header.match(line)
        if not m:
            raise ValueError("Invalid dwarfdump: %s" % line)
        if m.group('level') == '0':
            self.current_unit = DwarfdumpParser.Unit()
            return
        tag = DwarfdumpParser.Tag(self.current_unit, m.groupdict())
        args = line[len(m.group(0)):].lstrip()
        while args:
            args = self.parse_arg(tag, args)
        tag.unit.tags[tag.id] = tag
        if 'DW_AT_low_pc' in tag.args and \
                tag.tag not in DwarfdumpParser.skip_tags:
            offset = int(tag.args['DW_AT_low_pc'], 16)
            if offset in self.offsetmap:
                raise ValueError("Dwarf dump parse error: " +
                        "symbol is aleady defined at offset 0x%x" % offset)
            self.offsetmap[offset] = tag
        if len(self.stack) > 0:
            # Unwind to the closest tag of a lower level: the parent.
            prev = self.stack.pop()
            while prev.level >= tag.level and len(self.stack) > 0:
                prev = self.stack.pop()
            if prev.level < tag.level:
                assert prev.level == tag.level - 1
                # TODO check DW_AT_sibling ???
                if tag.tag not in DwarfdumpParser.skip_tags:
                    prev.nested.append(tag)
                self.stack.append(prev)
        self.stack.append(tag)
        assert len(self.stack) == tag.level

# }}}

def list_str(l):
    """Return the items of *l* as strings, sorted and comma separated."""
    return ', '.join(sorted([str(x) for x in l]))

def names_ver_str(vername, names):
    """Return "name@vername" for every name, sorted and comma separated."""
    return list_str([x + "@" + vername for x in names])

def common_symbols(origlib, newlib):
    """Report added/removed versions and symbols on stdout and return a
    list of VersionMap objects holding the CommonSymbol pairs present in
    both libraries."""
    result = []
    verdiff = ListDiff(origlib.versions.keys(), newlib.versions.keys())
    if Config.verbose >= 1:
        _emit(sys.stdout, 'Original versions: ', list_str(verdiff.orig))
        _emit(sys.stdout, 'New versions: ', list_str(verdiff.new))
    for vername in verdiff.added:
        _emit(sys.stdout, 'Added version: ', vername)
        _emit(sys.stdout, ' Added symbols: ',
                names_ver_str(vername, newlib.versions[vername].names()))
    for vername in verdiff.removed:
        _emit(sys.stdout, 'Removed version: ', vername)
        _emit(sys.stdout, ' Removed symbols: ',
                names_ver_str(vername, origlib.versions[vername].names()))
    added = []
    removed = []
    for vername in verdiff.common:
        origver = origlib.versions[vername]
        newver = newlib.versions[vername]
        namediff = ListDiff(origver.names(), newver.names())
        if namediff.added:
            added.append(names_ver_str(vername, namediff.added))
        if namediff.removed:
            removed.append(names_ver_str(vername, namediff.removed))
        commonver = VersionMap(vername)
        result.append(commonver)
        for n in namediff.common:
            sym = CommonSymbol(origver.symbols[n], newver.symbols[n])
            commonver.append(sym)
    if added:
        _emit(sys.stdout, 'Added symbols:')
        for i in added:
            _emit(sys.stdout, ' ', i)
    if removed:
        _emit(sys.stdout, 'Removed symbols:')
        for i in removed:
            _emit(sys.stdout, ' ', i)
    return result

def cmp_symbols(commonver):
    """Compare the two definitions of every common symbol.

    A mismatch sets App.result_code to 1 and is reported on stdout;
    definitions are dumped to the per-library outputs depending on
    Config.dump / Config.no_dump.
    """
    for ver in commonver:
        for symname in sorted(ver.names()):
            sym = ver.symbols[symname]
            match = sym.origsym.definition == sym.newsym.definition
            if not match:
                App.result_code = 1
            if Config.verbose >= 1 or not match:
                _emit(sys.stdout, '%s: definitions %smatch' %
                        (sym.origsym.name_ver, "" if match else "mis"))
                if Config.dump or (not match and not Config.no_dump):
                    for (xsym, xconf) in [(sym.origsym, Config.origfile),
                            (sym.newsym, Config.newfile)]:
                        xout = xconf.out
                        if xsym.definition is None:
                            _emit(xout, '\n// Definition not found: %s %s' %
                                    (xsym.name_ver, xsym.lib.libfile))
                            continue
                        _emit(xout, '\n// Definitions mismatch: %s %s' %
                                (xsym.name_ver, xsym.lib.libfile))
                        pp = PrettyPrinter()
                        pp.run(xsym.definition)
                        for i in pp.nested():
                            _emit(xout, i)
                        _emit(xout, pp.result())

def dump_symbols(commonver):
    """Write the definitions of all common symbols, one section per
    version, to the per-library outputs.  Symbols lacking a definition in
    either library are skipped with a warning."""
    class SymbolDump(object):
        """Collects the pretty printed symbols of one version and library."""
        def __init__(self, io_conf):
            self.io_conf = io_conf
            self.pp = PrettyPrinter()
            self.res = []

        def run(self, sym):
            r = self.pp.run(sym.definition)
            self.res.append('/* %s@%s */ %s' % (sym.name, sym.version, r))

        def finish(self, ver):
            out = self.io_conf.out
            _emit(out, '\n// Symbol dump: version %s, library %s' %
                    (ver.name, self.io_conf.filename))
            for i in self.pp.nested():
                _emit(out, i)
            _emit(out, '')
            for i in self.res:
                _emit(out, i)

    for ver in commonver:
        d_orig = SymbolDump(Config.origfile)
        d_new = SymbolDump(Config.newfile)
        for symname in sorted(ver.names()):
            sym = ver.symbols[symname]
            if sym.origsym.definition is None or \
                    sym.newsym.definition is None:
                # XXX
                warn(Config.w_symbol, 'Missing symbol definition: %s@%s' % \
                        (symname, ver.name))
                continue
            d_orig.run(sym.origsym)
            d_new.run(sym.newsym)
        d_orig.finish(ver)
        d_new.finish(ver)

if __name__ == '__main__':
    Config.init()
    parser = optparse.OptionParser(usage="usage: %prog origlib newlib",
            version="%prog " + Config.version)
    parser.add_option('-v', '--verbose', action='count',
            help="verbose mode, may be specified several times")
    parser.add_option('--alias-prefix', action='append',
            help="name prefix to try for symbol alias lookup", metavar="STR")
    parser.add_option('--dump', action='store_true',
            help="dump symbol definitions")
    parser.add_option('--no-dump', action='store_true',
            help="disable dump for mismatched symbols")
    parser.add_option('--out-orig', action='store',
            help="result output file for original library", metavar="ORIGFILE")
    parser.add_option('--out-new', action='store',
            help="result output file for new library", metavar="NEWFILE")
    parser.add_option('--exclude-ver', action='append', metavar="RE")
    parser.add_option('--include-ver', action='append', metavar="RE")
    parser.add_option('--exclude-sym', action='append', metavar="RE")
    parser.add_option('--include-sym', action='append', metavar="RE")
    for opt in ['alias', 'cached', 'symbol']:
        parser.add_option("--w-" + opt,
                action="store_true", dest="w_" + opt)
        parser.add_option("--w-no-" + opt,
                action="store_false", dest="w_" + opt)
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        sys.exit(-1)
    if opts.out_orig:
        Config.origfile.init(opts.out_orig)
    if opts.out_new:
        Config.newfile.init(opts.out_new)
    if opts.no_dump:
        Config.dump = False
        Config.no_dump = True
    if opts.dump:
        # --dump wins over --no-dump and implies at least one -v.
        Config.dump = True
        Config.no_dump = False
        Config.verbose = 1
    if opts.verbose:
        Config.verbose = opts.verbose
    if opts.alias_prefix:
        Config.alias_prefixes = opts.alias_prefix
        # Longest prefix first.
        Config.alias_prefixes.sort(key=lambda x: -len(x))
    for (k, v) in ({ '_sym': Config.symbol_filter,
            '_ver': Config.version_filter }).items():
        for a in [ 'exclude', 'include' ]:
            opt = getattr(opts, a + k)
            if opt:
                getattr(v, a).extend(opt)
    Config.version_filter.compile()
    Config.symbol_filter.compile()
    for w in ['w_alias', 'w_cached', 'w_symbol']:
        if hasattr(opts, w):
            v = getattr(opts, w)
            if v is not None:
                setattr(Config, w, v)

    (Config.origfile.filename, Config.newfile.filename) = (args[0], args[1])

    origlib = Shlib(Config.origfile.filename)
    origlib.parse()
    newlib = Shlib(Config.newfile.filename)
    newlib.parse()

    commonver = common_symbols(origlib, newlib)
    if Config.dump:
        dump_symbols(commonver)
    cmp_symbols(commonver)
    if Config.verbose >= 4:
        _emit(sys.stdout, Dwarf.cmpcache.stats.show('Cmp'))
        _emit(sys.stdout, DwarfdumpParser.tagcache_stats.show('Dwarf tag'))

    # Close (and thereby flush) any output files opened for --out-orig/--out-new.
    for fileconf in (Config.origfile, Config.newfile):
        if fileconf.out is not sys.stdout:
            fileconf.out.close()

    sys.exit(App.result_code)