#!/usr/bin/env python
# meta2deps.py revision 249033

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $Id: meta2deps.py,v 1.12 2013/03/31 22:31:59 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os
import re
import sys


def _say(out, *args, **kwargs):
    """Write args to out separated by single spaces.

    This stands in for the Python 2 'print >> out, ...' statement so
    that the script runs unchanged under Python 2 and Python 3.

    out     open file to write to, None means sys.stdout.

    end     keyword only, text appended after the last argument
            (default is a newline).
    """
    if out is None:
        out = sys.stdout
    out.write(' '.join([str(a) for a in args]) + kwargs.get('end', '\n'))


def _open_text(path):
    """Open path for reading lines as native strings.

    Python 2 keeps the original binary mode.  Python 3 decodes the
    data, replacing undecodable bytes rather than failing, since
    arbitrary command output can appear in a meta file.
    """
    if sys.version_info[0] >= 3:
        return open(path, 'r', errors='replace')
    return open(path, 'rb')


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    Absolute paths are returned without checking that they exist.
    Relative paths are tried against last_dir and then cwd, the first
    candidate that exists is returned.
    Returns None if path is empty or no candidate exists.
    """
    if not path:
        # nothing to resolve (the original indexed path[0] and crashed)
        return None
    if path.endswith('/.'):
        # '/.' alone means the root directory
        path = path[0:-2] or '/'
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            _say(debug_out, "looking for:", p, end=' ')
        if not os.path.exists(p):
            if debug > 2:
                _say(debug_out, "nope")
            continue
        if debug > 2:
            _say(debug_out, "found:", p)
        return p
    return None


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath
    until we know we have something.

    Returns None if resolve() cannot find the path.
    """
    path = resolve(path, cwd, last_dir, debug, debug_out)
    if path and (path.find('./') > 0 or
                 path.endswith('/..') or
                 os.path.islink(path)):
        return os.path.realpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    cmp, key and reverse have the meaning they have for the Python 2
    list.sort().  Elements that compare equal to the element before
    them (after sorting) are dropped from the returned list.
    """
    sort_key = key
    if cmp is not None:
        # Python 3 has no cmp argument, cmp_to_key (2.7, 3.2 and later)
        # gives the same ordering on both.
        from functools import cmp_to_key
        wrap = cmp_to_key(cmp)
        if key is None:
            sort_key = wrap
        else:
            sort_key = lambda e: wrap(key(e))
    list.sort(key=sort_key, reverse=reverse)
    nl = []
    for e in list:
        # compare with the last element kept; the original compared
        # with a variable that was never updated so nothing was dropped
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


class MetaFile:
    """class to parse meta files generated by bmake."""

    conf = None
    dirdep_re = None
    host_target = None
    # NOTE: the containers below are class attributes on purpose.
    # They are shared by every instance so that results accumulate as
    # main() creates one MetaFile per meta file; use reset() to give
    # an instance its own empty set.
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')

        if not self.conf:
            # NOTE: the assignment below creates an instance attribute,
            # the class attribute stays None, so this branch runs for
            # every new instance.  The 'not in' checks keep the shared
            # lists free of duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            # Only trim names that are actually set.  An empty name
            # would match every objroot and reduce it to ''.
            trim_names = []
            if self.machine:
                trim_names.append(self.machine)
                if self.machine == 'host' and self.host_target:
                    trim_names.append(self.host_target)
            trim_list = []
            for n in trim_names:
                trim_list += ['/' + n + '/',
                              '/' + n,
                              n + '/',
                              n]

            for objroot in getv(conf, 'OBJROOTS', []):
                if not objroot:
                    continue
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    # realpath drops a trailing '/', put it back
                    if objroot.endswith('/') and not _objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                _say(self.debug_out, "host_target=", self.host_target)
                _say(self.debug_out, "srctops=", self.srctops)
                _say(self.debug_out, "objroots=", self.objroots)

            # splits 'machine/some/dir' into machine and some/dir
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                _say(self.debug_out, "need reldir:", end=' ')
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    # find_top matched a prefix, strip only that
                    self.reldir = self.curdir.replace(srctop, '', 1)
                    if self.debug:
                        _say(self.debug_out, self.reldir)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized

        if name:
            self.parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open file, None means sys.stdout.
        Nothing is written if RELDIR is not known.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            _say(out, 'DPDEPS_%s += %s' % (f, self.reldir))

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                _say(self.debug_out,
                     "%s: %sAdd: %s" % (self.name, clue, data))

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    _say(self.debug_out, "found in", top)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the entry of objroots that dir was found under.
        dir     the directory containing path.
        path    the file that was accessed.
        input   the raw name from the meta file, marked as seen when
                there is no .dirdep file to consult.

        Returns None if nothing can be deduced.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with _open_text(ddepf) as df:
                    ddep = df.readline().strip('# \n')
                if self.debug > 1:
                    _say(self.debug_out, "found %s: %s\n" % (ddepf, ddep))
                # drop a trailing '.MACHINE'; skipped when machine is
                # empty since every string ends with ''
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '', 1)
                return None
            m = self.dirdep_re.match(dir.replace(objroot, '', 1))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            _say(self.debug_out,
                                 "adding .%s to %s" % (dmachine, ddep))
                        ddep += '.' + dmachine

        return ddep

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.

        file    if set, an open file (or any iterable of lines) to read
                instead of opening self.name; it is not closed here.
        """

        # Recorded from the 'V' record.  It was meant to enable 'W'
        # records for filemon before version 4 (they may be 'rw'),
        # that handling is disabled so 'W' is not in 'interesting'.
        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
        else:
            f = _open_text(self.name)
        # always initialized, there may be no CWD record
        cwd = last_dir = self.cwd
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                # ignore anything we don't care about
                if not line or line[0] not in interesting:
                    continue
                if self.debug > 2:
                    _say(self.debug_out, "input:", line, end='')
                w = line.split()

                if skip:
                    # nothing matters until we see the 'V' record
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                    elif w[0] == 'CWD':
                        self.cwd = cwd = last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            _say(self.debug_out,
                                 "%s: CWD=%s" % (self.name, cwd))
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the state of the process for this record
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    # save the parent too; last_pid becomes the child
                    # so the parent's state would otherwise be lost
                    pid_cwd[pid] = cwd
                    pid_last_dir[pid] = last_dir
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        # could not be resolved, use it as recorded
                        cwd = w[2]
                        if self.debug > 1:
                            _say(self.debug_out, "missing cwd=", cwd)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    last_dir = cwd
                    if self.debug > 1:
                        _say(self.debug_out, "cwd=", cwd)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        _say(self.debug_out, "seen:", w[2])
                    continue
                # file operations
                if w[0] in 'ML':
                    path = w[2].strip("'")
                else:
                    path = w[2]
                # we are never interested in .dirdep files as dependencies
                if path.endswith('.dirdep'):
                    continue
                # we don't want to resolve the last component if it is
                # a symlink
                path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
                if not path:
                    continue
                dir, base = os.path.split(path)
                if dir in self.seen:
                    if self.debug > 2:
                        _say(self.debug_out, "seen:", dir)
                    continue
                # we can have a path in an objdir which is a link
                # to the src dir, we may need to add dependencies for each
                rdir = dir
                dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
                if rdir == dir or rdir.find('./') > 0:
                    rdir = None
                # now put path back together
                path = '/'.join([dir, base])
                if self.debug > 1:
                    _say(self.debug_out,
                         "raw=%s rdir=%s dir=%s path=%s" %
                         (w[2], rdir, dir, path))
                if w[0] in 'SRWL':
                    if w[0] == 'W' and path.endswith('.dirdep'):
                        continue
                    if path in [last_dir, cwd, self.cwd, self.curdir]:
                        if self.debug > 1:
                            _say(self.debug_out, "skipping:", path)
                        continue
                    if os.path.isdir(path):
                        if w[0] in 'RW':
                            last_dir = path
                        if self.debug > 1:
                            _say(self.debug_out, "ldir=", last_dir)
                        continue

                if w[0] in 'REWML':
                    # finally, we get down to it
                    if dir == self.cwd or dir == self.curdir:
                        continue
                    srctop = self.find_top(path, self.srctops)
                    if srctop:
                        if self.dpdeps:
                            self.add(self.file_deps,
                                     path.replace(srctop, '', 1), 'file')
                        self.add(self.src_deps,
                                 dir.replace(srctop, '', 1), 'src')
                        self.seenit(w[2])
                        self.seenit(dir)
                        if rdir and not rdir.startswith(srctop):
                            dir = rdir      # for below
                            rdir = None
                        else:
                            continue

                objroot = None
                # NOTE: 'dir' is deliberately the loop variable.  After
                # a break it is the directory that matched; otherwise it
                # is the last candidate tried (rdir, possibly None).
                for dir in [dir, rdir]:
                    if not dir:
                        continue
                    objroot = self.find_top(dir, self.objroots)
                    if objroot:
                        break
                if objroot:
                    ddep = self.find_obj(objroot, dir, path, w[2])
                    if ddep:
                        self.add(self.obj_deps, ddep, 'obj')
                else:
                    # don't waste time looking again
                    self.seenit(w[2])
                    self.seenit(dir)
        finally:
            # only close what we opened
            if not file:
                f.close()


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -m "MACHINE"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d          bumps debug level

    -R "RELDIR"

    -q          do not print the results

    xopts is appended to the getopt option string and xoptf(o, a, conf)
    is called for any option not handled here.

    Returns the last MetaFile created, or None if no meta files
    were named.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # Each variable is looked up on its own, so a missing MACHINE no
    # longer causes SB_SRC and SB_OBJROOT to be ignored as well.
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'dS:C:O:R:m:D:H:q' + xopts)
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            # the first argument that is not var=val ends the list
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    # what is left are the meta files
    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        _say(debug_out, "config:")
        _say(debug_out, "psyco=", have_psyco)
        for k, v in conf.items():
            _say(debug_out, "%s=%s" % (k, v))

    # Results accumulate in the attributes shared by all instances,
    # so the last instance can report on every file parsed.
    m = None
    for a in args:
        m = klass(a, conf)

    if output and m is not None:
        _say(sys.stdout, m.dirdeps())

        _say(sys.stdout, m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # NOTE: the file is truncated, as it was originally
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except Exception:
        # yes, this goes to stdout
        _say(sys.stdout, "ERROR: ", sys.exc_info()[1])
        raise