meta2deps.py revision 246223
1#!/usr/bin/env python
2
3"""
4This script parses each "meta" file and extracts the
5information needed to deduce build and src dependencies.
6
7It works much the same as the original shell script, but is
8*much* more efficient.
9
10The parsing work is handled by the class MetaFile.
11We only pay attention to a subset of the information in the
12"meta" files.  Specifically:
13
'CWD'	to initialize our notion of the current directory.
15
16'C'	to track chdir(2) on a per process basis
17
18'R'	files read are what we really care about.
19	directories read, provide a clue to resolving
20	subsequent relative paths.  That is if we cannot find
21	them relative to 'cwd', we check relative to the last
22	dir read.
23
24'W'	files opened for write or read-write,
25	for filemon V3 and earlier.
26
27'E'	files executed.
28
29'L'	files linked
30
31'V'	the filemon version, this record is used as a clue
32	that we have reached the interesting bit.
33
34"""
35
36"""
37RCSid:
38	$Id: meta2deps.py,v 1.7 2012/11/06 05:44:03 sjg Exp $
39
40	Copyright (c) 2011, Juniper Networks, Inc.
41
42	Redistribution and use in source and binary forms, with or without
43	modification, are permitted provided that the following conditions
44	are met:
45	1. Redistributions of source code must retain the above copyright
46	   notice, this list of conditions and the following disclaimer.
47	2. Redistributions in binary form must reproduce the above copyright
48	   notice, this list of conditions and the following disclaimer in the
49	   documentation and/or other materials provided with the distribution.
50
51	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
52	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
53	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
54	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
55	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
56	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
57	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
61	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62
63"""
64
65import os, re, sys
66
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d."""
    return dict[key] if key in dict else d
72
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path      the path as recorded in the meta file.
    cwd       current directory of the process that used path.
    last_dir  optional, the last directory that process read;
              it is tried before cwd for plain relative paths.
    debug     when > 2 each candidate is reported on debug_out.

    Absolute paths, '.' and './...' are answered without touching
    the filesystem.  Any other relative path is only returned if it
    exists below last_dir or cwd.  Returns None when nothing matches
    or when path is empty.
    """
    if path.endswith('/.'):
        # a bare '/.' is the root directory, do not reduce it to ''
        path = path[:-2] or '/'
    if not path:
        return None
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            debug_out.write("looking for: %s " % p)
        if not os.path.exists(p):
            if debug > 2:
                debug_out.write("nope\n")
            continue
        if debug > 2:
            debug_out.write("found: %s\n" % p)
        return p
    return None
102
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path via resolve() and canonicalize it only when needed.

    This is called a lot, so os.path.realpath() is reserved for
    results that contain './' past the first character, end in
    '/..', or are symlinks.  Whatever resolve() returned is handed
    back untouched otherwise (including None for no match).
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if not resolved:
        return resolved
    needs_realpath = (resolved.find('./') > 0
                      or resolved.endswith('/..')
                      or os.path.islink(resolved))
    if needs_realpath:
        return os.path.realpath(resolved)
    return resolved
115
def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without duplicates.

    list     the list to sort; it is modified (sorted) in place.
    cmp      optional old-style comparison function f(a, b) -> int.
    key      optional key function, as for list.sort().
    reverse  sort in descending order when true.

    Elements are dropped when they compare equal (==) to the element
    kept just before them in sorted order.
    """
    if cmp is None:
        list.sort(key=key, reverse=reverse)
    else:
        # list.sort() lost its cmp argument in Python 3, adapt it.
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            list.sort(key=cmp_key, reverse=reverse)
        else:
            # as in Python 2, cmp compares the values produced by key
            list.sort(key=lambda item: cmp_key(key(item)), reverse=reverse)
    nl = []
    for e in list:
        if nl and nl[-1] == e:
            continue
        nl.append(e)
    return nl
125
class MetaFile:
    """class to parse meta files generated by bmake.

    srctops, objroots, seen, obj_deps, src_deps and file_deps start
    out as class attributes shared by every instance.  main() relies
    on that: it creates one instance per meta file and reports the
    accumulated results from the last one.  reset() gives an instance
    its own empty result containers.
    """

    conf = None
    dirdep_re = None
    host_target = None
    cwd = None                  # set from the CWD record by parse()
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS	list of tops of the src tree(s).

        CURDIR	the src directory 'bmake' was run from.

        RELDIR	the relative path from SRCTOP to CURDIR

        MACHINE	the machine we built for.
                set to 'none' if we are not cross-building.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS	names an optional file to which per file dependencies
                will be written.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug	desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """
        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing to match against
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                # paths may show up via the physical location too
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            for objroot in getv(conf, 'OBJROOTS', []):
                if not objroot:
                    continue
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    # realpath drops a trailing '/', put it back
                    if objroot.endswith('/') and not _objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            if self.debug:
                self._debug("host_target=", self.host_target)
                self._debug("srctops=", self.srctops)
                self._debug("objroots=", self.objroots)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        self.curdir = getv(conf, 'CURDIR')
        self.machine = getv(conf, 'MACHINE', '')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        if self.dpdeps and not self.reldir:
            if self.debug:
                self.debug_out.write("need reldir: ")
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    # strip only the leading srctop
                    self.reldir = self.curdir.replace(srctop, '', 1)
                    if self.debug:
                        self.debug_out.write("%s\n" % (self.reldir,))
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        if name:
            self.parse()

    def _debug(self, *args):
        """write args, separated by spaces, as one line on debug_out."""
        self.debug_out.write(' '.join([str(a) for a in args]) + '\n')

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Write DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file
        (sys.stdout if out is None).
        Does nothing when RELDIR is not known."""
        if not self.reldir:
            return None
        if out is None:
            out = sys.stdout
        for f in sort_unique(self.file_deps):
            out.write('DPDEPS_%s += %s\n' % (f, self.reldir))

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                self._debug("%s: %sAdd: %s" % (self.name, clue, data))

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees;
        return the first entry of list that path starts with,
        or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    self._debug("found in", top)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot  the object root that dir lives under.
        dir      directory of the file that was accessed.
        path     full path of the file that was accessed.
        input    the raw path from the meta file, it is remembered
                 as seen when no .dirdep file settles the matter.

        Returns the directory dependency, or None if there is none.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as fh:
                    ddep = fh.readline().strip('# \n')
                if self.debug > 1:
                    self.debug_out.write("found %s: %s\n\n" % (ddepf, ddep))
                # drop a trailing ".MACHINE"; every string ends with ''
                # so an empty machine must not be allowed to match.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    # strip only the leading objroot
                    return dir.replace(objroot, '', 1)
                return None
            m = self.dirdep_re.match(dir.replace(objroot, '', 1))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            self._debug("adding .%s to %s" % (dmachine, ddep))
                        ddep += '.' + dmachine

        return ddep

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Only lines starting with C, E, F, L, R or V are looked at.

        name  if set, replaces self.name as the meta file to read.
        file  if set, an open file (or iterable of lines) to parse
              instead of opening self.name; it is not closed here.
        """
        if name:
            self.name = name
        if self.cwd is None:
            # no CWD record seen so far, start from our own
            self.cwd = os.getcwd()
        if file:
            self._parse_lines(file)
            return
        f = open(self.name, 'r')
        try:
            self._parse_lines(f)
        finally:
            f.close()

    def _parse_lines(self, f):
        """process the lines of one meta file, see parse()."""
        version = 0                     # unknown
        cwd = last_dir = self.cwd
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        for line in f:
            # ignore anything we don't care about
            if not line or line[0] not in interesting:
                continue
            if self.debug > 2:
                self.debug_out.write("input: %s" % line)
            w = line.split()

            if skip:
                # nothing but CWD matters until the filemon data starts
                if w[0] == 'V':
                    skip = False
                    # currently unused: 'W' records (which may be
                    # 'rw' for version < 4) are not processed.
                    version = int(w[1])
                elif w[0] == 'CWD':
                    self.cwd = cwd = last_dir = w[1]
                    self.seenit(cwd)    # ignore this
                    if self.debug:
                        self._debug("%s: CWD=%s" % (self.name, cwd))
                continue

            pid = int(w[1])
            if pid != last_pid:
                # switching process: save state of the old one,
                # restore (or initialize) state of the new one.
                if last_pid:
                    pid_cwd[last_pid] = cwd
                    pid_last_dir[last_pid] = last_dir
                cwd = pid_cwd.get(pid, self.cwd)
                last_dir = pid_last_dir.get(pid, self.cwd)
                last_pid = pid

            # process operations
            if w[0] == 'F':
                # save the parent's state before we switch to the
                # child, else a chdir done by the parent is lost.
                pid_cwd[pid] = cwd
                pid_last_dir[pid] = last_dir
                npid = int(w[2])
                pid_cwd[npid] = cwd
                pid_last_dir[npid] = cwd
                last_pid = npid
                continue
            elif w[0] == 'C':
                # fall back to the raw path if it cannot be resolved
                cwd = abspath(w[2], cwd, None,
                              self.debug, self.debug_out) or w[2]
                if cwd.endswith('/.'):
                    cwd = cwd[0:-2]
                last_dir = cwd
                if self.debug > 1:
                    self._debug("cwd=", cwd)
                continue

            if w[2] in self.seen:
                if self.debug > 2:
                    self._debug("seen:", w[2])
                continue
            # file operations
            if w[0] in 'ML':
                path = w[2].strip("'")
            else:
                path = w[2]
            # we are never interested in .dirdep files as dependencies
            if path.endswith('.dirdep'):
                continue
            # we don't want to resolve the last component if it is
            # a symlink
            path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
            if not path:
                continue
            dname, base = os.path.split(path)
            if dname in self.seen:
                if self.debug > 2:
                    self._debug("seen:", dname)
                continue
            # we can have a path in an objdir which is a link
            # to the src dir, we may need to add dependencies for each
            rdir = dname
            dname = abspath(dname, cwd, last_dir, self.debug, self.debug_out)
            if rdir == dname or rdir.find('./') > 0:
                rdir = None
            # now put path back together
            path = '/'.join([dname, base])
            if self.debug > 1:
                self._debug("raw=%s rdir=%s dir=%s path=%s"
                            % (w[2], rdir, dname, path))
            if w[0] in 'SRWL':
                if w[0] == 'W' and path.endswith('.dirdep'):
                    continue
                if path in [last_dir, cwd, self.cwd, self.curdir]:
                    if self.debug > 1:
                        self._debug("skipping:", path)
                    continue
                if os.path.isdir(path):
                    # a directory is only a clue for later lookups
                    if w[0] in 'RW':
                        last_dir = path
                    if self.debug > 1:
                        self._debug("ldir=", last_dir)
                    continue

            if w[0] in 'REWML':
                # finally, we get down to it
                if dname == self.cwd or dname == self.curdir:
                    continue
                srctop = self.find_top(path, self.srctops)
                if srctop:
                    # strip only the leading srctop, the same string
                    # may appear again further down the path.
                    if self.dpdeps:
                        self.add(self.file_deps,
                                 path.replace(srctop, '', 1), 'file')
                    self.add(self.src_deps,
                             dname.replace(srctop, '', 1), 'src')
                    self.seenit(w[2])
                    self.seenit(dname)
                    if rdir and not rdir.startswith(srctop):
                        dname = rdir    # for below
                        rdir = None
                    else:
                        continue

                objroot = None
                obj_dir = None
                for candidate in [dname, rdir]:
                    if not candidate:
                        continue
                    objroot = self.find_top(candidate, self.objroots)
                    if objroot:
                        obj_dir = candidate
                        break
                if objroot:
                    ddep = self.find_obj(objroot, obj_dir, path, w[2])
                    if ddep:
                        self.add(self.obj_deps, ddep, 'obj')
                else:
                    # don't waste time looking again
                    self.seenit(w[2])
                    # NOTE(review): only the unresolved directory is
                    # remembered here, as before; it is usually None
                    # at this point, which we no longer record.
                    if rdir:
                        self.seenit(rdir)
476
477
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d	bumps debug level

    -q	do not print the results

    The environment variables MACHINE, SB_SRC and SB_OBJROOT
    provide defaults for MACHINE, SRCTOPS and OBJROOTS.

    xopts   extra getopt option letters to accept.
    xoptf   called as xoptf(option, value, conf) for any option
            not handled here.

    Returns the klass instance made for the last meta file,
    or None if no meta file was named.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # strictly optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # each variable is optional, a missing one must not
    # stop us looking at the others.
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'dS:C:O:R:m:D:H:q' + xopts)
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    del args[:eaten]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        debug_out.write("config:\n")
        debug_out.write("psyco= %s\n" % (have_psyco,))
        for k, v in conf.items():
            debug_out.write("%s=%s\n" % (k, v))

    m = None
    for a in args:
        m = klass(a, conf)

    if output and m is not None:
        sys.stdout.write(m.dirdeps() + '\n')

        sys.stdout.write(m.src_dirdeps('\nsrc:') + '\n')

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # any previous content of the file is replaced
            out = open(dpdeps, 'w')
            try:
                m.file_depends(out)
            finally:
                out.close()

    return m
606
if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # report whatever went wrong, then let it propagate.
        # yes, this goes to stdout
        sys.stdout.write("ERROR:  %s\n" % (sys.exc_info()[1],))
        raise
614
615