1#!/usr/bin/env python
2
3"""
4This script parses each "meta" file and extracts the
5information needed to deduce build and src dependencies.
6
7It works much the same as the original shell script, but is
8*much* more efficient.
9
10The parsing work is handled by the class MetaFile.
11We only pay attention to a subset of the information in the
12"meta" files.  Specifically:
13
'CWD'	to initialize our notion of the current directory.
15
16'C'	to track chdir(2) on a per process basis
17
18'R'	files read are what we really care about.
19	directories read, provide a clue to resolving
20	subsequent relative paths.  That is if we cannot find
21	them relative to 'cwd', we check relative to the last
22	dir read.
23
24'W'	files opened for write or read-write,
25	for filemon V3 and earlier.
26
27'E'	files executed.
28
29'L'	files linked
30
31'V'	the filemon version, this record is used as a clue
32	that we have reached the interesting bit.
33
34"""
35
36"""
37RCSid:
38	$Id: meta2deps.py,v 1.15 2013/07/29 20:41:23 sjg Exp $
39
40	Copyright (c) 2011-2013, Juniper Networks, Inc.
41	All rights reserved.
42
43	Redistribution and use in source and binary forms, with or without
44	modification, are permitted provided that the following conditions
45	are met:
46	1. Redistributions of source code must retain the above copyright
47	   notice, this list of conditions and the following disclaimer.
48	2. Redistributions in binary form must reproduce the above copyright
49	   notice, this list of conditions and the following disclaimer in the
50	   documentation and/or other materials provided with the distribution.
51
52	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
53	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
54	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
55	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
56	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
62	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63
64"""
65
66import os, re, sys
67
def getv(dict, key, d=None):
    """Return dict[key] when key is present in dict, otherwise d."""
    return dict[key] if key in dict else d
73
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path       the path to resolve.
    cwd        directory that '.' and './xxx' are relative to.
    last_dir   optional directory that is tried (before cwd) for
               other relative paths.
    debug      the lookups are reported to debug_out if this is > 2.
    debug_out  file-like object that receives the debug output.

    A trailing '/.' is dropped.  Absolute paths, '.' and './xxx' are
    resolved textually without looking at the file system.
    Any other relative path is returned only if it exists relative
    to last_dir or cwd, otherwise None is returned.
    """
    if path.endswith('/.'):
        # a bare '/.' is the root, do not reduce it to ''
        path = path[:-2] or '/'
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        last_dir = None                 # no need to check it twice
    for d in (last_dir, cwd):
        if not d:
            continue
        p = '/'.join([d, path])
        # write() rather than a print statement, so that this
        # is valid for both Python 2 and Python 3
        if debug > 2:
            debug_out.write("looking for: %s " % (p,))
        if os.path.exists(p):
            if debug > 2:
                debug_out.write("found: %s\n" % (p,))
            return p
        if debug > 2:
            debug_out.write("nope\n")
    return None
103
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path via cwd or last_dir and return the result.

    If resolve() cannot come up with anything, path itself is used.
    This gets called a lot, so os.path.realpath() is only applied
    when the candidate contains './' (other than at the very start),
    ends in '/..' or is itself a symlink.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    candidate = resolved if resolved else path
    needs_realpath = (candidate.find('./') > 0
                      or candidate.endswith('/..')
                      or os.path.islink(candidate))
    if not needs_realpath:
        return candidate
    return os.path.realpath(candidate)
118
119def sort_unique(list, cmp=None, key=None, reverse=False):
120    list.sort(cmp, key, reverse)
121    nl = []
122    le = None
123    for e in list:
124        if e == le:
125            continue
126        nl.append(e)
127    return nl
128
def add_trims(x):
    """Return the list ['/x/', '/x', 'x/', 'x'] for the string x."""
    return [lead + x + tail
            for lead in ('/', '')
            for tail in ('/', '')]
134
class MetaFile:
    """class to parse meta files generated by bmake.

    The attributes defined at class level below are shared by all
    instances until an instance rebinds them (as reset() does).
    srctops, objroots, seen and the *_deps lists are updated in
    place, so what is learned from one meta file is still there when
    another instance parses the next one.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                saved as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once.
            # Note that conf is bound on the instance here; the
            # 'not in' tests below are what keep the shared lists
            # free of duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # ignore empty entries
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # an empty trim (eg. MACHINE not set) would match
                    # anything and reduce objroot to ''
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot:
                    continue            # nothing useful left
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                self._debug_msg("host_target= %s" % (self.host_target,))
                self._debug_msg("srctops= %s" % (self.srctops,))
                self._debug_msg("objroots= %s" % (self.objroots,))

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                self.debug_out.write("need reldir:")
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    # find_top() matched a prefix, remove just that
                    self.reldir = self._strip_top(self.curdir, srctop)
                    if self.debug:
                        self._debug_msg(" %s" % (self.reldir,))
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized

        if name:
            self.try_parse()

    def _debug_msg(self, msg):
        """write msg as one line of debug output.
        We use write() rather than a print statement so that the
        code is valid for both Python 2 and Python 3."""
        self.debug_out.write(msg + '\n')

    @staticmethod
    def _strip_top(path, top):
        """return path without its leading top.
        path is returned as is if it does not start with top;
        occurrences of top elsewhere in path are left alone."""
        if top and path.startswith(top):
            return path[len(top):]
        return path

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Output goes to sys.stdout if out is None.
        Nothing is output if we do not know RELDIR."""
        if not self.reldir:
            return None
        if out is None:
            out = sys.stdout
        for f in sort_unique(self.file_deps):
            out.write('DPDEPS_%s += %s\n' % (f, self.reldir))

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.
        clue is only used to label the debug output."""
        if data not in list:
            list.append(data)
            if self.debug:
                self._debug_msg("%s: %sAdd: %s" % (self.name, clue, data))

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees.
        return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    self._debug_msg("found in %s" % (top,))
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the objroot that dir was found in.
        dir     the directory containing path.
        path    the file that was accessed.
        input   the raw path from the meta file, remembered as seen
                when there is no .dirdep file.

        Returns None if nothing can be deduced.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # only the first line matters, and close the file
                with open(ddepf, 'r') as f:
                    ddep = f.readline().strip('# \n')
                if self.debug > 1:
                    self._debug_msg("found %s: %s\n" % (ddepf, ddep))
                # drop a trailing '.MACHINE' or '.TARGET_SPEC'.
                # We insist on the '.' so that we neither trim when
                # machine is empty nor mangle eg. 'libi386'.
                if self.machine and ddep.endswith('.' + self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif (self.target_spec and
                      ddep.endswith('.' + self.target_spec)):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir[len(objroot):]
                return None
            # what follows objroot should be MACHINE/some/dir
            m = self.dirdep_re.match(self._strip_top(dir, objroot))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            self._debug_msg("adding .%s to %s" % (dmachine, ddep))
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue, the exception is always re-raised
            sys.stderr.write('{}:{}: '.format(self.name, self.line))
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.
        file    if set, an open file (or other iterable of lines) to
                read rather than opening self.name.  We do not close it.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        # defaults until we see the CWD record
        cwd = last_dir = self.cwd
        if file:
            f = file
        else:
            # text mode so that lines are str for Python 3 as well
            f = open(self.name, 'r')
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            # 'W' is deliberately absent.  Adding it for filemon
            # versions < 4 (where 'W' records may be 'rw') was
            # disabled in the original code too.
            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line or line[0] not in interesting:
                    continue
                if self.debug > 2:
                    self.debug_out.write("input: %s" % (line,))
                w = line.split()

                if skip:
                    # nothing but 'V' and 'CWD' matters until we
                    # have seen the 'V' record
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                    elif w[0] == 'CWD':
                        self.cwd = cwd = last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            self._debug_msg("%s: CWD=%s" % (self.name, cwd))
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # save the state of the previous process
                    # and pick up that of this one
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    last_dir = cwd
                    if self.debug > 1:
                        self._debug_msg("cwd= %s" % (cwd,))
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        self._debug_msg("seen: %s" % (w[2],))
                    continue
                # file operations
                if w[0] in 'ML':
                    path = w[2].strip("'")
                else:
                    path = w[2]
                # we are never interested in .dirdep files as dependencies
                if path.endswith('.dirdep'):
                    continue
                # we don't want to resolve the last component if it is
                # a symlink
                path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
                if not path:
                    continue
                dir, base = os.path.split(path)
                if dir in self.seen:
                    if self.debug > 2:
                        self._debug_msg("seen: %s" % (dir,))
                    continue
                # we can have a path in an objdir which is a link
                # to the src dir, we may need to add dependencies for each
                rdir = dir
                dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
                if rdir == dir or rdir.find('./') > 0:
                    rdir = None
                # now put path back together
                path = '/'.join([dir, base])
                if self.debug > 1:
                    self._debug_msg("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path))
                if w[0] in 'SRWL':
                    if w[0] == 'W' and path.endswith('.dirdep'):
                        continue
                    if path in [last_dir, cwd, self.cwd, self.curdir]:
                        if self.debug > 1:
                            self._debug_msg("skipping: %s" % (path,))
                        continue
                    if os.path.isdir(path):
                        # a directory is only a clue for resolving
                        # later relative paths
                        if w[0] in 'RW':
                            last_dir = path
                        if self.debug > 1:
                            self._debug_msg("ldir= %s" % (last_dir,))
                        continue

                if w[0] in 'REWML':
                    # finally, we get down to it
                    if dir == self.cwd or dir == self.curdir:
                        continue
                    srctop = self.find_top(path, self.srctops)
                    if srctop:
                        if self.dpdeps:
                            self.add(self.file_deps,
                                     self._strip_top(path, srctop), 'file')
                        self.add(self.src_deps,
                                 self._strip_top(dir, srctop), 'src')
                        self.seenit(w[2])
                        self.seenit(dir)
                        if rdir and not rdir.startswith(srctop):
                            dir = rdir      # for below
                            rdir = None
                        else:
                            continue

                    objroot = None
                    # NOTE(review): if no objroot matches, this loop
                    # leaves dir set to rdir (possibly None) and that
                    # is what gets passed to seenit() below.
                    # Kept as is, since marking the original dir as
                    # seen would change which later records we skip.
                    for dir in [dir, rdir]:
                        if not dir:
                            continue
                        objroot = self.find_top(dir, self.objroots)
                        if objroot:
                            break
                    if objroot:
                        ddep = self.find_obj(objroot, dir, path, w[2])
                        if ddep:
                            self.add(self.obj_deps, ddep, 'obj')
                    else:
                        # don't waste time looking again
                        self.seenit(w[2])
                        self.seenit(dir)
        finally:
            # only close what we opened, even if parsing failed
            if not file:
                f.close()
524
525
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -d  bumps debug level

    -q  do not output anything

    The environment variables MACHINE, MACHINE_ARCH, SB_SRC and
    SB_OBJROOT provide initial values if set.

    An argument ending in '.meta' is a meta file to parse,
    '@file' names a file containing a list of meta files.
    Other arguments are ignored.

    xopts   extra option letters to hand to getopt.
    xoptf   called as xoptf(option, arg, conf) for any option
            that we do not handle ourselves.

    Returns the last instance of klass that was created,
    or None if no meta files were processed.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # it is strictly optional
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # look each variable up on its own,
    # so that one which is missing does not hide the rest
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    nassign = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        nassign += 1
    args = args[nassign:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    # write() rather than print statements so that this is valid
    # for both Python 2 and Python 3
    if debug:
        debug_out.write("config:\n")
        debug_out.write("psyco= %s\n" % (have_psyco,))
        for k, v in conf.items():
            debug_out.write("%s=%s\n" % (k, v))

    m = None                            # in case there are no meta files
    for a in args:
        if a.endswith('.meta'):
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        m = klass(f, conf)

    if output and m is not None:
        sys.stdout.write(m.dirdeps() + '\n')
        sys.stdout.write(m.src_dirdeps('\nsrc:') + '\n')

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # NOTE: as before, this replaces any existing file
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
669
if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        # (write() so that this is valid for Python 2 and Python 3,
        # the text is the same as the print statement produced)
        sys.stdout.write("ERROR:  %s\n" % (sys.exc_info()[1],))
        raise
677
678