meta2deps.py revision 250837
167811Sobrien#!/usr/bin/env python
267811Sobrien
"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'	to initialize our notion of the current directory.

'C'	to track chdir(2) on a per process basis

'R'	files read are what we really care about.
	directories read, provide a clue to resolving
	subsequent relative paths.  That is if we cannot find
	them relative to 'cwd', we check relative to the last
	dir read.

'W'	files opened for write or read-write,
	for filemon V3 and earlier.

'E'	files executed.

'L'	files linked

'V'	the filemon version, this record is used as a clue
	that we have reached the interesting bit.

"""
35183440Smarcel
"""
RCSid:
	$Id: meta2deps.py,v 1.13 2013/05/11 05:16:26 sjg Exp $

	Copyright (c) 2011-2013, Juniper Networks, Inc.
	All rights reserved.

	Redistribution and use in source and binary forms, with or without
	modification, are permitted provided that the following conditions
	are met:
	1. Redistributions of source code must retain the above copyright
	   notice, this list of conditions and the following disclaimer.
	2. Redistributions in binary form must reproduce the above copyright
	   notice, this list of conditions and the following disclaimer in the
	   documentation and/or other materials provided with the distribution.

	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
6567811Sobrien
6696796Speterimport os, re, sys
6767811Sobrien
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the default d."""
    return dict[key] if key in dict else d
7367811Sobrien
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path        the path to resolve; a trailing '/.' is dropped.
    cwd         directory that '.', './...' and relative paths refer to.
    last_dir    optional directory tried before cwd for relative paths.
    debug       messages are written to debug_out when this exceeds 2.

    Absolute paths, '.' and './...' are answered without touching the
    file system.  Any other relative path is returned only if it
    exists below last_dir or cwd (checked in that order).
    Returns None if path is empty or cannot be found.
    """
    if not path:
        # nothing to resolve
        return None
    if path.endswith('/.'):
        path = path[0:-2]
        if not path:
            # the input was '/.', i.e. the root directory
            return '/'
    if path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d,path])
        if debug > 2:
            print >> debug_out, "looking for:", p,
        if not os.path.exists(p):
            if debug > 2:
                print >> debug_out, "nope"
            continue
        if debug > 2:
            print >> debug_out, "found:", p
        return p
    return None
103
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path via cwd or last_dir and return it as an absolute path.

    This is called very often, so realpath is only invoked once
    resolve() has produced something and that result either still
    contains './', ends in '/..', or is a symbolic link.
    Whatever resolve() returned is passed through otherwise.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if not resolved:
        return resolved
    if resolved.find('./') > 0:
        return os.path.realpath(resolved)
    if resolved.endswith('/..'):
        return os.path.realpath(resolved)
    if os.path.islink(resolved):
        return os.path.realpath(resolved)
    return resolved
116
def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without duplicates.

    list        the list to sort; note that it is modified (sorted).
    cmp         optional comparison function (Python 2 only).
    key         optional key function handed to list.sort().
    reverse     sort in descending order when true.

    Returns a new list holding the sorted elements with runs of
    equal elements collapsed to a single entry.
    """
    if cmp is None:
        # keyword form works with every Python version
        list.sort(key=key, reverse=reverse)
    else:
        list.sort(cmp, key, reverse)
    nl = []
    for e in list:
        # equal elements are adjacent after sorting, so comparing with
        # the last element kept is enough to drop duplicates.
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
126
def add_trims(x):
    """Return x wrapped in '/' on both sides, in front, behind, and bare."""
    wrappers = (('/', '/'), ('/', ''), ('', '/'), ('', ''))
    return [lead + x + tail for lead, tail in wrappers]
132
class MetaFile:
    """Parse meta files generated by bmake and record their dependencies.

    Parsing a meta file contributes to three lists:

    obj_deps    directories below an object root whose contents were
                used; reported by dirdeps().
    src_deps    directories below a source top that were referenced;
                reported by src_dirdeps().
    file_deps   individual files below a source top, only collected
                when DPDEPS is configured; reported by file_depends().
    """

    # These are class attributes.  The lists and the 'seen' dict are
    # mutated in place, so what one instance records is visible through
    # every other instance until reset() rebinds them on an instance.
    # main() relies on this: it creates one MetaFile per meta file and
    # reports the accumulated result through the last one.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf={}):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                stored as machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        conf is only read, never modified, so the shared default
        dictionary is harmless.
        """

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE(review): conf is stored on the instance, so a new
            # instance sees the class value (None) again and repeats
            # this block; the 'not in' checks keep the shared lists
            # free of duplicates - confirm whether that is intended.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    # an empty top would turn into '/' and match everything
                    continue
                if not srctop.endswith('/'):
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                # also remember the symlink-free spelling
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            # suffixes to strip from each objroot.  Empty names are left
            # out: add_trims('') yields '' which every string ends with,
            # and that would trim the objroot down to nothing.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot:
                    # an empty prefix would match every path
                    continue
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    # also remember the symlink-free spelling
                    _objroot = os.path.realpath(objroot)
                    if objroot.endswith('/'):
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                print >> self.debug_out, "host_target=", self.host_target
                print >> self.debug_out, "srctops=", self.srctops
                print >> self.debug_out, "objroots=", self.objroots

            # splits '<machine>/<dirdep>'
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print >> self.debug_out, "need reldir:",
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self._strip_top(self.curdir, srctop)
                    if self.debug:
                        print >> self.debug_out, self.reldir
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized

        if name:
            self.parse()

    def _strip_top(self, path, top):
        """return path without its leading top; unchanged if top is not a prefix.

        Only the leading occurrence is removed, so a path that happens
        to repeat top further down is left intact."""
        if top and path.startswith(top):
            return path[len(top):]
        return path

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Does nothing (returns None) when reldir is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print >> out, 'DPDEPS_%s += %s' % (f, self.reldir)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.
        clue only labels the debug message."""
        if data not in list:
            list.append(data)
            if self.debug:
                print >> self.debug_out, "%s: %sAdd: %s" % (self.name, clue, data)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees.
        return the first entry of list that path starts with, else None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print >> self.debug_out, "found in", top
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the object root that dir was found under.
        dir     directory of the file being considered.
        path    full path of the file being considered.
        input   the raw path as it appeared in the meta file.

        Returns the dirdep string, or None if one cannot be deduced.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                f = open(ddepf, 'r')
                try:
                    ddep = f.readline().strip('# \n')
                finally:
                    f.close()
                if self.debug > 1:
                    print >> self.debug_out, "found %s: %s\n" % (ddepf, ddep)
                # drop a trailing '<sep><machine>' qualifier; an empty
                # name must be skipped since every string ends with ''.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir[len(objroot):]
                return None
            m = self.dirdep_re.match(self._strip_top(dir, objroot))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print >> self.debug_out, "adding .%s to %s" % (dmachine, ddep)
                        ddep += '.' + dmachine

        return ddep

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name as the file to read.
        file    if set, an iterable of lines to parse instead of
                opening self.name; it is not closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        # start from the cwd we know about; a CWD record and the
        # per-pid state below override it.
        cwd = last_dir = self.cwd
        if file:
            f = file
        else:
            f = open(self.name, 'rb')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print >> self.debug_out, "input:", line,
                w = line.split()

                if skip:
                    # everything before the 'V' record is header and
                    # command output; only CWD matters there.
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # NOTE: for version < 4 'W' records may be
                        # 'rw'; adding 'W' to interesting here is
                        # disabled in the original code.
                    elif w[0] == 'CWD':
                        self.cwd = cwd = last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print >> self.debug_out, "%s: CWD=%s" % (self.name, cwd)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save the state of the old
                    # one and pick up that of the new one.
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        # could not be resolved (e.g. the directory is
                        # gone by now); fall back to the raw value.
                        cwd = w[2]
                        if self.debug > 1:
                            print >> self.debug_out, "missing cwd=", cwd
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    last_dir = cwd
                    if self.debug > 1:
                        print >> self.debug_out, "cwd=", cwd
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print >> self.debug_out, "seen:", w[2]
                    continue
                # file operations
                if w[0] in 'ML':
                    path = w[2].strip("'")
                else:
                    path = w[2]
                # we are never interested in .dirdep files as dependencies
                if path.endswith('.dirdep'):
                    continue
                # we don't want to resolve the last component if it is
                # a symlink
                path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
                if not path:
                    continue
                dir,base = os.path.split(path)
                if dir in self.seen:
                    if self.debug > 2:
                        print >> self.debug_out, "seen:", dir
                    continue
                # we can have a path in an objdir which is a link
                # to the src dir, we may need to add dependencies for each
                rdir = dir
                dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
                if rdir == dir or rdir.find('./') > 0:
                    rdir = None
                # now put path back together
                path = '/'.join([dir,base])
                if self.debug > 1:
                    print >> self.debug_out, "raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path)
                if w[0] in 'SRWL':
                    if w[0] == 'W' and path.endswith('.dirdep'):
                        continue
                    if path in [last_dir, cwd, self.cwd, self.curdir]:
                        if self.debug > 1:
                            print >> self.debug_out, "skipping:", path
                        continue
                    if os.path.isdir(path):
                        # a directory is not a dependency, but it is a
                        # clue for resolving later relative paths.
                        if w[0] in 'RW':
                            last_dir = path
                        if self.debug > 1:
                            print >> self.debug_out, "ldir=", last_dir
                        continue

                if w[0] in 'REWML':
                    # finally, we get down to it
                    if dir == self.cwd or dir == self.curdir:
                        continue
                    srctop = self.find_top(path, self.srctops)
                    if srctop:
                        if self.dpdeps:
                            self.add(self.file_deps, self._strip_top(path, srctop), 'file')
                        self.add(self.src_deps, self._strip_top(dir, srctop), 'src')
                        self.seenit(w[2])
                        self.seenit(dir)
                        if rdir and not rdir.startswith(srctop):
                            dir = rdir      # for below
                            rdir = None
                        else:
                            continue

                    objroot = None
                    for dir in [dir,rdir]:
                        if not dir:
                            continue
                        objroot = self.find_top(dir, self.objroots)
                        if objroot:
                            break
                    if objroot:
                        ddep = self.find_obj(objroot, dir, path, w[2])
                        if ddep:
                            self.add(self.obj_deps, ddep, 'obj')
                    else:
                        # don't waste time looking again
                        self.seenit(w[2])
                        self.seenit(dir)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()
510
511
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -T "TARGET_SPEC"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d  bumps debug level

    -q  suppress the output of the results

    The environment variables MACHINE, MACHINE_ARCH, SB_SRC and
    SB_OBJROOT provide initial values when set and non-empty.

    argv    the complete argument vector, argv[0] is skipped.
    klass   the class to instantiate for each meta file.
    xopts   extra getopt option letters handled by xoptf.
    xoptf   called as xoptf(option, argument, conf) for any option
            not handled here.

    Returns the instance created for the last meta file.
    Raises ValueError if no meta file was given.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optimization; carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # each variable is looked up on its own, so one that is missing
    # does not stop the others from being used.
    env = os.environ
    machine = env.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = env.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = env.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = env.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments; they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k,v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    # what is left are the meta files
    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print >> debug_out, "config:"
        print >> debug_out, "psyco=", have_psyco
        for k,v in conf.items():
            print >> debug_out, "%s=%s" % (k,v)

    m = None
    for a in args:
        m = klass(a, conf)

    if m is None:
        raise ValueError('no meta files specified')

    if output:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # NOTE(review): the MetaFile documentation speaks of
            # appending to DPDEPS, yet the file is opened for writing
            # (truncated) here - confirm which is intended.
            out = open(dpdeps, 'wb')
            try:
                m.file_depends(out)
            finally:
                out.close()

    return m
649
if __name__ == '__main__':
    # Run as a script: hand the command line to main().
    try:
        main(sys.argv)
    except:
        # Report whatever went wrong as a one-line message, then
        # re-raise the original exception.
        # yes, this goes to stdout
        print "ERROR: ", sys.exc_info()[1]
        raise
657
658