meta2deps.py revision 321964
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
16'CWD'	to initialize our notion of the current working directory.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$Id: meta2deps.py,v 1.27 2017/05/24 00:04:04 sjg Exp $
41
42	Copyright (c) 2011-2013, Juniper Networks, Inc.
43	All rights reserved.
44
45	Redistribution and use in source and binary forms, with or without
46	modification, are permitted provided that the following conditions
47	are met:
48	1. Redistributions of source code must retain the above copyright
49	   notice, this list of conditions and the following disclaimer.
50	2. Redistributions in binary form must reproduce the above copyright
51	   notice, this list of conditions and the following disclaimer in the
52	   documentation and/or other materials provided with the distribution.
53
54	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65
66"""
67
68import os, re, sys
69
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d."""
    return dict[key] if key in dict else d
75
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path, or return None if that fails.

    A trailing '/.' is dropped first.  Absolute paths are returned
    unchanged, '.' maps to cwd and './x' to cwd + '/x'.  Any other
    relative path is tried against last_dir and then cwd, and the
    first candidate that exists on disk wins; '..' simply yields the
    parent of the first usable base directory.
    """
    verbose = debug > 2

    if path.endswith('/.'):
        path = path[:-2]
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # no point probing the same directory twice
    if last_dir == cwd:
        bases = [cwd]
    else:
        bases = [last_dir, cwd]

    for base in bases:
        if not base:
            continue
        if path == '..':
            # everything before the final '/', or the root if nothing is left
            return base.rpartition('/')[0] or '/'
        candidate = '/'.join([base, path])
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
111
def cleanpath(path):
    """
    Normalize path textually, without consulting the filesystem.

    Empty and '.' components are dropped and '..' removes the
    preceding component.  A '..' with nothing left to remove stops
    processing, so the remaining components are discarded.
    A leading '/' is preserved.
    """
    prefix = '/' if path.startswith('/') else ''
    kept = []
    for part in path.split('/'):
        if part in ('', '.'):
            continue
        if part != '..':
            kept.append(part)
        elif kept:
            kept.pop()
        else:
            # '..' would climb above the start of the path: give up here
            break
    return prefix + '/'.join(kept)
132
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path via resolve() and tidy the result when it looks untidy.

    If resolve() yields nothing the original path is used as is.
    cleanpath() is only applied when the path has no '/', contains
    './' past its first character, or ends in '/..'; this is called
    a lot, so realpath is deliberately avoided.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    candidate = resolved if resolved else path
    untidy = ('/' not in candidate
              or candidate.find('./') > 0
              or candidate.endswith('/..'))
    return cleanpath(candidate) if untidy else candidate
146
def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list with duplicates removed.

    list     the list to sort; it is sorted in place as a side effect.
    cmp      optional old-style comparison function taking two items
             and returning a negative, zero or positive number.
    key      optional function extracting a sort key from each item.
             If both cmp and key are given, cmp compares the keys.
    reverse  sort in descending order when true.

    Only adjacent equal elements (after sorting) are collapsed.
    """
    if cmp is not None:
        # list.sort() only accepts positional cmp/key/reverse on
        # python2; wrapping cmp as a key works on both 2.7 and 3.x.
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            item_key = key
            key = lambda item: cmp_key(item_key(item))
    list.sort(key=key, reverse=reverse)

    nl = []
    for e in list:
        # compare with the last kept element rather than a None
        # sentinel, so that a leading None entry is not dropped
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
157
def add_trims(x):
    """
    Return the four spellings of x that may need trimming from a path:
    '/x/', '/x', 'x/' and 'x', in that (longest first) order.
    """
    wrappers = (('/', '/'), ('/', ''), ('', '/'), ('', ''))
    return [head + x + tail for head, tail in wrappers]
163
class MetaFile:
    """class to parse meta files generated by bmake."""

    # These are class attributes.  The lists and dict are shared by
    # all instances: srctops and objroots are only ever appended to
    # and sorted, while seen, obj_deps, src_deps and file_deps stay
    # shared until reset() rebinds them on an instance.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                recorded as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            # avoid sharing one mutable default dict between calls
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once.
            # Note: the assignment below binds conf on the instance,
            # the class attribute stays None, so this block runs for
            # each new instance; the membership tests keep the shared
            # srctops/objroots lists free of duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # ignore empty entries
                if srctop[-1] != '/':
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                # also remember where the tree really lives
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            # suffixes to strip from OBJROOTS entries.
            # Empty or unset names are skipped: an empty suffix
            # matches everything and objroot[0:-0] would wipe the
            # whole string.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                if not objroot:
                    continue            # ignore empty entries
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot.endswith('/'):
                    objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            # splits "machine/rest/of/path" into (machine, rest)
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS

        The result is sep (stripped of whitespace) followed by the
        obj_deps entries joined with sep.
        """
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS

        The result is sep (stripped of whitespace) followed by the
        src_deps entries joined with sep.
        """
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is passed to print() as its file argument.
        Nothing is written if reldir is not known.
        """
        if not self.reldir:
            return None
        # sorted() rather than list.sort(cmp, key, reverse),
        # which python3 rejects
        for f in sorted(set(self.file_deps)):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug output.
        """
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot  the object root that dir was found under.
        dir      directory of the file being looked at.
        path     full path of the file being looked at.
        input    the raw path from the meta file, marked as seen when
                 there is no .dirdep file to consult.

        Returns the directory to depend on, or None.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as fh:
                    ddep = fh.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # strip the machine (or target spec) suffix and the
                # character before it.  An empty machine matches
                # everything and would just chop the last character,
                # so it is skipped.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except BaseException:
            # give a useful clue, then let the exception propagate
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name  if set, replaces self.name.
        file  if set, an already open file (or other iterable of
              lines) to read instead of opening self.name; it is not
              closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            cwd = self.cwd              # until we see a CWD record
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if line[0] not in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # still in the header, wait for the 'V' record
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # The following is deliberately disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save and restore per pid state
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in its parent's directory
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.
                    # w[1] is the pid, the paths are w[2] and w[3].
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and rdir (which may be None) are tried in turn.  On the
        first match the directory is added to src_deps (and the file
        to file_deps when dpdeps is set), the directory is marked as
        seen and True is returned.
        """
        for d in [dir, rdir]:
            if not d:
                continue
            path = '/'.join([d, base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, d.replace(srctop,''), 'src')
                self.seenit(d)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        path  the path from the record.
        cwd   current directory of the process that touched it.
        op    the operation; defaults to w[0].
        w     the split record; w[2] is treated as the raw path,
              path itself is used when w is too short.
        """

        if w is None:
            w = []
        if not op:
            op = w[0]
        if len(w) > 2:
            raw = w[2]
        else:
            raw = path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(raw)
                if not rdir:
                    return

            objroot = None
            cand = None
            for cand in [dir, rdir]:
                if not cand:
                    continue
                objroot = self.find_top(cand, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, cand, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(raw)
                # NOTE(review): cand is the last candidate tried, i.e.
                # rdir, which is None when dir is not a symlink - so
                # this may record None rather than dir; confirm intent.
                self.seenit(cand)
592
593
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -d  bumps debug level

    -q  do not print the results

    MACHINE, MACHINE_ARCH, SB_SRC and SB_OBJROOT from the
    environment provide initial values.

    A "meta" argument of the form @file names a file which lists
    meta files to process.

    klass   the class to instantiate for each meta file.
    xopts   extra getopt option characters to accept.
    xoptf   called as xoptf(option, value, conf) for any option
            not handled here.

    Returns the last instance of klass created (None if there was none).
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is looked up on its own, so that one being unset
    # does not cause the others to be ignored
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    neaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # only split on the first '=', the value may contain more
        k,v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        neaten += 1

    args = args[neaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually be multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() writes str, which a file opened
            # 'wb' rejects with python3
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
746
if __name__ == '__main__':
    # script entry point: run the driver on the command line
    try:
        main(sys.argv)
    except BaseException:
        # yes, this goes to stdout
        err = sys.exc_info()[1]
        print("ERROR: ", err)
        # let the original exception propagate
        raise
754
755