summaryrefslogtreecommitdiff
path: root/mk/meta2deps.py
diff options
context:
space:
mode:
authorSimon J. Gerraty <sjg@FreeBSD.org>2013-01-31 16:44:23 +0000
committerSimon J. Gerraty <sjg@FreeBSD.org>2013-01-31 16:44:23 +0000
commit70bd6b310d1f226fcbc7a14b3550c55d9223648d (patch)
treecd8c62d639d54c7d3d9c6803debc5613a5ad27bc /mk/meta2deps.py
parent7ab046e1f1b382bc125699b0c0d589273adcd420 (diff)
downloadsrc-test2-70bd6b310d1f226fcbc7a14b3550c55d9223648d.tar.gz
src-test2-70bd6b310d1f226fcbc7a14b3550c55d9223648d.zip
Notes
Diffstat (limited to 'mk/meta2deps.py')
-rwxr-xr-xmk/meta2deps.py614
1 files changed, 614 insertions, 0 deletions
diff --git a/mk/meta2deps.py b/mk/meta2deps.py
new file mode 100755
index 000000000000..cb6d3213b221
--- /dev/null
+++ b/mk/meta2deps.py
@@ -0,0 +1,614 @@
+#!/usr/bin/env python
+
+"""
+This script parses each "meta" file and extracts the
+information needed to deduce build and src dependencies.
+
+It works much the same as the original shell script, but is
+*much* more efficient.
+
+The parsing work is handled by the class MetaFile.
+We only pay attention to a subset of the information in the
+"meta" files. Specifically:
+
+'CWD' to initialize our notion.
+
+'C' to track chdir(2) on a per process basis
+
+'R' files read are what we really care about.
+ directories read, provide a clue to resolving
+ subsequent relative paths. That is if we cannot find
+ them relative to 'cwd', we check relative to the last
+ dir read.
+
+'W' files opened for write or read-write,
+ for filemon V3 and earlier.
+
+'E' files executed.
+
+'L' files linked
+
+'V' the filemon version, this record is used as a clue
+ that we have reached the interesting bit.
+
+"""
+
+"""
+RCSid:
+ $Id: meta2deps.py,v 1.7 2012/11/06 05:44:03 sjg Exp $
+
+ Copyright (c) 2011, Juniper Networks, Inc.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+
+import os, re, sys
+
+def getv(dict, key, d=None):
+ """Lookup key in dict and return value or the supplied default."""
+ if key in dict:
+ return dict[key]
+ return d
+
+def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
+ """
+ Return an absolute path, resolving via cwd or last_dir if needed.
+ """
+ if path.endswith('/.'):
+ path = path[0:-2]
+ if path[0] == '/':
+ return path
+ if path == '.':
+ return cwd
+ if path.startswith('./'):
+ return cwd + path[1:]
+ if last_dir == cwd:
+ last_dir = None
+ for d in [last_dir, cwd]:
+ if not d:
+ continue
+ p = '/'.join([d,path])
+ if debug > 2:
+ print >> debug_out, "looking for:", p,
+ if not os.path.exists(p):
+ if debug > 2:
+ print >> debug_out, "nope"
+ p = None
+ continue
+ if debug > 2:
+ print >> debug_out, "found:", p
+ return p
+ return None
+
+def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
+ """
+ Return an absolute path, resolving via cwd or last_dir if needed.
+ this gets called a lot, so we try to avoid calling realpath
+ until we know we have something.
+ """
+ path = resolve(path, cwd, last_dir, debug, debug_out)
+ if path and (path.find('./') > 0 or
+ path.endswith('/..') or
+ os.path.islink(path)):
+ return os.path.realpath(path)
+ return path
+
+def sort_unique(list, cmp=None, key=None, reverse=False):
+ list.sort(cmp, key, reverse)
+ nl = []
+ le = None
+ for e in list:
+ if e == le:
+ continue
+ nl.append(e)
+ return nl
+
+class MetaFile:
+ """class to parse meta files generated by bmake."""
+
+ conf = None
+ dirdep_re = None
+ host_target = None
+ srctops = []
+ objroots = []
+
+ seen = {}
+ obj_deps = []
+ src_deps = []
+ file_deps = []
+
+ def __init__(self, name, conf={}):
+ """if name is set we will parse it now.
+ conf can have the follwing keys:
+
+ SRCTOPS list of tops of the src tree(s).
+
+ CURDIR the src directory 'bmake' was run from.
+
+ RELDIR the relative path from SRCTOP to CURDIR
+
+ MACHINE the machine we built for.
+ set to 'none' if we are not cross-building.
+
+ HOST_TARGET
+ when we build for the psuedo machine 'host'
+ the object tree uses HOST_TARGET rather than MACHINE.
+
+ OBJROOTS a list of the common prefix for all obj dirs it might
+ end in '/' or '-'.
+
+ DPDEPS names an optional file to which per file dependencies
+ will be appended.
+ For example if 'some/path/foo.h' is read from SRCTOP
+ then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
+ This can allow 'bmake' to learn all the dirs within
+ the tree that depend on 'foo.h'
+
+ debug desired debug level
+
+ debug_out open file to send debug output to (sys.stderr)
+
+ """
+
+ self.name = name
+ self.debug = getv(conf, 'debug', 0)
+ self.debug_out = getv(conf, 'debug_out', sys.stderr)
+
+ if not self.conf:
+ # some of the steps below we want to do only once
+ self.conf = conf
+ self.host_target = getv(conf, 'HOST_TARGET')
+ for srctop in getv(conf, 'SRCTOPS', []):
+ if srctop[-1] != '/':
+ srctop += '/'
+ if not srctop in self.srctops:
+ self.srctops.append(srctop)
+ _srctop = os.path.realpath(srctop)
+ if _srctop[-1] != '/':
+ _srctop += '/'
+ if not _srctop in self.srctops:
+ self.srctops.append(_srctop)
+
+ for objroot in getv(conf, 'OBJROOTS', []):
+ if not objroot in self.objroots:
+ self.objroots.append(objroot)
+ _objroot = os.path.realpath(objroot)
+ if objroot[-1] == '/':
+ _objroot += '/'
+ if not _objroot in self.objroots:
+ self.objroots.append(_objroot)
+
+ if self.debug:
+ print >> self.debug_out, "host_target=", self.host_target
+ print >> self.debug_out, "srctops=", self.srctops
+ print >> self.debug_out, "objroots=", self.objroots
+
+ self.dirdep_re = re.compile(r'([^/]+)/(.+)')
+
+ self.curdir = getv(conf, 'CURDIR')
+ self.machine = getv(conf, 'MACHINE', '')
+ self.reldir = getv(conf, 'RELDIR')
+ self.dpdeps = getv(conf, 'DPDEPS')
+ if self.dpdeps and not self.reldir:
+ if self.debug:
+ print >> self.debug_out, "need reldir:",
+ if self.curdir:
+ srctop = self.find_top(self.curdir, self.srctops)
+ if srctop:
+ self.reldir = self.curdir.replace(srctop,'')
+ if self.debug:
+ print >> self.debug_out, self.reldir
+ if not self.reldir:
+ self.dpdeps = None # we cannot do it?
+
+ if name:
+ self.parse()
+
+ def reset(self):
+ """reset state if we are being passed meta files from multiple directories."""
+ self.seen = {}
+ self.obj_deps = []
+ self.src_deps = []
+ self.file_deps = []
+
+ def dirdeps(self, sep='\n'):
+ """return DIRDEPS"""
+ return sep.strip() + sep.join(self.obj_deps)
+
+ def src_dirdeps(self, sep='\n'):
+ """return SRC_DIRDEPS"""
+ return sep.strip() + sep.join(self.src_deps)
+
+ def file_depends(self, out=None):
+ """Append DPDEPS_${file} += ${RELDIR}
+ for each file we saw, to the output file."""
+ if not self.reldir:
+ return None
+ for f in sort_unique(self.file_deps):
+ print >> out, 'DPDEPS_%s += %s' % (f, self.reldir)
+
+ def seenit(self, dir):
+ """rememer that we have seen dir."""
+ self.seen[dir] = 1
+
+ def add(self, list, data, clue=''):
+ """add data to list if it isn't already there."""
+ if data not in list:
+ list.append(data)
+ if self.debug:
+ print >> self.debug_out, "%s: %sAdd: %s" % (self.name, clue, data)
+
+ def find_top(self, path, list):
+ """the logical tree may be split accross multiple trees"""
+ for top in list:
+ if path.startswith(top):
+ if self.debug > 2:
+ print >> self.debug_out, "found in", top
+ return top
+ return None
+
+ def find_obj(self, objroot, dir, path, input):
+ """return path within objroot, taking care of .dirdep files"""
+ ddep = None
+ for ddepf in [path + '.dirdep', dir + '/.dirdep']:
+ if not ddep and os.path.exists(ddepf):
+ ddep = open(ddepf, 'rb').readline().strip('# \n')
+ if self.debug > 1:
+ print >> self.debug_out, "found %s: %s\n" % (ddepf, ddep)
+ if ddep.endswith(self.machine):
+ ddep = ddep[0:-(1+len(self.machine))]
+
+ if not ddep:
+ # no .dirdeps, so remember that we've seen the raw input
+ self.seenit(input)
+ self.seenit(dir)
+ if self.machine == 'none':
+ if dir.startswith(objroot):
+ return dir.replace(objroot,'')
+ return None
+ m = self.dirdep_re.match(dir.replace(objroot,''))
+ if m:
+ ddep = m.group(2)
+ dmachine = m.group(1)
+ if dmachine != self.machine:
+ if not (self.machine == 'host' and
+ dmachine == self.host_target):
+ if self.debug > 2:
+ print >> self.debug_out, "adding .%s to %s" % (dmachine, ddep)
+ ddep += '.' + dmachine
+
+ return ddep
+
+ def parse(self, name=None, file=None):
+ """A meta file looks like:
+
+ # Meta data file "path"
+ CMD "command-line"
+ CWD "cwd"
+ TARGET "target"
+ -- command output --
+ -- filemon acquired metadata --
+ # buildmon version 3
+ V 3
+ C "pid" "cwd"
+ E "pid" "path"
+ F "pid" "child"
+ R "pid" "path"
+ W "pid" "path"
+ X "pid" "status"
+ D "pid" "path"
+ L "pid" "src" "target"
+ M "pid" "old" "new"
+ S "pid" "path"
+ # Bye bye
+
+ We go to some effort to avoid processing a dependency more than once.
+ Of the above record types only C,E,F,L,R,V and W are of interest.
+ """
+
+ version = 0 # unknown
+ if name:
+ self.name = name;
+ if file:
+ f = file
+ cwd = last_dir = self.cwd
+ else:
+ f = open(self.name, 'rb')
+ skip = True
+ pid_cwd = {}
+ pid_last_dir = {}
+ last_pid = 0
+
+ if self.curdir:
+ self.seenit(self.curdir) # we ignore this
+
+ interesting = 'CEFLRV'
+ for line in f:
+ # ignore anything we don't care about
+ if not line[0] in interesting:
+ continue
+ if self.debug > 2:
+ print >> self.debug_out, "input:", line,
+ w = line.split()
+
+ if skip:
+ if w[0] == 'V':
+ skip = False
+ version = int(w[1])
+ """
+ if version < 4:
+ # we cannot ignore 'W' records
+ # as they may be 'rw'
+ interesting += 'W'
+ """
+ elif w[0] == 'CWD':
+ self.cwd = cwd = last_dir = w[1]
+ self.seenit(cwd) # ignore this
+ if self.debug:
+ print >> self.debug_out, "%s: CWD=%s" % (self.name, cwd)
+ continue
+
+ pid = int(w[1])
+ if pid != last_pid:
+ if last_pid:
+ pid_cwd[last_pid] = cwd
+ pid_last_dir[last_pid] = last_dir
+ cwd = getv(pid_cwd, pid, self.cwd)
+ last_dir = getv(pid_last_dir, pid, self.cwd)
+ last_pid = pid
+
+ # process operations
+ if w[0] == 'F':
+ npid = int(w[2])
+ pid_cwd[npid] = cwd
+ pid_last_dir[npid] = cwd
+ last_pid = npid
+ continue
+ elif w[0] == 'C':
+ cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
+ if cwd.endswith('/.'):
+ cwd = cwd[0:-2]
+ last_dir = cwd
+ if self.debug > 1:
+ print >> self.debug_out, "cwd=", cwd
+ continue
+
+ if w[2] in self.seen:
+ if self.debug > 2:
+ print >> self.debug_out, "seen:", w[2]
+ continue
+ # file operations
+ if w[0] in 'ML':
+ path = w[2].strip("'")
+ else:
+ path = w[2]
+ # we are never interested in .dirdep files as dependencies
+ if path.endswith('.dirdep'):
+ continue
+ # we don't want to resolve the last component if it is
+ # a symlink
+ path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
+ if not path:
+ continue
+ dir,base = os.path.split(path)
+ if dir in self.seen:
+ if self.debug > 2:
+ print >> self.debug_out, "seen:", dir
+ continue
+ # we can have a path in an objdir which is a link
+ # to the src dir, we may need to add dependencies for each
+ rdir = dir
+ dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
+ if rdir == dir or rdir.find('./') > 0:
+ rdir = None
+ # now put path back together
+ path = '/'.join([dir,base])
+ if self.debug > 1:
+ print >> self.debug_out, "raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path)
+ if w[0] in 'SRWL':
+ if w[0] == 'W' and path.endswith('.dirdep'):
+ continue
+ if path in [last_dir, cwd, self.cwd, self.curdir]:
+ if self.debug > 1:
+ print >> self.debug_out, "skipping:", path
+ continue
+ if os.path.isdir(path):
+ if w[0] in 'RW':
+ last_dir = path;
+ if self.debug > 1:
+ print >> self.debug_out, "ldir=", last_dir
+ continue
+
+ if w[0] in 'REWML':
+ # finally, we get down to it
+ if dir == self.cwd or dir == self.curdir:
+ continue
+ srctop = self.find_top(path, self.srctops)
+ if srctop:
+ if self.dpdeps:
+ self.add(self.file_deps, path.replace(srctop,''), 'file')
+ self.add(self.src_deps, dir.replace(srctop,''), 'src')
+ self.seenit(w[2])
+ self.seenit(dir)
+ if rdir and not rdir.startswith(srctop):
+ dir = rdir # for below
+ rdir = None
+ else:
+ continue
+
+ objroot = None
+ for dir in [dir,rdir]:
+ if not dir:
+ continue
+ objroot = self.find_top(dir, self.objroots)
+ if objroot:
+ break
+ if objroot:
+ ddep = self.find_obj(objroot, dir, path, w[2])
+ if ddep:
+ self.add(self.obj_deps, ddep, 'obj')
+ else:
+ # don't waste time looking again
+ self.seenit(w[2])
+ self.seenit(dir)
+ if not file:
+ f.close()
+
+
+def main(argv, klass=MetaFile, xopts='', xoptf=None):
+ """Simple driver for class MetaFile.
+
+ Usage:
+ script [options] [key=value ...] "meta" ...
+
+ Options and key=value pairs contribute to the
+ dictionary passed to MetaFile.
+
+ -S "SRCTOP"
+ add "SRCTOP" to the "SRCTOPS" list.
+
+ -C "CURDIR"
+
+ -O "OBJROOT"
+ add "OBJROOT" to the "OBJROOTS" list.
+
+ -m "MACHINE"
+
+ -H "HOST_TARGET"
+
+ -D "DPDEPS"
+
+ -d bumps debug level
+
+ """
+ import getopt
+
+ # import Psyco if we can
+ # it can speed things up quite a bit
+ have_psyco = 0
+ try:
+ import psyco
+ psyco.full()
+ have_psyco = 1
+ except:
+ pass
+
+ conf = {
+ 'SRCTOPS': [],
+ 'OBJROOTS': [],
+ }
+
+ try:
+ machine = os.environ['MACHINE']
+ if machine:
+ conf['MACHINE'] = machine
+ srctop = os.environ['SB_SRC']
+ if srctop:
+ conf['SRCTOPS'].append(srctop)
+ objroot = os.environ['SB_OBJROOT']
+ if objroot:
+ conf['OBJROOTS'].append(objroot)
+ except:
+ pass
+
+ debug = 0
+ output = True
+
+ opts, args = getopt.getopt(argv[1:], 'dS:C:O:R:m:D:H:q' + xopts)
+ for o, a in opts:
+ if o == '-d':
+ debug += 1
+ elif o == '-q':
+ output = False
+ elif o == '-H':
+ conf['HOST_TARGET'] = a
+ elif o == '-S':
+ if a not in conf['SRCTOPS']:
+ conf['SRCTOPS'].append(a)
+ elif o == '-C':
+ conf['CURDIR'] = a
+ elif o == '-O':
+ if a not in conf['OBJROOTS']:
+ conf['OBJROOTS'].append(a)
+ elif o == '-R':
+ conf['RELDIR'] = a
+ elif o == '-D':
+ conf['DPDEPS'] = a
+ elif o == '-m':
+ conf['MACHINE'] = a
+ elif xoptf:
+ xoptf(o, a, conf)
+
+ conf['debug'] = debug
+
+ # get any var=val assignments
+ eaten = []
+ for a in args:
+ if a.find('=') > 0:
+ k,v = a.split('=')
+ if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
+ if k == 'SRCTOP':
+ k = 'SRCTOPS'
+ elif k == 'OBJROOT':
+ k = 'OBJROOTS'
+ if v not in conf[k]:
+ conf[k].append(v)
+ else:
+ conf[k] = v
+ eaten.append(a)
+ continue
+ break
+
+ for a in eaten:
+ args.remove(a)
+
+ debug_out = getv(conf, 'debug_out', sys.stderr)
+
+ if debug:
+ print >> debug_out, "config:"
+ print >> debug_out, "psyco=", have_psyco
+ for k,v in conf.items():
+ print >> debug_out, "%s=%s" % (k,v)
+
+ for a in args:
+ m = klass(a, conf)
+
+ if output:
+ print m.dirdeps()
+
+ print m.src_dirdeps('\nsrc:')
+
+ dpdeps = getv(conf, 'DPDEPS')
+ if dpdeps:
+ m.file_depends(open(dpdeps, 'wb'))
+
+ return m
+
+if __name__ == '__main__':
+ try:
+ main(sys.argv)
+ except:
+ # yes, this goes to stdout
+ print "ERROR: ", sys.exc_info()[1]
+ raise
+