diff options
-rwxr-xr-x | Tools/portbuild/scripts/straslivy.py | 319 |
1 files changed, 181 insertions, 138 deletions
#!/usr/bin/env python
# Initially by pav, refactored by kris
#
# XXX Todo: handle ipv6 sockets (used by e.g. sparc64)
#
# Reconstructed from the diff of Tools/portbuild/scripts/straslivy.py
# (index 10e7d7007040..b67b75dd2a8d); this is the post-change version.
"""Print one status line per running pdispatch job.

For every pdispatch process matched by pgrep the report shows the
architecture, branch, port, the remote host of a socket owned by one of the
job's subprocesses (if any), and a short description of what the job is
currently doing together with an elapsed time.
"""

import socket
import sys

try:
    import commands
except ImportError:
    # The commands module does not exist on Python 3; subprocess offers the
    # same getoutput() call, so alias it to keep the rest of the file as is.
    import subprocess as commands

# Column widths of the report; widened as rows are collected.
archwidth = branchwidth = portwidth = hostwidth = 0

# Ordered (substrings, label) pairs: the first entry whose substrings all
# occur in a subprocess command line provides the status text.
_COMMAND_LABELS = (
    (('ptimeout.host',), "building"),
    (('scripts/clean-chroot',), "cleaning up the node"),
    (('scripts/claim-chroot',), "preparing node"),
    (('tar --unlink',), "copying package"),
    (('buildsuccess',), "registering success"),
    (('buildfailure',), "registering failure"),
    (('scp ', '.log '), "copying logs"),
    (('ssh',), "building"),
)

# Command line of the subprocess a job runs while waiting for a node.
_IDLE_COMMAND = 'sleep 15'


def _describe(command):
    """Return the status text for a subprocess command line.

    Command lines that match no known pattern are returned unchanged.
    """
    if command == _IDLE_COMMAND:
        return "waiting for idle node"
    for needles, label in _COMMAND_LABELS:
        if all(needle in command for needle in needles):
            return label
    return command


def getpdispatch():
    """Get the list of running pdispatch processes.

    The pgrep pattern is "pdispatch " followed by this script's own
    command-line arguments, so the arguments narrow the selection.

    Returns a dict mapping pid (as a string) to a dict with the keys
    'arch', 'branch' and 'port'.  As a side effect the module-level
    archwidth, branchwidth and portwidth are widened to fit the values.
    """
    global archwidth, branchwidth, portwidth

    data = {}

    # NOTE(review): the arguments are pasted into a shell command line
    # without quoting; fine for an operator tool, unsafe for untrusted input.
    cmd = 'pgrep -lf "pdispatch ' + ' '.join(sys.argv[1:]) + '"'
    for line in commands.getoutput(cmd).splitlines():
        # Skip the pgrep invocation itself, which matches its own pattern.
        if 'pgrep -lf "pdispatch' in line:
            continue
        fields = line.split()
        # arch, branch and port are read from fixed positions 3, 4 and 7;
        # ignore matches too short to carry all of them instead of crashing.
        if len(fields) < 8:
            continue
        pid = fields[0]
        arch = fields[3]
        branch = fields[4]
        port = fields[7].replace('/usr/ports/', '')

        archwidth = max(archwidth, len(arch))
        branchwidth = max(branchwidth, len(branch))
        portwidth = max(portwidth, len(port))

        data[pid] = {'arch': arch, 'branch': branch, 'port': port}
        if branch == 'pgrep':
            # Debug aid kept from the original: show a line that was parsed
            # as a job although it is a pgrep process.
            print(' '.join(fields))
    return data


def getparent(ppid_map, pid):
    """Walk up the ppid tree and return the topmost ancestor of pid.

    ppid_map maps pid -> {'ppid': parent pid, ...}.  The walk stops at the
    first pid that has no entry in ppid_map, which for a subprocess of a
    pdispatch job is expected to be the pdispatch itself.
    """
    current = pid
    while current in ppid_map:
        current = ppid_map[current]['ppid']
    return current


def getallsubprocs(pids):
    """Recursively find all descendants of the processes listed in pids.

    Returns a list of pid strings; the pids passed in are not included.
    """
    found = []
    level = list(pids)
    while level:
        lines = commands.getoutput('pgrep -P ' + ','.join(level)).splitlines()
        # Keep only real pids: feeding an error message from pgrep back in
        # as a pid list would otherwise keep this loop running forever.
        level = [entry.strip() for entry in lines if entry.strip().isdigit()]
        found.extend(level)
    return found


def dosubprocs(data):
    """Collect the subprocesses of every pdispatch and fill in data.

    Each entry of data whose subprocess tree is known gains a 'command'
    (status text) and a 'time' (elapsed time as printed by ps).

    Returns a dict mapping subprocess pid to
    {'ppid', 'command', 'time', 'pppid'}, where 'pppid' is the topmost
    ancestor found by getparent().
    """
    ppid_map = {}
    idlers = set()

    subprocs = getallsubprocs(list(data))
    if not subprocs:
        # Nothing to ask ps about; "-p" with an empty list only yields an
        # error message that would then be parsed as process rows.
        return ppid_map

    cmd = 'ps ax -o pid,ppid,etime,command -p ' + ','.join(subprocs)
    for line in commands.getoutput(cmd).splitlines()[1:]:
        fields = line.split()
        if len(fields) < 3 or not fields[0].isdigit():
            continue
        pid, ppid, etime = fields[:3]
        command = ' '.join(fields[3:])
        if command == _IDLE_COMMAND:
            idlers.add(ppid)
        ppid_map[pid] = {'ppid': ppid, 'command': _describe(command),
                         'time': etime}

    # Fill in the sleepers' parent pid etimes so we display how long the
    # pdispatch has been trying to acquire a chroot, instead of the
    # <15 second sleep lifetime.
    if idlers:
        cmd = 'ps ax -o pid,etime -p ' + ','.join(sorted(idlers))
        for line in commands.getoutput(cmd).splitlines()[1:]:
            fields = line.split()
            if len(fields) < 2:
                continue
            ppid, etime = fields[0], fields[1]
            # The sleeper's parent is only interesting when it is one of the
            # pdispatch jobs; anything else has no row in data.
            if ppid in data:
                data[ppid]['time'] = etime

    # Propagate commands and runtime to the parent pdispatch.  We have to do
    # this after the loop above because ps sorts its output and we are not
    # guaranteed to have processed the ppid before the pid.  The alternative
    # is multiple ps invocations, which is slower.
    for pid in ppid_map:
        entry = ppid_map[pid]
        pppid = getparent(ppid_map, pid)  # Find ancestor pdispatch
        entry['pppid'] = pppid
        blob = data.get(pppid)
        if blob is None:
            # The ancestor is not a known pdispatch (e.g. the process tree
            # changed between the pgrep and ps calls).
            continue

        # The first subprocess seen for a job provides its command and time;
        # a time filled in for an idler above is kept.
        if 'command' not in blob:
            blob['command'] = entry['command']
        if 'time' not in blob:
            blob['time'] = entry['time']

    return ppid_map


def getsockets(ppid_map, data):
    """Match network sockets to pdispatch children and record the peer host.

    For every socket listed by sockstat that is owned by a pid in ppid_map,
    the owning pdispatch entry in data gets a 'host' key holding the resolved
    name of the address in the last parsed column (or the raw address when
    it cannot be resolved).  The module-level hostwidth is widened to fit.
    """
    global hostwidth

    if not ppid_map:
        return

    # XXX what about ipv6?
    for line in commands.getoutput('sockstat -4 -c').splitlines()[1:]:
        # Drop the first 20 characters (presumably the USER and COMMAND
        # columns -- confirm against sockstat's layout) so that the pid is
        # the first remaining field.
        fields = line[20:].split()
        # After the pid the address is read from position 3 of the remaining
        # fields, so at least five fields are needed in total.
        if len(fields) < 5:
            continue
        spid = fields[0]
        address = fields[4]
        if ':' not in address:
            continue
        host = address.rsplit(':', 1)[0]

        # Check if the socket belongs to one of our pids
        if spid not in ppid_map:
            continue
        ppid = ppid_map[spid].get('pppid')  # Map to pdispatch
        if ppid not in data:
            continue
        try:
            hostname = socket.gethostbyaddr(host)[0]
        except socket.error:
            # Covers herror and gaierror: fall back to the raw address.
            hostname = host
        data[ppid]['host'] = hostname
        hostwidth = max(hostwidth, len(hostname))


def main():
    """Gather the job data and print the sorted report."""
    data = getpdispatch()

    if not data:
        print("No matching jobs")
        sys.exit()

    ppid_map = dosubprocs(data)
    getsockets(ppid_map, data)

    # format for output
    output = []
    for blob in data.values():
        blob.setdefault('host', '')
        blob.setdefault('command', 'Dispatching')
        blob.setdefault('time', '00:00')

        output.append(blob['arch'].ljust(archwidth + 2) +
                      blob['branch'].ljust(branchwidth + 2) +
                      blob['port'].ljust(portwidth + 2) +
                      blob['host'].ljust(hostwidth + 2) +
                      blob['command'] + " (" + blob['time'] + ")")

    # sort, output
    output.sort()
    print('\n'.join(output))


if __name__ == "__main__":
    main()