about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Pav Lucistnik <pav@FreeBSD.org> 2009-05-21 16:17:15 +0000
committer Pav Lucistnik <pav@FreeBSD.org> 2009-05-21 16:17:15 +0000
commit ce4e44feb45d4624500b527986c5a1cda9a26200 (patch)
tree 8a11906f3f3047037cedcb008823b870b0b28604
parent 46e2ac6a6ff9bc4358abd6fb41d209224a61d990 (diff)
- Update to reflect a reality on pointyhat
Notes
Notes: svn path=/head/; revision=234345
-rwxr-xr-x Tools/portbuild/scripts/build 59
-rwxr-xr-x Tools/portbuild/scripts/buildproxy-client 14
-rwxr-xr-x Tools/portbuild/scripts/buildscript 4
-rwxr-xr-x Tools/portbuild/scripts/dopackages 75
-rwxr-xr-x Tools/portbuild/scripts/dosetupnode 17
-rwxr-xr-x Tools/portbuild/scripts/makeduds 11
-rwxr-xr-x Tools/portbuild/scripts/makerestr 3
-rwxr-xr-x Tools/portbuild/scripts/makeworld 2
-rwxr-xr-x Tools/portbuild/scripts/packagebuild 936
-rwxr-xr-x Tools/portbuild/scripts/pdispatch 264
-rwxr-xr-x Tools/portbuild/scripts/pollmachine 148
-rw-r--r-- Tools/portbuild/scripts/ptimeout.c 2
-rwxr-xr-x Tools/portbuild/scripts/reportload 4
-rwxr-xr-x Tools/portbuild/scripts/straslivy.py 2
-rwxr-xr-x Tools/portbuild/scripts/zbackup 2
-rwxr-xr-x Tools/portbuild/scripts/zclient 124
-rwxr-xr-x Tools/portbuild/scripts/zsync 38
17 files changed, 989 insertions, 716 deletions
diff --git a/Tools/portbuild/scripts/build b/Tools/portbuild/scripts/build
index bfa94e004d78..ed2c33971eff 100755
--- a/Tools/portbuild/scripts/build
+++ b/Tools/portbuild/scripts/build
@@ -94,7 +94,7 @@ do_clone() {
if [ -d ${newbuilddir} ]; then
if [ ! -f ${pbab}/builds/previous/.keep ]; then
- build destroy ${arch} ${branch} previous
+ /var/portbuild/scripts/build destroy ${arch} ${branch} previous
fi
rm -f ${pbab}/builds/previous
mv ${pbab}/builds/latest ${pbab}/builds/previous
@@ -111,13 +111,22 @@ do_portsupdate() {
buildid=$3
builddir=$4
shift 4
+ if [ $# -gt 0 ]; then
+ arg=$1
+ shift
+ fi
portsfs=a/portbuild/${arch}/${buildid}/ports
+ destroy_fs a/portbuild/${arch} ${buildid} /ports || exit 1
+
+ if [ "${arg}" = "-umount" ]; then
+ return
+ fi
+
echo "================================================"
echo "Reimaging ZFS ports tree on ${builddir}/ports"
echo "================================================"
- destroy_fs a/portbuild/${arch} ${buildid} /ports || exit 1
now=$(now)
zfs snapshot a/snap/ports@${now}
@@ -132,15 +141,24 @@ do_srcupdate() {
buildid=$3
builddir=$4
shift 4
+ if [ $# -gt 0 ]; then
+ arg=$1
+ shift
+ fi
srcfs=a/portbuild/${arch}/${buildid}/src
+ destroy_fs a/portbuild/${arch} ${buildid} /src || exit 1
+
+ if [ "${arg}" = "-umount" ]; then
+ return
+ fi
+
echo "================================================"
echo "Reimaging ZFS src tree on ${builddir}/src"
echo "================================================"
- destroy_fs a/portbuild/${arch} ${buildid} /src || exit 1
-
+
case ${branch} in
8|8-exp)
srcbranch=HEAD
@@ -173,11 +191,8 @@ cleanup_client() {
test -f ${pb}/${arch}/portbuild.${mach} && . ${pb}/${arch}/portbuild.${mach}
# Kill off builds and clean up chroot
- ${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${mach} -nocopy -queue
-
- if [ "${arg}" = "-full" ]; then
- ${ssh_cmd} ${client_user}@${mach} ${sudo_cmd} rm -rf ${pb}/${arch}/${branch}/builds/${buildid}/.ready ${pb}/${arch}/${branch}/builds/${buildid} /tmp/.setup-${buildid}
- fi
+ ${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${mach} -nocopy -queue -full
+
echo "Finished cleaning up ${arch}/${branch} build ID ${buildid} on ${mach}"
}
@@ -211,7 +226,7 @@ do_upload() {
test_fs() {
local fs=$1
- zfs list -Ht filesystem | awk '{print $1}' | grep -q "$fs"
+ zfs list -Ht filesystem ${fs} > /dev/null 2>&1
}
@@ -286,22 +301,22 @@ destroy_fs() {
# We might have snapshots on the target filesystem, e.g. if it
# is both the head and tail of its clone tree. They should be
# unreferenced.
- (zfs list -H -o name | grep "^${fullfs}@" | xargs -n 1 zfs destroy) || return 1
+ # We have to grep because zfs list -H returns an error instead of
+ # a null list if no snapshots exist
+ if ! (zfs list -r -H -o name -t snapshot ${fullfs} | grep "^${fullfs}@" | xargs -n 1 zfs destroy); then
+ return 1
+ fi
# The target filesystem should now be unreferenced
- zfs destroy -f "${fullfs}" || return 1
-
- # Clean up the initial snapshot(s) that were promoted onto a
- # cloned filesystem. It could have been propagated several
- # times so we don't know where it ended up. Therefore we
- # can't match for the ${buildid} part of ${fullfs}.
- #
- # XXX might be doing a substring match of subfs but we can't
- # prepend / because a null subfs will not match
+ if ! zfs destroy -f "${fullfs}"; then
+ return 1
+ fi
# Destroy the origin snapshot, which should be unreferenced
if [ ! -z "${parent}" ]; then
- zfs destroy -f ${parent} || return 1
+ if ! zfs destroy -f ${parent}; then
+ return 1
+ fi
fi
fi
}
@@ -374,7 +389,7 @@ proxy_user() {
eval "do_${cmd} ${arch} ${branch} ${buildid} \"${builddir}\" ${args}"
error=$?
else
- su ports-${arch} -c "build ${cmd} ${arch} ${branch} ${buildid} \"${builddir}\" ${args}"
+ su ports-${arch} -c "/var/portbuild/scripts/build ${cmd} ${arch} ${branch} ${buildid} \"${builddir}\" ${args}"
error=$?
fi
diff --git a/Tools/portbuild/scripts/buildproxy-client b/Tools/portbuild/scripts/buildproxy-client
index e5ebcfed05cf..f8de6fe878e0 100755
--- a/Tools/portbuild/scripts/buildproxy-client
+++ b/Tools/portbuild/scripts/buildproxy-client
@@ -25,8 +25,18 @@ try:
s.close()
sys.exit(int(code))
-except:
- raise # XXX debug
+except Exception, e:
+ print "buildproxy-client: exception:"
+ print e
+ try:
+ if code == None:
+ print "buildproxy-client: error: code was None"
+ else:
+ print "buildproxy-client: error: code was '" + code + "'"
+ except Exception, e2:
+ print "buildproxy-client: exception 2:"
+ print e2
+ raise e # XXX debug
sys.exit(254)
diff --git a/Tools/portbuild/scripts/buildscript b/Tools/portbuild/scripts/buildscript
index ec05c6594347..fc32d7d42b02 100755
--- a/Tools/portbuild/scripts/buildscript
+++ b/Tools/portbuild/scripts/buildscript
@@ -126,11 +126,15 @@ restr=$(make -V RESTRICTED)
# Keep restricted distfiles in a subdirectory for extra protection
# against leakage
if [ ! -z "$restr" ]; then
+ echo "DISTDIR=${DISTDIR}"
export DISTDIR=${DISTDIR}/RESTRICTED
+ echo "DISTDIR=${DISTDIR}"
+ mkdir -p ${DISTDIR}
fi
if [ $phase = 1 ]; then
+ # note: if you change this header, also change processonelog and processlogs2
cd $dir || exit 1
echo "building for: $(uname -mr)"
echo "maintained by: $(make maintainer)"
diff --git a/Tools/portbuild/scripts/dopackages b/Tools/portbuild/scripts/dopackages
index 29e7f3398dfe..e2fc7676f37a 100755
--- a/Tools/portbuild/scripts/dopackages
+++ b/Tools/portbuild/scripts/dopackages
@@ -71,12 +71,14 @@ mailexit () {
}
srctar() {
- tar cfCj ${builddir}/src-${buildid}.tbz ${builddir} src/
+ rm -f ${builddir}/src-2*.tbz*
+ tar cfCj ${builddir}/src-${buildid}.tbz ${builddir} src/ 2>/dev/null
md5 ${builddir}/src-${buildid}.tbz > ${builddir}/src-${buildid}.tbz.md5
}
portstar() {
- tar cfCj ${builddir}/ports-${buildid}.tbz ${builddir} ports/
+ rm -f ${builddir}/ports-2*.tbz*
+ tar cfCj ${builddir}/ports-${buildid}.tbz ${builddir} ports/ 2>/dev/null
md5 ${builddir}/ports-${buildid}.tbz > ${builddir}/ports-${buildid}.tbz.md5
}
@@ -204,24 +206,16 @@ dobuild() {
arch=$2
branch=$3
builddir=$4
- phase=$5
-
- count=0
- for i in `cat ${pb}/${arch}/mlist`; do
- . ${pb}/${arch}/portbuild.conf
- test -f ${pb}/${arch}/portbuild.${i} && . ${pb}/${arch}/portbuild.${i}
- count=$((${count}+${maxjobs}))
- done
-
+
echo "================================================"
- echo "building packages (phase ${phase})"
+ echo "building packages"
echo "================================================"
echo "started at $(date)"
- phasestart=$(date +%s)
- make -k -j${count} quickports all > ${builddir}/make.${phase} 2>&1 </dev/null
+ start=$(date +%s)
+ /var/portbuild/evil/qmanager/packagebuild ${arch} ${branch} ${buildid} > ${builddir}/make 2>&1 < /dev/null
echo "ended at $(date)"
- phaseend=$(date +%s)
- echo "phase ${phase} took $(date -u -j -r $(($phaseend - $phasestart)) | awk '{print $4}')"
+ end=$(date +%s)
+ echo "Build took $(date -u -j -r $((end - start)) | awk '{print $4}')"
echo $(echo $(ls -1 ${builddir}/packages/All | wc -l) - 2 | bc) "packages built"
echo $(wc -l ${PORTSDIR}/${INDEXFILE} | awk '{print $1}') "lines in INDEX"
@@ -230,9 +224,9 @@ dobuild() {
echo $(echo $(du -sk ${builddir}/distfiles | awk '{print $1}') / 1024 | bc) "MB of distfiles"
cd ${builddir}
- if grep -qE '(ptimeout|pnohang): killing' make.${phase}; then
+ if grep -qE '(ptimeout|pnohang): killing' make; then
echo "The following port(s) timed out:"
- grep -E '(ptimeout|pnohang): killing' make.${phase} | sed -e 's/^.*ptimeout:/ptimeout:/' -e 's/^.*pnohang:/pnohang:/'
+ grep -E '(ptimeout|pnohang): killing' make | sed -e 's/^.*ptimeout:/ptimeout:/' -e 's/^.*pnohang:/pnohang:/'
fi
}
@@ -259,7 +253,7 @@ restart=0
cont=0
finish=0
nofinish=0
-dodistfiles=1
+nodistfiles=0
fetch_orig=0
trybroken=0
incremental=0
@@ -300,7 +294,7 @@ while [ $# -gt 0 ]; do
noplistcheck=1
;;
x-nodistfiles)
- dodistfiles=0
+ nodistfiles=1
;;
x-fetch-original)
fetch_orig=1
@@ -349,9 +343,8 @@ fi
echo | mail -s "$(basename $0) started for ${arch}-${branch} ${buildid} at $(date)" ${mailto}
-if [ "$dodistfiles" = 1 ]; then
- # XXX flip default to always collect
- export WANT_DISTFILES=1
+if [ "$nodistfiles" = 1 ]; then
+ export NO_DISTFILES=1
fi
if [ "$noplistcheck" = 1 ]; then
@@ -384,6 +377,11 @@ df -k | grep ${buildid}
# Set up our environment variables
buildenv ${pb} ${arch} ${branch} ${builddir}
+# bomb out if build clone failed
+if [ ! -d ${builddir} ]; then
+ mailexit 1
+fi
+
if [ "${keep}" -eq 1 ]; then
touch ${builddir}/.keep
fi
@@ -498,8 +496,6 @@ if [ "$skipstart" = 0 ]; then
cdromlist ${pb} ${arch} ${branch} ${builddir} &
fi
- ${pb}/scripts/makeparallel ${arch} ${branch} ${buildid} &
-
cd ${builddir}
mv distfiles/ .distfiles~
rm -rf .distfiles~ &
@@ -518,7 +514,7 @@ if [ "$skipstart" = 0 ]; then
# Create new log directories for archival
rm -rf ${newerrors}
- mkdir -p ${newerrors}/old-errors
+ mkdir -p ${newerrors}
ln -sf ${newerrors} ${builddir}/errors
rm -rf ${newlogs}
mkdir -p ${newlogs}
@@ -533,6 +529,10 @@ if [ "$skipstart" = 0 ]; then
fi
cp -p ${builddir}/duds ${newerrors}/duds
cp -p ${builddir}/duds ${newlogs}/duds
+ if [ -f "${builddir}/duds.verbose" ]; then
+ cp -p ${builddir}/duds.verbose ${newerrors}/duds.verbose
+ cp -p ${builddir}/duds.verbose ${newlogs}/duds.verbose
+ fi
cp -p ${builddir}/ports/${INDEXFILE} ${newerrors}/INDEX
cp -p ${builddir}/ports/${INDEXFILE} ${newlogs}/INDEX
@@ -594,24 +594,7 @@ if [ "$nobuild" = 0 ]; then
cp duds.orig duds
fi
- cd ${builddir}/packages/All
- ln -sf ../../Makefile .
-
- dobuild ${pb} ${arch} ${branch} ${builddir} 1
-
- ls -asFlrt ${builddir}/packages/All > ${builddir}/logs/ls-lrt
-
- cd ${builddir}/errors/
- find . -name '*.log' -depth 1 | cpio -dumpl ${builddir}/errors/old-errors
-
- # Clean up the clients
- ${pb}/scripts/build cleanup ${arch} ${branch} ${buildid}
-
- wait
- echo "setting up of nodes ended at $(date)"
-
- cd ${builddir}/packages/All
- dobuild ${pb} ${arch} ${branch} ${builddir} 2
+ dobuild ${pb} ${arch} ${branch} ${builddir}
fi
@@ -666,7 +649,7 @@ if [ "$nofinish" = 0 ]; then
${pb}/scripts/chopindex ${builddir}/ports/${INDEXFILE} ${builddir}/packages > ${builddir}/packages/INDEX
ls -asFlrt ${builddir}/packages/All > ${builddir}/logs/ls-lrt
- cp -p ${builddir}/make.[12] ${builddir}/logs
+ cp -p ${builddir}/make ${builddir}/logs
echo "================================================"
echo "copying distfiles"
@@ -712,4 +695,4 @@ echo "all done at $(date)"
echo "entire process took $(date -u -j -r $(($endtime - $starttime)) | awk '{print $4}')"
echo "================================================"
-mailexit 0
\ No newline at end of file
+mailexit 0
diff --git a/Tools/portbuild/scripts/dosetupnode b/Tools/portbuild/scripts/dosetupnode
index 33a6044a627f..243ce2ae3664 100755
--- a/Tools/portbuild/scripts/dosetupnode
+++ b/Tools/portbuild/scripts/dosetupnode
@@ -5,8 +5,6 @@
# -norsync|-nocopy : Don't copy files, just clean up builds
#
-# -queue : update queue once we finish setting up
-#
# -force : force file copying/extraction even if it appears it is
# up-to-date
#
@@ -99,9 +97,16 @@ setup() {
${client_setup} post-copy ${args} || (echo "post-copy for ${node} failed"; return 1)
if [ "${queue}" -eq 1 ]; then
- lockf -k ${pb}/${arch}/queue/.lock \
- ${pb}/scripts/pollmachine ${arch}/${node} -queue
+ jobs=$(python /var/portbuild/evil/qmanager/qclient jobs | grep "${node}" | grep "${arch}/${branch}/${buildid} package" | awk '{print $1}' | tail +1)
+ for j in ${jobs}; do
+ python /var/portbuild/evil/qmanager/qclient release $j
+ done
+ fi
+
+ if [ "${full}" -eq 1 ]; then
+ ${ssh_cmd} ${client_user}@${node} ${sudo_cmd} rm -rf ${pb}/${arch}/${branch}/builds/${buildid}/.ready ${pb}/${arch}/${branch}/builds/${buildid} /tmp/.setup-${buildid}
fi
+
echo "setting up of $node ended at $(date)"
}
@@ -109,6 +114,7 @@ pbab=${pb}/${arch}/${branch}
norsync=0
queue=0
+full=0
while [ $# -ge 1 ]; do
case $1 in
@@ -122,6 +128,9 @@ while [ $# -ge 1 ]; do
-force)
force=-force
;;
+ -full)
+ full=1
+ ;;
esac
shift
done
diff --git a/Tools/portbuild/scripts/makeduds b/Tools/portbuild/scripts/makeduds
index a087ebf69a6c..809f73821d07 100755
--- a/Tools/portbuild/scripts/makeduds
+++ b/Tools/portbuild/scripts/makeduds
@@ -40,9 +40,14 @@ export PKG_DBDIR=/nonexistentpkg
export PORT_DBDIR=/nonexistentport
cd ${PORTSDIR}
-make -j${DUDSJOBS} ignorelist ECHO_MSG=true > ${duds} || exit 1
-sort ${duds} > ${duds}.tmp
-mv -f ${duds}.tmp ${duds}
+make -j${DUDSJOBS} ignorelist-verbose ECHO_MSG=true > ${duds}.verbose 2> /dev/null || exit 1
+sort ${duds}.verbose > ${duds}.verbose.tmp
+mv -f ${duds}.verbose.tmp ${duds}.verbose
+cut -f 1 -d \| ${duds}.verbose > ${duds}
cp ${duds} ${duds}.orig
grep -Ff ${duds}.orig ${index} | cut -f 1 -d \| > ${duds}.full
+
+cat ${duds} ${duds}.full | sort | uniq -u | sed -e "s@\$@|IGNORE: dependent port@" > ${duds}.full.verbose.tmp
+cat ${duds}.verbose ${duds}.full.verbose.tmp | sort > ${duds}.full.verbose
+rm ${duds}.full.verbose.tmp
diff --git a/Tools/portbuild/scripts/makerestr b/Tools/portbuild/scripts/makerestr
index 4fe8ec944703..9da8db96edce 100755
--- a/Tools/portbuild/scripts/makerestr
+++ b/Tools/portbuild/scripts/makerestr
@@ -22,7 +22,8 @@ builddir=${pb}/${arch}/${branch}/builds/${buildid}
buildenv ${pb} ${arch} ${branch} ${builddir}
-duds=${builddir}/duds
+# XXX MCL 20080908 test removal of this
+#duds=${builddir}/duds
unset DISPLAY
diff --git a/Tools/portbuild/scripts/makeworld b/Tools/portbuild/scripts/makeworld
index 953f9d02a421..91845fff1e3c 100755
--- a/Tools/portbuild/scripts/makeworld
+++ b/Tools/portbuild/scripts/makeworld
@@ -23,6 +23,8 @@ builddir=${pb}/${arch}/${branch}/builds/${buildid}
# confuses cross-builds
export TARGET_ARCH=${arch}
+# Workaround needed for zfs - 20090321 erwin
+export NO_FSCHG=1
case "$branch" in
6*)
diff --git a/Tools/portbuild/scripts/packagebuild b/Tools/portbuild/scripts/packagebuild
index 9beeeb151d57..e0b73e0624b5 100755
--- a/Tools/portbuild/scripts/packagebuild
+++ b/Tools/portbuild/scripts/packagebuild
@@ -10,511 +10,571 @@
# where the entire cluster waits for a deep part of the tree to
# build on a small number of machines
#
-# Other advantages are that this system is easily customizable and
-# will let us customize things like the matching policy of jobs to
-# machines.
+# We can dynamically respond to changes in build machine availability,
+# since the queue manager will block jobs that cannot be immediately
+# satisfied and will unblock us when a job slot becomes available.
+#
+# When a package build fails, it is requeued with a lower priority
+# such that it will rebuild again as soon as no "phase 1" packages
+# are available to build. This prevents the cluster staying idle
+# until the last phase 1 package builds.
+#
+# Other advantages are that this system is easily customizable and in
+# the future will let us customize things like the matching policy of
+# jobs to machines. For example, we could avoid dispatching multiple
+# openoffice builds to the same system.
#
# TODO:
-# * External queue manager
-# * Mark completed packages instead of deleting them
+# * Combine build prep stages?
+# - initial check for file up-to-date
# * check mtime for package staleness (cf make)
-# * Check for parent mtimes after finishing child
-
-import os, sys, threading, time, subprocess, fcntl, operator
-#from itertools import ifilter, imap
-from random import choice
-
-def parseindex(indexfile):
-
- tmp={}
- pkghash={}
- for i in file(indexfile):
- line=i.rstrip().split("|")
- pkg = line[0]
- tmp[pkg] = line[1:]
-
- # XXX hash category names too
-
- # Trick python into storing package names by reference instead of copying strings and wasting 60MB
- pkghash[pkg] = pkg
-
- index=dict.fromkeys(tmp.keys())
- for pkg in tmp.iterkeys():
- line = tmp[pkg]
- data={'name': pkg, 'path':line[0],
- #'prefix':line[1],
- #'comment':line[2],
- #'descr':line[3],
- #'maintainer':line[4],
- 'categories':line[5], # XXX duplicates strings
- 'bdep':[pkghash[i] for i in line[6].split(None)],
- 'rdep':[pkghash[i] for i in line[7].split(None)],
- #'www':line[8],
- 'edep':[pkghash[i] for i in line[9].split(None)],
- 'pdep':[pkghash[i] for i in line[10].split(None)],
- 'fdep':[pkghash[i] for i in line[11].split(None)],
- 'height':None}
- if index[pkg] is None:
- index[pkg] = data
- else:
- index[pkg].update(data)
- if not index[pkg].has_key('parents'):
- index[pkg]['parents'] = []
-
- # XXX iter?
- deps=set()
- for j in ['bdep','rdep','edep','fdep','pdep']:
- deps.update(set(index[pkg][j]))
- index[pkg]['deps'] = [pkghash[i] for i in deps]
-
- for j in deps:
- # This grossness is needed to avoid a second pass through
- # the index, because we might be about to refer to
- # packages that have not yet been processed
- if index[j] is not None:
- if index[j].has_key('parents'):
- index[j]['parents'].append(pkghash[pkg])
- else:
- index[j]['parents'] = [pkghash[pkg]]
- else:
- index[j] = {'parents':[pkghash[pkg]]}
+# * option to skip phase 2
- return index
+from qmanagerclient import *
-def gettargets(index, targets):
- """ split command line arguments into list of packages to build. Returns set or iterable """
- # XXX make this return the full recursive list and use this later for processing wqueue
+import os, sys, threading, time, subprocess
+#import random
+from itertools import chain
+#import gc
+from stat import *
- plist = set()
- if len(targets) == 0:
- targets = ["all"]
- for i in targets:
- if i == "all":
- plist = index.iterkeys()
- break
- if i.endswith("-all"):
- cat = i.rpartition("-")[0]
- plist.update(j for j in index.iterkeys() if cat in index[j]['categories'])
- elif i.rstrip(".tbz") in index.iterkeys():
- plist.update([i.rstrip(".tbz")])
+from Queue import Queue
+from heapq import *
- return plist
+categories = {}
+ports = {}
-def heightindex(index, targets):
- """ Initial population of height tree """
+# When a build fails we requeue it with a lower priority such that it
+# will never preempt a phase 1 build but will build when spare
+# capacity is available.
+PHASE2_BASE_PRIO=1000
- for i in targets:
- heightdown(index, i)
+# Process success quickly so other jobs are started
+SUCCESS_PRIO = -1000
-def heightdown(index, pkgname):
- """
- Recursively populate the height tree down from a given package,
- assuming empty values on entries not yet visited
+# Failure should be a less common event :)
+FAILURE_PRIO = -900
+
+# Port status codes
+PENDING = 1 # Yet to build
+PHASE2 = 2 # Failed once
+
+class PriorityQueue(Queue):
+ """Variant of Queue that retrieves open entries in
+ priority order (lowest first).
+ Entries are typically tuples of the form: (priority number,
+ data)
+ This class can be found at: Python-2.6a3/Lib/Queue.py
"""
+ maxsize = 0
- pkg=index[pkgname]
- if pkg['height'] is None:
- if len(pkg['deps']) > 0:
- max = 0
- for i in pkg['deps']:
- w = heightdown(index, i)
- if w > max:
- max = w
- pkg['height'] = max + 1
- else:
- pkg['height'] = 1
- return pkg['height']
+ def _init(self, maxsize):
+ self.queue = []
-def heightup(index, pkgname):
- """ Recalculate the height tree going upwards from a package """
+ def _qsize(self, len=len):
+ return len(self.queue)
- if not index.has_key(pkgname):
- raise KeyError
+ def _put(self, item, heappush=heappush):
+ heappush(self.queue, item)
- parents=set(index[pkgname]['parents'])
+ def _get(self, heappop=heappop):
+ return heappop(self.queue)
- while len(parents) > 0:
- # XXX use a deque?
- pkgname = parents.pop()
- if not index.has_key(pkgname):
- # XXX can this happen?
- continue
- pkg=index[pkgname]
- oldheight=pkg['height']
- if oldheight is None:
- # Parent is in our build target list
- continue
- if len(pkg['deps']) == 0:
- newheight = 1
- else:
- newheight=max(index[j]['height'] for j in pkg['deps']) + 1
- if newheight > oldheight:
- print "%s height increasing: %d -> %d", pkg, oldheight, newheight
- assert(False)
- if newheight != oldheight:
- pkg['height'] = newheight
- parents.update(pkg['parents'])
-
-def deleteup(index, pkgname):
- if not index.has_key(pkgname):
- raise KeyError
-
- parents=set([pkgname])
-
- children=[]
- removed=[]
- while len(parents) > 0:
- pkgname = parents.pop()
- if not index.has_key(pkgname):
- # Parent was already deleted via another path
- # XXX can happen?
- print "YYYYYYYYYYYYYYYYYYYYYY %s deleted" % pkgname
- continue
- if index[pkgname]['height'] is None:
- # parent is not in our list of build targets
- continue
- pkg=index[pkgname]
-
- children.extend(pkg['deps'])
- parents.update(pkg['parents'])
- removed.append(pkgname)
- del index[pkgname]
-
- removed = set(removed)
- children = set(children)
-# print "Removed %d packages, touching %d children" % (len(removed), len(children))
-
- for i in children.difference(removed):
- par=index[i]['parents']
- index[i]['parents'] = list(set(par).difference(removed))
-
-# XXX return an iter
-def selectheights(index, level):
- return [i for i in index.iterkeys() if index[i]['height'] == level]
-
-def rank(index, ready, sortd, max = None):
- """ rank the list of ready packages according to those listed as
- dependencies in successive entries of the sorted list """
-
- input=set(ready)
- output = []
- count = 0
- print "Working on depth ",
- for i in sortd:
- deps = set(index[i]['deps'])
- both = deps.intersection(input)
- if len(both) > 0:
- print "%d " % index[i]['height'],
- input.difference_update(both)
- output.extend(list(both))
- if len(input) == 0:
- break
- if max:
- count+=len(both)
- if count > max:
- return output
- print
- output.extend(list(input))
+class Index(object):
- return output
+ def __init__(self, indexfile):
+ self.indexfile = indexfile
-def jobsuccess(index, job):
+ def parse(self, targets = None):
- pkg = index[job]
- # Build succeeded
- for i in pkg['parents']:
- index[i]['deps'].remove(job)
+ print "[MASTER] Read index"
+ f = file(self.indexfile)
+ index = f.readlines()
+ f.close()
+ f = None
+ del f
- # deps/parents tree now partially inconsistent but this is
- # what we need to avoid counting the height of the entry
- # we are about to remove (which would make it a NOP)
- heightup(index, job)
+ lines=[]
+ print "[MASTER] Phase 1"
+ for i in index:
+ (name, path, prefix, comment, descr, maintainer, categories, bdep,
+ rdep, www, edep, pdep, fdep) = i.rstrip().split("|")
- del index[job]
+ if targets is None or name in targets:
+ lines.append((name, bdep, rdep, edep, pdep, fdep))
-def jobfailure(index, job):
+ Port(name, path, "", "", "", "",
+ categories, "")
+ index = None
+ del index
- # Build failed
- deleteup(index, job)
-
-class worker(threading.Thread):
+ print "[MASTER] Phase 2"
+ for (name, bdep, rdep, edep, pdep, fdep) in lines:
+ ports[name].setdeps(bdep, rdep, edep, pdep, fdep)
- lock = threading.Lock()
+ lines = None
+ del lines
+ print "[MASTER] Done"
- # List of running threads
- tlist = []
+def depthindex(targets):
+ """ Initial population of depth tree """
- # List of running jobs
- running = []
+ for i in targets:
+ i.depth_recursive()
- # Used to signal dispatcher when we finish a job
- event = threading.Event()
+class Port(object):
- def __init__(self, mach, job, queue, arch, branch):
- threading.Thread.__init__(self)
- self.job = job
- self.mach = mach
- self.queue = queue
- self.arch = arch
- self.branch = branch
+ def __init__(self, name, path, prefix, comment, descr, maintainer,
+ cats, www):
- def run(self):
- global index
+ __slots__ = ["name", "path", "prefix", "comment", "descr",
+ "maintainer", "www", "bdep", "rdep", "edep", "pdep",
+ "fdep", "alldep", "parents", "depth", "categories"]
- pkg = index[self.job]
+ self.name = name
+ self.path = path
+ self.prefix = prefix
+ self.comment = comment
+ self.descr = descr
+ self.maintainer = maintainer
+ self.www = www
+
+ # Populated later
+ self.bdep = []
+ self.rdep = []
+ self.edep = []
+ self.pdep = []
+ self.fdep = []
+
+ self.alldep = []
+ self.parents = []
+ self.id = None # XXX
+
+ self.status = PENDING
+
+ # Whether the package build has completed and is hanging around
+ # to resolve dependencies for others XXX use status
+ self.done = False
+
+ # Depth is the maximum length of the dependency chain of this port
+ self.depth = None
+
+ self.categories=[]
+ scats = cats.split()
+ if len(scats) != len(set(scats)):
+ print "[MASTER] Warning: port %s includes duplicated categories: %s" % (name, cats)
+
+ for c in set(scats):
+ try:
+ cat = categories[c]
+ except KeyError:
+ cat = Category(c)
- if len(pkg['deps']) != 0:
- print "Running job with non-empty deps: %s" % pkg
- assert(False)
+ self.categories.append(cat)
+ cat.add(self)
- print "Running job %s" % (self.job)
- while True:
- retcode = subprocess.call(["/usr/bin/env", "FD=%s" % " ".join(["%s.tbz" % i for i in pkg['fdep']]), "ED=%s" % " ".join(["%s.tbz" % i for i in pkg['edep']]), "PD=%s" % " ".join(["%s.tbz" % i for i in pkg['pdep']]), "BD=%s" % " ".join(["%s.tbz" % i for i in pkg['bdep']]), "RD=%s" % " ".join(["%s.tbz" % i for i in pkg['rdep']]), "/var/portbuild/scripts/pdispatch2", self.mach, self.arch, self.branch, "/var/portbuild/scripts/portbuild", "%s.tbz" % self.job, pkg['path']])
- self.queue.release(self.mach)
- if retcode != 254:
- break
+ ports[name] = self
- # Failed to obtain job slot
- time.sleep(15)
- (self.mach, dummy) = self.queue.pick()
- print "Retrying on %s" % self.mach
+ def remove(self):
+ """ Clean ourselves up but don't touch references in other objects;
+they still need to know about us as dependencies etc """
- print "Finished job %s" % self.job,
+ self.fdep = None
+ self.edep = None
+ self.pdep = None
+ self.bdep = None
+ self.rdep = None
+ self.alldep = None
+ self.parents = None
- if retcode == 0:
- status = True
- print
- else:
- status = False
- print " with status %d" % retcode
+ for cat in self.categories:
+ cat.remove(self)
- worker.lock.acquire()
- worker.running.remove(self.job)
- worker.tlist.remove(self)
- if status == True:
- jobsuccess(index, self.job)
+ ports[self.name] = None
+ del ports[self.name]
+ del self
+
+ def destroy(self):
+ """ Remove a package and all references to it """
+
+ for pkg in self.alldep:
+ if pkg.parents is not None:
+ # Already removed but not destroyed
+ try:
+ pkg.parents.remove(self)
+ except ValueError:
+ continue
+
+ for pkg in self.parents:
+ try:
+ pkg.fdep.remove(self)
+ except ValueError:
+ pass
+ try:
+ pkg.edep.remove(self)
+ except ValueError:
+ pass
+ try:
+ pkg.pdep.remove(self)
+ except ValueError:
+ pass
+ try:
+ pkg.bdep.remove(self)
+ except ValueError:
+ pass
+ try:
+ pkg.rdep.remove(self)
+ except ValueError:
+ pass
+ pkg.alldep.remove(self)
+
+ sys.exc_clear()
+
+ self.remove()
+
+ def setdeps(self, bdep, rdep, edep, pdep, fdep):
+ self.fdep = [ports[p] for p in fdep.split()]
+ self.edep = [ports[p] for p in edep.split()]
+ self.pdep = [ports[p] for p in pdep.split()]
+ self.bdep = [ports[p] for p in bdep.split()]
+ self.rdep = [ports[p] for p in rdep.split()]
+
+ self.alldep = list(set(chain(self.fdep, self.edep, self.pdep,
+ self.bdep, self.rdep)))
+
+ for p in self.alldep:
+ p.parents.append(self)
+
+ def depth_recursive(self):
+
+ """
+ Recursively populate the depth tree up from a given package
+ through dependencies, assuming empty values on entries not yet
+ visited
+ """
+
+ if self.depth is None:
+ if len(self.parents) > 0:
+ max = 0
+ for i in self.parents:
+ w = i.depth_recursive()
+ if w > max:
+ max = w
+ self.depth = max + 1
+ else:
+ self.depth = 1
+ for port in ["openoffice", "kde-3"]:
+ if self.name.startswith(port):
+ # Artificial boost to try and get it building earlier
+ self.depth = 100
+ return self.depth
+
+ def destroy_recursive(self):
+ """ Remove a port and everything that depends on it """
+
+ parents=set([self])
+
+ while len(parents) > 0:
+ pkg = parents.pop()
+ assert pkg.depth is not None
+ parents.update(pkg.parents)
+ pkg.destroy()
+
+ def success(self):
+ """ Build succeeded and possibly uncovered some new leaves """
+
+ parents = self.parents[:]
+ self.done = True
+ self.remove()
+
+ newleafs = [p for p in parents if all(c.done for c in p.alldep)]
+ return newleafs
+
+ def failure(self):
+ """ Build failed """
+
+ self.destroy_recursive()
+
+ def packagename(self, arch, branch, buildid):
+ """ Return the path where a package may be found"""
+
+ return "/var/portbuild/%s/%s/builds/%s/packages/All/%s.tbz" \
+ % (arch, branch, buildid, self.name)
+
+ def is_stale(self, arch, branch, buildid):
+ """ Does a package need to be (re)-built?
+
+ Returns: False: if it exists and has newer mtime than all of
+ its dependencies.
+ True: otherwise
+ """
+
+ my_pkgname = self.packagename(arch, branch, buildid)
+ pkg_exists = os.path.exists(my_pkgname)
+
+ if pkg_exists:
+ my_mtime = os.stat(my_pkgname)[ST_MTIME]
+
+ dep_packages = [pkg.packagename(arch, branch, buildid)
+ for pkg in self.alldep]
+ deps_exist = all(os.path.exists(pkg) for pkg in dep_packages)
+ return not (pkg_exists and deps_exist and
+ all(os.stat(pkg)[ST_MTIME] <= my_mtime
+ for pkg in dep_packages))
+
+class Category(object):
+ def __init__(self, name):
+ self.name = name
+ self.ports = {}
+ categories[name] = self
+
+ def add(self, port):
+ self.ports[port] = port
+
+ def remove(self, port):
+ self.ports[port]=None
+ del self.ports[port]
+
+def gettargets(targets):
+ """ split command line arguments into list of packages to build.
+ Returns set or iterable of all ports that will be built including
+ dependencies """
+
+ plist = set()
+ if len(targets) == 0:
+ targets = ["all"]
+ for i in targets:
+ if i == "all":
+ return ports.itervalues()
+
+ if i.endswith("-all"):
+ cat = i.rpartition("-")[0]
+ plist.update(p.name for p in categories[cat].ports)
+ elif i.rstrip(".tbz") in ports:
+ plist.update([ports[i.rstrip(".tbz")].name])
else:
- jobfailure(index, self.job)
+ raise KeyError, i
- # Wake up dispatcher in case it was blocked
- worker.event.set()
- worker.event.clear()
+ # Compute transitive closure of all dependencies
+ pleft=plist.copy()
+ while len(pleft) > 0:
+ pkg = pleft.pop()
+ new = [p.name for p in ports[pkg].alldep]
+ plist.update(new)
+ pleft.update(new)
- worker.lock.release()
+ for p in set(ports.keys()).difference(plist):
+ ports[p].destroy()
- @staticmethod
- def dispatch(mach, job, queue, arch, branch):
- worker.lock.acquire()
- wrk = worker(mach, job, queue, arch, branch)
- worker.tlist.append(wrk)
- worker.lock.release()
- wrk.start()
+ return [ports[p] for p in plist]
-class machqueue(object):
- path = '';
- fd = -1;
+class worker(threading.Thread):
- # fcntl locks are per-process, so the fcntl lock acquisition will
- # succeed if another thread already holds it. We need the fcntl
- # lock for external visibility between processes but also need an
- # internal lock for protecting against out own threads.
- ilock = threading.Lock()
+ # Protects threads
+ lock = threading.Lock()
- def __init__(self, path):
- super(machqueue, self).__init__()
- self.path = path
- self.fd = os.open("%s.lock" % self.path, os.O_RDWR|os.O_CREAT)
-
-# print "Initializing with %s %d" % (self.path, self.fd)
-
- def lock(self):
- print "Locking...",
-# ret = fcntl.lockf(self.fd, fcntl.LOCK_EX)
- self.ilock.acquire()
- print "success"
-
- def unlock(self):
- print "Unlocking fd"
- self.ilock.release()
-# ret = fcntl.lockf(self.fd, fcntl.LOCK_UN)
-
- def poll(self):
- """ Return currently available machines """
-
- mfile = file(self.path + "../mlist", "r")
- mlist = mfile.readlines()
- mfile.close()
- mlist = [i.rstrip() for i in mlist] # Chop \n
-
- list = os.listdir(self.path)
- special = []
- machines = []
- for i in list:
- if i.startswith('.'):
- special.append(i)
- else:
- if i in mlist:
- machines.append(i)
- else:
- os.unlink(self.path + i)
-
- print "Found machines %s" % machines
- return (machines, special)
-
- def pick(self):
- """ Choose a random machine from the queue """
-
- min = 999
- while min == 999:
- while True:
- self.lock()
- (machines, special) = self.poll()
- if len(machines):
- break
- else:
- self.unlock()
- time.sleep(15)
- # XXX Use kqueue to monitor for changes
-
- list = []
- # XXX Choose as fraction of capacity
- for i in machines:
- f = file(self.path + i, "r")
- out = f.readline().rstrip()
- try:
- load = int(out)
- except ValueError:
- print "Bad value for %s: %s" % (i, out)
- load = 999
- f.close()
- if load < min:
- min = load
- list=[]
- if load == min:
- list.append(i)
- print "(%s, %d)" % (list, load)
-
- if min == 999:
- print "Bad queue length for %s" % list
- self.unlock()
-
- machine = choice(list)
- # XXX hook up config files
- if min == 2:
- # Queue full
- os.unlink(self.path + machine)
+ # Running threads, used for collecting status
+ threads = {}
+
+ def __init__(self, mach, job, arch, branch, buildid, queue):
+ threading.Thread.__init__(self)
+ self.machine = mach
+ self.job = job
+ self.arch = arch
+ self.branch = branch
+ self.buildid = buildid
+ self.queue = queue
+
+ self.setDaemon(True)
+
+ def run(self):
+ pkg = self.job
+
+ print "[MASTER] Running job %s" % (pkg.name),
+ if pkg.status == PHASE2:
+ print " (phase 2)"
else:
- f = file(self.path + machine, "w")
- f.write("%d\n" % (min + 1))
- f.flush()
- f.close()
-
- self.unlock()
- return (machine, special)
-
- def release(self, mach):
- self.lock()
- print "Releasing %s" % mach,
- if os.path.exists(self.path + mach):
- f = file(self.path + mach, "r+")
- out = f.readline().rstrip()
+ print
+        # Launch pdispatch for this package.  If it cannot be started there
+        # is no child to read from or wait on, so fall back to a hard
+        # failure code instead of touching an unbound `build` below.
+        build = None
+        retcode = 1
+        try:
+            build = subprocess.Popen(
+                ["/bin/sh", "/var/portbuild/scripts/pdispatch",
+                 self.arch, self.branch, self.buildid, self.machine,
+                 "/var/portbuild/scripts/portbuild", "%s.tbz" % pkg.name,
+                 pkg.path],
+                env={'HOME':"/root",
+                     'PATH':'/sbin:/bin:/usr/sbin:/usr/bin:/usr/games:/usr/local/sbin:/usr/local/bin:/var/portbuild/scripts',
+                     'FD':" ".join(["%s.tbz" % p.name for p in pkg.fdep]),
+                     'ED':" ".join(["%s.tbz" % p.name for p in pkg.edep]),
+                     'PD':" ".join(["%s.tbz" % p.name for p in pkg.pdep]),
+                     'BD':" ".join(["%s.tbz" % p.name for p in pkg.bdep]),
+                     'RD':" ".join(["%s.tbz" % p.name for p in pkg.rdep])},
+                stderr=subprocess.STDOUT, stdout=subprocess.PIPE, bufsize=0)
+        except OSError, e:
+            print >>sys.stderr, "[%s:%s]: Execution failed: %s" % \
+                (pkg.id, pkg.name, e)
+        # Relay the child's combined stdout/stderr line by line until EOF
+        while build is not None:
             try:
- load = int(out)
- except ValueError:
- print "Queue error on release of %s: %s" % (mach, out)
- load = 3 #XXX
+                line = build.stdout.readline()
+            except:
+                print "[%s:%s]: Failed reading from build script" % \
+                    (pkg.id, pkg.name)
+                break
+            if line == "":
+                break
+            print "[%s:%s] %s" % (pkg.id, pkg.name, line.rstrip())
+
+        if build is not None:
+            retcode = build.wait()
+
+# time.sleep(random.randint(0,60))
+#
+# r = random.random()
+# if r < 0.1:
+# retcode = 1
+# elif r < 0.15:
+# retcode = 254
+# else:
+# retcode = 0
+
+ conn = QManagerClientConn(stderr = sys.stderr)
+ try:
+ (code, vars) = conn.command("release", {'id':pkg.id})
+ except RequestError, e:
+ print "[MASTER] Error releasing job %s (%s): %s" % (pkg.name, pkg.id, e.value)
+
+ if retcode == 254:
+ # Requeue soft failure at original priority
+ # XXX exponential backoff?
+ time.sleep(60)
+# print "Requeueing %s" % pkg.id
+ self.queue.put((-pkg.depth, pkg))
+ elif retcode == 253:
+ # setting up a machine, we should immediately retry
+ self.queue.put((-pkg.depth, pkg))
+ elif retcode == 0:
+ self.queue.put((SUCCESS_PRIO, pkg))
else:
- f = file(self.path + mach, "w")
- load = 3 #XXX
+ self.queue.put((FAILURE_PRIO, pkg))
-# f.truncate(0)
- f.write("%d\n" % (load - 1))
- print "...now %d" % (load - 1)
- f.flush()
- f.close()
- self.unlock()
+ # Clean up
+ worker.lock.acquire()
+ worker.threads[self]=None
+ del worker.threads[self]
+ worker.lock.release()
+
+ # Create a worker thread for `job` on machine `mach`, record it in
+ # worker.threads while holding worker.lock, then start it.
+ @staticmethod
+ def dispatch(mach, job, arch, branch, buildid, queue):
+ wrk = worker(mach, job, arch, branch, buildid, queue)
+
+ # Register before start() so run()'s cleanup finds the entry
+ worker.lock.acquire()
+ worker.threads[wrk] = wrk
+ worker.lock.release()
+
+ wrk.start()
-def main(arch, branch, args):
+def main(arch, branch, buildid, args):
global index
- basedir="/var/portbuild/"+arch+"/"+branch
+ basedir="/var/portbuild/"+arch+"/"+branch+"/builds/"+buildid
portsdir=basedir+"/ports"
- indexfile=portsdir+"/INDEX-"+branch
- indexfile="/var/portbuild/i386/7-exp/ports/INDEX-7"
- qlen = 100
+ indexfile=portsdir+"/INDEX-"+branch[0]
- q = machqueue("/var/portbuild/%s/queue/" % arch)
+ print "[MASTER] parseindex..."
+ index = Index(indexfile)
+ index.parse()
+ print "[MASTER] length = %s" % len(ports)
- print "parseindex..."
- index=parseindex(indexfile)
- print "length = %s" % len(index)
+ print "[MASTER] Finding targets..."
+ targets = gettargets(args)
- targets = gettargets(index, args)
+ print "[MASTER] Calculating depth..."
+ depthindex(targets)
- print "heightindex..."
- heightindex(index, targets)
+ print "[MASTER] Pruning duds..."
+ dudsfile=basedir+"/duds"
+ for line in file(dudsfile):
+ try:
+ dud = ports[line.rstrip()]
+ except KeyError:
+ continue
+ print "[MASTER] Skipping %s (duds)" % dud.name
+ dud.destroy_recursive()
- sortd = sorted(((key, val["height"]) for (key, val) in index.iteritems() if val["height"] is not None), key=operator.itemgetter(1), reverse=True)
- wqueue = rank(index, selectheights(index, 1), (i[0] for i in sortd), qlen)
+ queue = PriorityQueue()
+ # XXX can do this while parsing index if we prune targets/duds
+ # first
+ for pkg in ports.itervalues():
+ if len(pkg.alldep) == 0:
+ queue.put((-pkg.depth, pkg))
- # Main work loop
- while len(sortd) > 0:
- worker.lock.acquire()
- print "Remaining %s" % len(sortd)
- while len(wqueue) > 0:
- job = wqueue.pop(0)
+ # XXX check osversion, pool
+ mdl=["arch = %s" % arch]
- if os.path.exists("/var/portbuild/%s/%s/packages/All/%s.tbz" % (arch, branch, job)):
- print "Skipping %s since it already exists" % job
- jobsuccess(index, job)
- else:
- worker.running.append(job) # Protect against a queue
- # rebalance adding this
- # back during build
- worker.lock.release()
- (machine, specials) = q.pick()
- worker.dispatch(machine, job, q, arch, branch)
- worker.lock.acquire()
-
- if len(wqueue) == 0:
- if len(sortd) == 0:
- # All jobs in progress, wait for children to exit
- break
- print "Rebalancing queue...",
- sortd = sorted(((key, val["height"]) for (key, val) in index.iteritems() if val["height"] is not None), key=operator.itemgetter(1), reverse=True)
- if len(sortd) == 0:
- break
+ # Main work loop
+ while len(ports) > 0:
+ print "[MASTER] Ports remaining=%s, Queue length=%s" % (len(ports), queue.qsize())
- print sortd[0:3]
- if sortd[0][0] == 1:
- # Everything left is depth 1, no need to waste time rebalancing further
- qlen = len(index)
+ if len(ports) < 10:
+ print "[MASTER] Remaining ports: %s" % ports.keys()
- # Don't add too many deps at once (e.g. after we build a
- # package like gmake), or we will switch to buildinglots
- # of shallow packages
- ready = [i for i in selectheights(index, 1) if i not in worker.running]
- wqueue = rank(index, ready, (i[0] for i in sortd), qlen)[:2*qlen]
- print "now %s (%s ready)" % (wqueue, len(ready))
+        # Block until a worker reports a result or a job is ready to run.
+        # The priority slot doubles as a message type: SUCCESS_PRIO and
+        # FAILURE_PRIO are results, anything else encodes the job depth.
+        (prio, job) = queue.get()
+        if prio == SUCCESS_PRIO:
+            print "[MASTER] Job %s succeeded" % job.name
+            for new in job.success():
+                queue.put((-new.depth, new))
+            continue
+        elif prio == FAILURE_PRIO:
+            if job.status == PHASE2:
+                print "[MASTER] Job %s failed" % job.name
+                job.failure()
+                continue
+            else:
+                # Requeue at low priority
+                print "[MASTER] Job %s failed (requeued for phase 2)" % job.name
+                job.status = PHASE2
+                queue.put((PHASE2_BASE_PRIO-job.depth, job))
+                continue
+        elif job.status == PHASE2:
+            depth = -(prio - PHASE2_BASE_PRIO)
+        else:
+            depth = -prio
+
+        print "[MASTER] Working on job %s, depth %d" % (job.name, depth)
+        if job.is_stale(arch, branch, buildid):
+            conn = QManagerClientConn(stderr = sys.stderr)
+            (code, vars) = conn.command("acquire",
+                                        {"name":job.name,
+                                         "type":"%s/%s/%s package" % \
+                                         (arch, branch, buildid),
+                                         "priority":10, "mdl":mdl})
+
+            if code[0] == "2":
+                machine=vars['machine']
+                job.id=vars['id']
+# print "Got ID %s" % job.id
+
+                worker.dispatch(machine, job, arch, branch, buildid, queue)
+            else:
+                # Name the job that failed to acquire; `pkg` at this point
+                # is only the leftover variable of the queue-seeding loop.
+                # NOTE(review): the job is not requeued on this path, so it
+                # stays in `ports` with nothing left to process it - confirm
+                # whether that is intended.
+                print "[MASTER] Error acquiring job %s: %s" % (job.name, code)
+        else:
+            print "[MASTER] Skipping %s since it already exists" % job.name
+            for new in job.success():
+                queue.put((-new.depth, new))
- worker.lock.release()
+ print "[MASTER] Waiting for threads"
+ threads = worker.threads.copy()
- if len(wqueue) == 0:
- # Ran out of work, wait for workers to free up some more
- print "No work to do, sleeping on workers"
- worker.event.wait()
+ for t in threads:
+ print "[MASTER] Outstanding thread: %s" % t.job.name
- for i in worker.tlist:
- i.join()
+ for t in threads:
+ print "[MASTER] Waiting for thread %s" % t.job.name
+ t.join()
- print "Finished"
+ print "[MASTER] Finished"
if __name__ == "__main__":
# from guppy import hpy; h = hpy()
- main(sys.argv[1], sys.argv[2], sys.argv[3:])
-
-# index = parseindex("/var/portbuild/i386/7-exp/ports/INDEX-7")
-# print index['gmake-3.81_2']
+ main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4:])
diff --git a/Tools/portbuild/scripts/pdispatch b/Tools/portbuild/scripts/pdispatch
index 82a3af9ada01..893b6ade6fa2 100755
--- a/Tools/portbuild/scripts/pdispatch
+++ b/Tools/portbuild/scripts/pdispatch
@@ -1,16 +1,16 @@
#!/bin/sh
#
-# pdispatch <arch> <branch> <buildid> <command> <package.tbz> [<args> ...]
+# pdispatch <arch> <branch> <buildid> <host> <command> <package.tbz> [<args> ...]
#
-# Choose a random machine from ${buildroot}/ulist and dispatch the
-# job to it via the ptimeout script.
+# Dispatch the job to a host via the ptimeout script.
pb=/var/portbuild
arch=$1
branch=$2
buildid=$3
-command=$4
-shift 4
+host=$4
+command=$5
+shift 5
pbab=${pb}/${arch}/${branch}
@@ -23,13 +23,19 @@ timeout=360000
loglength=1000
hdrlength=6
+# Replace buildid with whatever resolve prints for it.  Keep the requested
+# value so the error message can name it: on failure ${buildid} is empty.
+requested_buildid=${buildid}
+buildid=$(resolve ${pb} ${arch} ${branch} ${buildid})
+if [ -z "${buildid}" ]; then
+    echo "Invalid build ID ${requested_buildid}"
+    exit 1
+fi
+
builddir=${pbab}/builds/${buildid}
buildenv ${pb} ${arch} ${branch} ${builddir}
-# ssh -x doesn't work on some machines
+# XXX needed still?
unset DISPLAY
-# Use HPN-SSH for performance
+# Allow override by HPN-SSH for performance
if [ -z "${ssh_cmd}" ]; then
ssh_cmd=ssh
fi
@@ -39,11 +45,6 @@ fi
pkgname=$(basename $1 ${PKGSUFFIX})
-if grep -qxF ${pkgname} ${builddir}/duds; then
- echo "skipping ${pkgname}"
- exit 1
-fi
-
if [ -z "${pkgname}" ]; then
echo "null packagename"
exit 1
@@ -62,7 +63,7 @@ fi
if [ "x$NOPLISTCHECK" != "x" ]; then
flags="${flags} -noplistcheck"
fi
-if [ "x$WANT_DISTFILES" != "x" ]; then
+if [ "x$NO_DISTFILES" = "x" ]; then
flags="${flags} -distfiles"
fi
if [ "x$FETCH_ORIGINAL" != "x" ]; then
@@ -72,140 +73,123 @@ if [ "x$TRYBROKEN" != "x" ]; then
flags="${flags} -trybroken"
fi
-while `true`; do
- host=
- chroot=
- while [ -z "${host}" -o -z "${chroot}" ]; do
- chroot=
- host=$(lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/getmachine ${pb} ${arch} ${branch})
- # If ulist is empty, then all build machines are busy, so try
- # again in 15 seconds.
- if [ -z "${host}" ]; then
- sleep 15
- else
- . ${pb}/${arch}/portbuild.conf
- test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host}
- chrootdata=$(${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/claim-chroot ${arch} ${branch} ${buildid} ${pkgname} 2>&1)
- if [ -z "${chrootdata}" ]; then
- echo "Failed to claim chroot on ${host}"
- fi
-
- case "${chrootdata}" in
- */var/portbuild/scripts/claim-chroot*)
- # Error executing script, assume system is booting
- chrootdata="wait boot"
- ;;
- esac
-
- echo "Got ${chrootdata} from ${host}"
-
- set -- ${chrootdata}
- if [ $# -ge 2 ]; then
- case $1 in
- chroot)
- chroot=$2
- ;;
- setup)
- echo "Setting up ${arch}/${branch} build ID ${buildid} on ${host}"
-
- # Run in the background so we can potentially
- # claim a slot on another machine. In
- # practise I think we often end up trying
- # again on the same machine though.
-
- # Make sure to close stdin/stderr in the child
- # or make will hang until the child process
- # exits
- # XXX Revert to >&- once this is confirmed as working
- ${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${host} >/tmp/setupnode.$$ 2>&1 &
- ;;
- error)
- echo "Error reported by ${host}: $2"
- sleep 60
- ;;
- wait)
- echo "Waiting for setup to finish"
- sleep 60
- ;;
- esac
- shift 2
- fi
-
- if [ -z "${chroot}" ]; then
- lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host}
- fi
- fi
- done
-
- . ${pb}/${arch}/portbuild.conf
- test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host}
-
- rm -f ${builddir}/logs/${pkgname}.log ${builddir}/logs/${pkgname}.log.bz2
- rm -f ${builddir}/errors/${pkgname}.log ${builddir}/errors/${pkgname}.log.bz2
-
- echo "dispatching: ${ssh_cmd} -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args}"
- ${pb}/scripts/ptimeout.host $timeout ${ssh_cmd} -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args} 2>&1
- error=$?
+chroot=
+. ${pb}/${arch}/portbuild.conf
+test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host}
+chrootdata=$(${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/claim-chroot ${arch} ${branch} ${buildid} ${pkgname} 2>&1)
+if [ -z "${chrootdata}" ]; then
+ echo "Failed to claim chroot on ${host}"
+ exit 254
+fi
+
+case "${chrootdata}" in
+ */var/portbuild/scripts/claim-chroot*)
+ # Error executing script, assume system is booting
+ chrootdata="wait boot"
+ ;;
+esac
+
+# echo "Got ${chrootdata} from ${host}"
+
+set -- ${chrootdata}
+if [ $# -ge 2 ]; then
+ case $1 in
+ chroot)
+ chroot=$2
+ ;;
+ setup)
+ echo "Setting up ${arch}/${branch} build ID ${buildid} on ${host}"
+
+ # Run in the background so we can potentially
+ # claim a slot on another machine. In
+ # practise I think we often end up trying
+ # again on the same machine though.
+
+ # Make sure to close stdin/stderr in the child
+ # or make will hang until the child process
+ # exits
+ ${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${host} > /tmp/setupnode.$$ 2>&1 &
+ exit 253
+ ;;
+ error)
+ echo "Error reported by ${host}: $2"
+ ;;
+ wait)
+ echo "Waiting for setup of ${host} to finish"
+ ;;
+ esac
+ shift 2
+fi
+
+if [ -z "${chroot}" ]; then
+ exit 254
+fi
+
+. ${pb}/${arch}/portbuild.conf
+test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host}
- #if grep -q " failed unexpectedly on " ${builddir}/logs/${pkgname}.pre.log; then
- # cat ${builddir}/logs/${pkgname}.pre.log | mail -s "${pkgname} failed uncleanly on ${arch} ${branch}" ${mailto}
- #else
- # rm ${builddir}/logs/${pkgname}.pre.log
- #fi
+rm -f ${builddir}/logs/${pkgname}.log ${builddir}/logs/${pkgname}.log.bz2
+rm -f ${builddir}/errors/${pkgname}.log ${builddir}/errors/${pkgname}.log.bz2
- # Pull in the results of the build from the client
+${pb}/scripts/ptimeout.host $timeout ${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args} 2>&1
+error=$?
+
+# Pull in the results of the build from the client
- ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${builddir}/logs/${pkgname}.log
- (${ssh_cmd} -a -n ${client_user}@${host} test -f ${chroot}/tmp/work.tbz ) && ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/work.tbz ${builddir}/wrkdirs/${pkgname}.tbz
+${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${builddir}/logs/${pkgname}.log
+(${ssh_cmd} -a -n ${client_user}@${host} test -f ${chroot}/tmp/work.tbz ) && ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/work.tbz ${builddir}/wrkdirs/${pkgname}.tbz
- # XXX Set dirty flag if any of the scp's fail
+# XXX Set dirty flag if any of the scp's fail
- mkdir -p ${builddir}/distfiles/.pbtmp/${pkgname}
- ${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp/distfiles -cf - . | \
- tar --unlink -C ${builddir}/distfiles/.pbtmp/${pkgname} -xvf -
+mkdir -p ${builddir}/distfiles/.pbtmp/${pkgname}
+${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp/distfiles -cf - . | \
+ tar --unlink -C ${builddir}/distfiles/.pbtmp/${pkgname} -xvf - && \
touch ${builddir}/distfiles/.pbtmp/${pkgname}/.done
- if [ "${error}" = 0 ]; then
- ${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp -cf - packages | \
- tar --unlink -C ${builddir} -xvf -
- test -f ${builddir}/packages/All/${pkgname}${PKGSUFFIX} && \
- touch ${builddir}/packages/All/${pkgname}${PKGSUFFIX}
- rm -f ${builddir}/errors/${pkgname}.log && \
- touch ${builddir}/errors/.force
- lockf -k ${pbab}/failure.lock ${pb}/scripts/buildsuccess ${arch} ${branch} ${buildid} ${pkgname}
- log=${builddir}/logs/$pkgname.log
- if grep -q "even though it is marked BROKEN" ${log}; then
- echo | mail -s "${pkgname} BROKEN but built on ${arch} ${branch}" ${mailto}
- fi
- if grep -q "^list of .*file" ${log}; then
- buildlogdir=$(realpath ${builddir}/logs/)
- baselogdir=$(basename ${buildlogdir})
- (sed -e '/^build started/,$d' $log;echo;echo "For the full build log, see"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '1,/^=== Checking filesystem state/d' $log) | mail -s "${pkgname} pkg-plist errors on ${arch} ${branch}" ${mailto}
- fi
- else
- log=${builddir}/errors/${pkgname}.log
- ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${log} || (echo ${chroot}@${host}; ${ssh_cmd} -a -n ${client_user}@${host} ls -laR ${chroot}/tmp) | mail -s "${pkgname} logfile not found" ${mailto}
- if ! grep -q "even though it is marked BROKEN" ${log}; then
- buildlogdir=$(realpath ${builddir}/logs/)
- baselogdir=$(basename ${buildlogdir})
- if [ `wc -l ${log} | awk '{print $1}'` -le `expr ${loglength} + ${hdrlength}` ]; then
- (echo "You can also find this build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;cat ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
- else
- (echo "Excerpt from the build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '/^build started/,$d' $log;echo;echo " [... lines trimmed ...]";echo;tail -${loglength} ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
- fi
+if [ "${error}" = 0 ]; then
+ ${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp -cf - packages | \
+ tar --unlink -C ${builddir} -xvf -
+
+ # XXX why is this needed?
+ test -f ${builddir}/packages/All/${pkgname}${PKGSUFFIX} && \
+ touch ${builddir}/packages/All/${pkgname}${PKGSUFFIX}
+
+ if [ -f ${builddir}/errors/${pkgname}.log ]; then
+ rm -f ${builddir}/errors/${pkgname}.log
+ # Force rebuild of html page to remove this package from list
+ touch ${builddir}/errors/.force
+ fi
+ lockf -k ${pbab}/failure.lock ${pb}/scripts/buildsuccess ${arch} ${branch} ${buildid} ${pkgname}
+ log=${builddir}/logs/$pkgname.log
+ if grep -q "even though it is marked BROKEN" ${log}; then
+ echo | mail -s "${pkgname} BROKEN but built on ${arch} ${branch}" ${mailto}
+ fi
+ if grep -q "^list of .*file" ${log}; then
+ buildlogdir=$(realpath ${builddir}/logs/)
+ baselogdir=$(basename ${buildlogdir})
+ (sed -e '/^build started/,$d' $log;echo;echo "For the full build log, see"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '1,/^=== Checking filesystem state/d' $log) | mail -s "${pkgname} pkg-plist errors on ${arch} ${branch}" ${mailto}
+ fi
+else
+ log=${builddir}/errors/${pkgname}.log
+ ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${log} || (echo ${chroot}@${host}; ${ssh_cmd} -a -n ${client_user}@${host} ls -laR ${chroot}/tmp) | mail -s "${pkgname} logfile not found" ${mailto}
+ if ! grep -q "even though it is marked BROKEN" ${log}; then
+ buildlogdir=$(realpath ${builddir}/logs/)
+ baselogdir=$(basename ${buildlogdir})
+ if [ $(wc -l ${log} | awk '{print $1}') -le $((loglength + hdrlength)) ]; then
+ (echo "You can also find this build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;cat ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
+ else
+ (echo "Excerpt from the build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '/^build started/,$d' $log;echo;echo " [... lines trimmed ...]";echo;tail -${loglength} ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto}
fi
- lockf -k ${pbab}/failure.lock ${pb}/scripts/buildfailure ${arch} ${branch} ${buildid} ${pkgname}
fi
-
- ${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/clean-chroot ${arch} ${branch} ${buildid} ${chroot} ${clean}
-
- lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host}
-
+ lockf -k ${pbab}/failure.lock ${pb}/scripts/buildfailure ${arch} ${branch} ${buildid} ${pkgname}
+fi
+
+${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/clean-chroot ${arch} ${branch} ${buildid} ${chroot} ${clean}
+
# XXX Set a dirty variable earlier and check here
- if grep -q "^build of .*ended at" ${builddir}/logs/${pkgname}.log; then
- exit ${error}
- else
- echo "Build of ${pkgname} in ${host}:/${chroot} failed uncleanly, rebuilding"
- sleep 120
- fi
-done
+if grep -q "^build of .*ended at" ${builddir}/logs/${pkgname}.log; then
+ exit ${error}
+else
+ echo "Build of ${pkgname} in ${host}:/${chroot} failed uncleanly"
+ exit 254
+fi
diff --git a/Tools/portbuild/scripts/pollmachine b/Tools/portbuild/scripts/pollmachine
index c3438041c881..ddc7100a94d1 100755
--- a/Tools/portbuild/scripts/pollmachine
+++ b/Tools/portbuild/scripts/pollmachine
@@ -13,24 +13,20 @@
#
# options are:
# -daemon : poll repeatedly
-# -queue : update queue entries (XXX racy)
#
# TODO:
# XXX qmgr notification of new/removed machines
-# XXX log state changes in daemon mode
-# XXX clean up inactive builds
-# XXX test thread shutdown
-# XXX needed an explicit way to request setup?
-# XXX signal handler
+# XXX counter before declaring a machine as dead
+# Declares a machine as online if it reports 0 data from infoseek?
# * Deal with machines change OS/kernel version
# - ACL list might change!
# - take machine offline, update ACL/arch/etc, reboot, bring online
import sys, threading, socket
-from popen2 import *
from time import sleep
+import os, subprocess, logging
-if len(sys.argv) < 1:
+# sys.argv always contains the script name, so "no arguments given"
+# is len(sys.argv) < 2; the old "< 1" test could never be true.
+if len(sys.argv) < 2:
     print "Usage: %s <arch> [<arch> ...]" % sys.argv[0]
@@ -39,14 +35,9 @@ if len(sys.argv) < 1:
arches=set()
mlist={}
polldelay=0
-queue=0
for i in sys.argv[1:]:
if i == "-daemon":
- polldelay = 30
- continue
-
- if i == "-queue":
- queue = 1
+ polldelay = 180
continue
if "/" in i:
@@ -82,9 +73,6 @@ class MachinePoll(threading.Thread):
host = None
port = 414
- # Should we update queue entry?
- queue = None
-
timeout = None # How often to poll
shutdown = False # Exit at next poll wakeup
@@ -94,17 +82,21 @@ class MachinePoll(threading.Thread):
# Dictionary of variables reported by the client
vars = None
- def __init__(self, mach, arch, timeout, host, port, queue):
+ def __init__(self, mach, arch, timeout, host, port):
super(MachinePoll, self).__init__()
self.mach = mach
self.arch = arch
self.timeout = timeout
self.host = host
self.port = port
- self.queue = queue
+
+ # How many times the connection timed out since last success
+ self.timeouts = 0
self.vars = {}
+ self.setDaemon(True)
+
def run(self):
while True:
if self.shutdown:
@@ -124,63 +116,104 @@ class MachinePoll(threading.Thread):
lines = []
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ s.settimeout(60)
s.connect((self.host, self.port))
- f = s.makefile()
-
- lines = f.readlines()
+
+ data = ""
+ while len(data) < 65536:
+ chunk = s.recv(8192)
+ if not chunk:
+ break
+ data += chunk
+
nowonline = True
+ self.timeouts = 0
+ lines = data.split("\n")
+ except socket.timeout:
+ if self.online:
+ logging.info("[%s] Connection timeout" % self.mach)
+ self.timeouts += 1
+ if self.timeouts < 3:
+ nowonline = self.online
except:
pass
- finally:
+ finally:
try:
s.close()
except:
pass
if nowonline != self.online:
- print "State change: %s %s -> %s" % (self.mach, self.online, nowonline)
+ logging.info("[%s] Now %s" % (self.mach, "online" if nowonline else "OFFLINE"))
self.online = nowonline
+ if self.online:
+ self.timeouts = 0
# XXX inform qmgr of state change
- if self.online and not lines:
+ if self.online and not lines and not self.timeouts:
# reportload script is missing
dosetup=1
else:
dosetup=0
for line in lines:
+ if line == "":
+ continue
line=line.rstrip()
part=line.partition('=')
if part[1] != '=' or not part[0]:
# if "No such file or directory" in line:
# # Client may require setting up post-boot
# dosetup=1
- print "Bad input from %s: %s" % (self.mach, line)
+ logging.info("[%s] Bad input: %s" % (self.mach, line))
# Assume client needs setting up
dosetup=1
-
try:
old = self.vars[part[0]]
except KeyError:
old = ""
if old != part[2]:
self.vars[part[0]] = part[2]
-# print "%s@%s: \"%s\" -> \"%s\"" % (part[0], self.mach, old, part[2])
+# logging.info("%s@%s: \"%s\" -> \"%s\"" % (part[0], self.mach, old, part[2]))
# XXX update qmgr
+ try:
+ envs = self.vars['buildenvs']
+ for e in envs.split():
+ (arch, branch, buildid) = e.split("/")
+ f = "/var/portbuild/%s/%s/builds/%s/.active" % \
+ (arch, branch, buildid)
+ if os.path.exists(f):
+ continue
+ # Clean up a stale buildenv
+ logging.info("[%s] Cleaning up stale build: %s" % (self.mach, e))
+ (err, out) = self.setup(branch, buildid, "-nocopy -full")
+ if err:
+ logging.info("[%s] Error from cleanup" % (self.mach))
+ for l in out.split("\n"):
+ if l == "":
+ continue
+ logging.info("[%s] %s" % (self.mach, l))
+
+ except KeyError:
+ pass
+
if dosetup:
- print "Setting up %s" % (self.mach)
- (err, out) = self.setup()
+ logging.info("[%s] Setting up machine" % (self.mach))
+ (err, out) = self.setup("-", "-")
if err:
- print "Error from setup of %s:" % (self.mach)
- print out
- print "Setup of %s complete" % (self.mach)
- return
+ logging.info("[%s] Error from setup" % (self.mach))
+ for l in out.split("\n"):
+ if l == "":
+ continue
+ logging.info("[%s] %s" % (self.mach, l))
+ logging.info("[%s] Setup complete" % (self.mach))
# Validate that arch has not changed (e.g. i386 -> amd64)
try:
if self.arch != self.vars['arch']:
- print "Machine %s reporting unexpected arch: %s -> %s" % (self.mach, self.arch, self.vars['arch'])
+ logging.info("[%s] Unexpected arch: %s -> %s" % \
+ (self.mach, self.arch, self.vars['arch']))
except KeyError:
pass
@@ -195,24 +228,27 @@ class MachinePoll(threading.Thread):
pass
f.close()
- if self.queue:
- try:
- f = file("%s/%s/queue/%s" % (pb, self.arch, self.mach), "w")
- except:
- return
-
- try:
- f.write("%s\n" % self.vars['jobs'])
- except:
- pass
- f.close()
-
- def setup(self):
- child = Popen4("su ports-%s -c \"/var/portbuild/scripts/dosetupnode %s - - %s\"" % (self.arch, self.arch, self.mach), 0)
+    def setup(self, branch, buildid, args = ""):
+        """ Run dosetupnode for this machine via su as user ports-<arch>.
+
+        branch, buildid -- passed through to dosetupnode
+        args            -- extra dosetupnode arguments, one string
+
+        Returns (exit status, combined stdout/stderr text). """
+        cmd = "su ports-%s -c \"/var/portbuild/scripts/dosetupnode %s %s %s %s %s\""\
+            % (self.arch, self.arch, branch, buildid, self.mach, args)
+        child = subprocess.Popen(cmd, shell=True, stderr = subprocess.STDOUT,
+                                 stdout = subprocess.PIPE)
+        # Drain the pipe before collecting the status: wait() on a child
+        # whose stdout is a PIPE can deadlock once the pipe buffer is full.
- err = child.wait()
- out = "".join(child.fromchild.readlines())
+        out = child.communicate()[0]
+        err = child.returncode
         return (err, out)
+logging.basicConfig(level=logging.INFO,
+ format='[%(asctime)s] %(message)s',
+ datefmt='%d %b %Y %H:%M:%S',
+ filename='/var/log/pollmachine.log', filemode='w')
+
+log_console = logging.StreamHandler()
+log_console.setLevel(logging.INFO)
+formatter = logging.Formatter('[%(asctime)s] %(message)s',
+ datefmt = '%d %b %Y %H:%M:%S')
+log_console.setFormatter(formatter)
+logging.getLogger('').addHandler(log_console)
+
while True:
for arch in arches:
try:
@@ -233,28 +269,30 @@ while True:
machines[arch]=now
for mach in gone:
- print "Removing machine %s" % mach
+ logging.info("Removing machine %s/%s" % (arch, mach))
# XXX disable from qmgr
pollthreads[mach].shutdown=True
del pollthreads[mach]
for mach in new:
- print "Adding machine %s" % mach
+ logging.info("Adding machine %s/%s" % (arch, mach))
# XXX set up qmgr
pc="%s/%s/portbuild.conf" % (pb, arch)
pch="%s/%s/portbuild.%s" % (pb, arch, mach)
- config = Popen4("test -f %s && . %s; test -f %s && . %s; echo $infoseek_host; echo $infoseek_port" % (pc, pc, pch, pch))
- host=config.fromchild.readline().rstrip()
+ cmd = "test -f %s && . %s; test -f %s && . %s; echo $infoseek_host; echo $infoseek_port" % (pc, pc, pch, pch)
+ config = subprocess.Popen(cmd, shell = True,
+ stdout = subprocess.PIPE)
+ host=config.stdout.readline().rstrip()
if not host:
host = mach
- port=config.fromchild.readline().rstrip()
+ port=config.stdout.readline().rstrip()
try:
port = int(port)
except (TypeError, ValueError):
port = 414
- pollthreads[mach] = MachinePoll(mach, arch, polldelay, host, port, queue)
+ pollthreads[mach] = MachinePoll(mach, arch, polldelay, host, port)
pollthreads[mach].start()
if not polldelay:
diff --git a/Tools/portbuild/scripts/ptimeout.c b/Tools/portbuild/scripts/ptimeout.c
index 33e9ecc4f3a9..915024ce15df 100644
--- a/Tools/portbuild/scripts/ptimeout.c
+++ b/Tools/portbuild/scripts/ptimeout.c
@@ -47,7 +47,7 @@ main(int argc, char *argv[])
/*printf("exited child is %d, status is %d\n", child, status);*/
- if (pid1 = child) {
+ /* Compare, do not assign: "pid1 = child" is true for any non-zero
+  * child, so the else branch (killing pid1) could never run. */
+ if (pid1 == child) {
/*printf("killing process %d\n", pid2);*/
- kill(pid2, SIGTERM);
+ kill(pid2, SIGKILL);
} else {
/*printf("killing process %d\n", pid1);*/
kill(pid1, SIGTERM);
diff --git a/Tools/portbuild/scripts/reportload b/Tools/portbuild/scripts/reportload
index 130348eceb5c..56ec0df5196d 100755
--- a/Tools/portbuild/scripts/reportload
+++ b/Tools/portbuild/scripts/reportload
@@ -16,7 +16,7 @@ fi
error=
for i in squid disk; do
if [ -f ${scratchdir}/.${i} ]; then
- error=${i} "${error}"
+ error="${i} ${error}"
fi
done
@@ -36,4 +36,4 @@ done
echo "buildenvs=${buildenvs}"
echo -n "load="
uptime
-echo "error=${error}" \ No newline at end of file
+echo "error=${error}"
diff --git a/Tools/portbuild/scripts/straslivy.py b/Tools/portbuild/scripts/straslivy.py
index b67b75dd2a8d..8a899399a4f2 100755
--- a/Tools/portbuild/scripts/straslivy.py
+++ b/Tools/portbuild/scripts/straslivy.py
@@ -26,7 +26,7 @@ def getpdispatch():
pid = fields[0]
arch = fields[3]
branch = fields[4]
- port = fields[7].replace('/usr/ports/','')
+ port = fields[9].replace('/usr/ports/','')
line = ' '.join(fields)
if len(arch) > archwidth:
diff --git a/Tools/portbuild/scripts/zbackup b/Tools/portbuild/scripts/zbackup
index c51b9342b5fe..18f806fddbb6 100755
--- a/Tools/portbuild/scripts/zbackup
+++ b/Tools/portbuild/scripts/zbackup
@@ -16,7 +16,7 @@ backuplist=["a", "a/nfs", "a/src", "a/local", "a/ports", "a/portbuild",
backupdir="/dumpster/pointyhat/backup"
# How many days between full backups
-fullinterval=14
+fullinterval=3
def validate():
fslist = zfs.getallfs()
diff --git a/Tools/portbuild/scripts/zclient b/Tools/portbuild/scripts/zclient
new file mode 100755
index 000000000000..636aa265598c
--- /dev/null
+++ b/Tools/portbuild/scripts/zclient
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+
+# ZFS snapshot client
+
+import socket, os, sys
+
+# (host, port) pair handed to socket.connect() for the TCP connection
+ZSERVER = ('gohan10.freebsd.org', 8888)
+# Path of the UNIX-domain socket that connect() tries first when it exists
+ZFSLOCAL = '/tmp/.zserver'
+
+def connect():
+    """ Connects to service, returns (socket, islocal)
+
+    The UNIX-domain socket at ZFSLOCAL is tried first when that path
+    exists.  If it does not exist, or connecting to it fails, a TCP
+    connection to ZSERVER is attempted instead.  islocal is True for the
+    UNIX-domain socket and False for TCP.  When neither connection can
+    be made, (None, None) is returned rather than raising.
+    """
+
+    if os.path.exists(ZFSLOCAL):
+        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        try:
+            s.connect(ZFSLOCAL)
+            return (s, True)
+        except socket.error:
+            # Only connection failures are swallowed here, so that
+            # KeyboardInterrupt/SystemExit still propagate; fall through
+            # to the TCP attempt below
+            s.close()
+
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    try:
+        s.connect(ZSERVER)
+        return (s, False)
+    except socket.error:
+        s.close()
+
+    return (None, None)
+
+def send(sock, cmd):
+    """ Write cmd to the file object sock, flush it, and return one reply line """
+
+    sock.write(cmd)
+    sock.flush()
+
+    # The first line that comes back is the whole reply for this helper
+    return sock.readline()
+
+def error(msg):
+    """ Report msg on stderr, prefixed with the program name, and exit 1 """
+    text = "%s: %s" % (sys.argv[0], msg.rstrip())
+    sys.stderr.write(text + "\n")
+    sys.exit(1)
+
+def do_list(sockfile, islocal, args):
+    """ Send LIST and print every line the server sends after the status line
+
+    sockfile is the file object wrapping the server connection.  islocal
+    and args are part of the common handler signature and are not used.
+    A status line starting with "2" is treated as success; anything else
+    is reported through error(), which exits.
+    """
+    res = send(sockfile, "LIST\n")
+    # startswith() also copes with an empty reply (connection closed),
+    # where res[0] would raise IndexError
+    if res.startswith("2"):
+        for i in sockfile:
+            print i.rstrip()
+    else:
+        # res[4:] skips the leading status field of the reply line
+        error(res[4:] or "no reply from server")
+
+def do_get(sockfile, islocal, args):
+    """ Send GET and copy the server's reply stream to stdout
+
+    args[0] and args[1] are placed, in that order, after the GET keyword
+    (zsync passes the filesystem and the snapshot name).  islocal is not
+    used.  A status line starting with "2" is treated as success; the
+    rest of the connection is then copied to stdout in 32KB blocks.
+    Anything else is reported through error(), which exits.
+    """
+    if len(args) < 2:
+        # Without this check a missing argument surfaces as IndexError
+        error("usage: get <filesystem> <snapshot>")
+    res = send(sockfile, "GET %s %s\n" % (args[0], args[1]))
+    # startswith() also copes with an empty reply (connection closed)
+    if res.startswith("2"):
+        while True:
+            block = sockfile.read(32*1024)
+            if not block:
+                break
+            sys.stdout.write(block)
+    else:
+        error(res[4:] or "no reply from server")
+
+def do_diff(sockfile, islocal, args):
+    """ Send DIFF and copy the server's reply stream to stdout
+
+    args[0], args[1] and args[2] are placed, in that order, after the
+    DIFF keyword (zsync passes the filesystem, the local snapshot and the
+    remote snapshot).  islocal is not used.  A status line starting with
+    "2" is treated as success; the rest of the connection is then copied
+    to stdout in 32KB blocks.  Anything else is reported through error(),
+    which exits.
+    """
+    if len(args) < 3:
+        # Without this check a missing argument surfaces as IndexError
+        error("usage: diff <filesystem> <snapshot1> <snapshot2>")
+    res = send(sockfile, "DIFF %s %s %s\n" % (args[0], args[1], args[2]))
+    # startswith() also copes with an empty reply (connection closed)
+    if res.startswith("2"):
+        while True:
+            block = sockfile.read(32*1024)
+            if not block:
+                break
+            sys.stdout.write(block)
+    else:
+        error(res[4:] or "no reply from server")
+
+def do_reg(sockfile, islocal, args):
+    """ Send REGISTER for args[0] and print the server's answer
+
+    Registration is refused unless islocal is true, i.e. the connection
+    was made over the local socket.  A status line starting with "2" is
+    treated as success and its text is printed; anything else is
+    reported through error(), which exits.
+    """
+    # Use the islocal argument rather than the module-level 'sock' tuple,
+    # which only exists when this file is run as a script
+    if not islocal:
+        error("must register on local machine")
+    if not args:
+        error("usage: register <filesystem>")
+    res = send(sockfile, "REGISTER %s\n" % args[0])
+    # startswith() also copes with an empty reply (connection closed)
+    if res.startswith("2"):
+        print res[4:]
+    else:
+        error(res[4:] or "no reply from server")
+
+def do_unreg(sockfile, islocal, args):
+    """ Send UNREGISTER for args[0] and print the server's answer
+
+    Unregistration is refused unless islocal is true, i.e. the connection
+    was made over the local socket.  A status line starting with "2" is
+    treated as success and its text is printed; anything else is
+    reported through error(), which exits.
+    """
+    # Use the islocal argument rather than the module-level 'sock' tuple,
+    # which only exists when this file is run as a script
+    if not islocal:
+        error("must unregister on local machine")
+    if not args:
+        error("usage: unregister <filesystem>")
+    res = send(sockfile, "UNREGISTER %s\n" % args[0])
+
+    # startswith() also copes with an empty reply (connection closed)
+    if res.startswith("2"):
+        print res[4:]
+    else:
+        error(res[4:] or "no reply from server")
+
+def do_help(sockfile, islocal, args):
+    """ Print every command name with its description, sorted by name """
+    for name in sorted(cmddict.keys()):
+        description = cmddict[name][1]
+        print "%15s - %s" % (name, description)
+
+# Command table: command name -> (handler, help text shown by 'help').
+# Every handler is called as handler(sockfile, islocal, args).
+cmddict = {'list':(do_list, 'List available filesystem/snapshot pairs'),
+           'get':(do_get, 'Get a snapshot'),
+           'diff':(do_diff, 'Get the diffs between two snapshots'),
+           'register':(do_reg, 'Register a new filesystem (privileged)'),
+           'reg':(do_reg, 'Alias for register'),
+           'unregister':(do_unreg, 'Unregister a filesystem (privileged)'),
+           'unreg':(do_unreg, 'Alias for unregister'),
+           'help':(do_help, 'Display this help')}
+
+if __name__ == "__main__":
+
+    args = sys.argv
+
+    # Resolve the command before touching the network, so that a typo is
+    # reported as such and exceptions raised inside a handler are not
+    # mistaken for an unknown command
+    try:
+        cmd = args[1]
+        handler = cmddict[cmd][0]
+    except (KeyError, IndexError):
+        error("No such command\n")
+    arg = args[2:]
+
+    # connect() reports failure by returning (None, None), not by raising.
+    # 'sock' stays a module-level (socket, islocal) tuple.
+    sock = connect()
+    if sock[0] is None:
+        error("unable to connect to zserver")
+
+    handler(sock[0].makefile(), sock[1], arg)
+
diff --git a/Tools/portbuild/scripts/zsync b/Tools/portbuild/scripts/zsync
new file mode 100755
index 000000000000..62f8e2b27e31
--- /dev/null
+++ b/Tools/portbuild/scripts/zsync
@@ -0,0 +1,38 @@
+#!/bin/sh
+#
+# zsync - bring the local ZFS copy of a filesystem in line with the
+# snapshots offered through zclient.
+#
+# Usage: zsync filesystem
+#
+# The last snapshot that "zclient list" reports for the filesystem is
+# compared with the last local snapshot of ${zbase}/<filesystem>:
+#   - same name: nothing to do, exit 0
+#   - local snapshot is also listed by the server: incremental update
+#     via "zclient diff"
+#   - no local snapshot, or one the server does not list: the local
+#     filesystem is destroyed and received in full via "zclient get"
+
+base=/a/cache
+zbase=a/cache
+
+fs=$1
+if [ -z "$fs" ]; then
+    echo "usage: $0 filesystem" >&2
+    exit 1
+fi
+
+# Second column of the last listing line that starts with "<fs> "
+rsnap=$(zclient list | grep "^$fs " | tail -1 | awk '{print $2}')
+if [ -z "$rsnap" ]; then
+    echo "No such filesystem $fs"
+    exit 1
+fi
+
+# Name (part after the @) of the last local snapshot of this filesystem
+lsnap=$(zfs list -Ht snapshot | grep "^$zbase/$fs@" | tail -1 | sed -e "s,^$zbase/$fs@,," | awk '{print $1}')
+if [ -z "$lsnap" ]; then
+    echo "No local snapshot found"
+    dofull=1
+else
+    if [ "$lsnap" = "$rsnap" ]; then
+        exit 0
+    fi
+    # Check for remote snapshot
+    if ! (zclient list | grep "^$fs $lsnap " > /dev/null); then
+        # The server does not list our snapshot, so there is no base for
+        # an incremental stream: drop it and fetch everything again
+        echo "Local snapshot not found, removing and resyncing"
+        zfs destroy "$zbase/$fs@$lsnap"
+        dofull=1
+    else
+        dofull=0
+    fi
+fi
+
+if [ "$dofull" = "1" ]; then
+    zfs destroy -r "${zbase}/${fs}"
+    zclient get "${fs}" "${rsnap}" | zcat | zfs receive "${zbase}/${fs}"
+else
+    zclient diff "${fs}" "${lsnap}" "${rsnap}" | zcat | zfs receive -F "${zbase}/${fs}"
+fi
+