diff options
author | Pav Lucistnik <pav@FreeBSD.org> | 2009-05-21 16:17:15 +0000 |
---|---|---|
committer | Pav Lucistnik <pav@FreeBSD.org> | 2009-05-21 16:17:15 +0000 |
commit | ce4e44feb45d4624500b527986c5a1cda9a26200 (patch) | |
tree | 8a11906f3f3047037cedcb008823b870b0b28604 | |
parent | 46e2ac6a6ff9bc4358abd6fb41d209224a61d990 (diff) |
- Update to reflect a reality on pointyhat
Notes:
svn path=/head/; revision=234345
-rwxr-xr-x | Tools/portbuild/scripts/build | 59 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/buildproxy-client | 14 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/buildscript | 4 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/dopackages | 75 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/dosetupnode | 17 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/makeduds | 11 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/makerestr | 3 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/makeworld | 2 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/packagebuild | 936 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/pdispatch | 264 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/pollmachine | 148 | ||||
-rw-r--r-- | Tools/portbuild/scripts/ptimeout.c | 2 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/reportload | 4 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/straslivy.py | 2 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/zbackup | 2 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/zclient | 124 | ||||
-rwxr-xr-x | Tools/portbuild/scripts/zsync | 38 |
17 files changed, 989 insertions, 716 deletions
diff --git a/Tools/portbuild/scripts/build b/Tools/portbuild/scripts/build index bfa94e004d78..ed2c33971eff 100755 --- a/Tools/portbuild/scripts/build +++ b/Tools/portbuild/scripts/build @@ -94,7 +94,7 @@ do_clone() { if [ -d ${newbuilddir} ]; then if [ ! -f ${pbab}/builds/previous/.keep ]; then - build destroy ${arch} ${branch} previous + /var/portbuild/scripts/build destroy ${arch} ${branch} previous fi rm -f ${pbab}/builds/previous mv ${pbab}/builds/latest ${pbab}/builds/previous @@ -111,13 +111,22 @@ do_portsupdate() { buildid=$3 builddir=$4 shift 4 + if [ $# -gt 0 ]; then + arg=$1 + shift + fi portsfs=a/portbuild/${arch}/${buildid}/ports + destroy_fs a/portbuild/${arch} ${buildid} /ports || exit 1 + + if [ "${arg}" = "-umount" ]; then + return + fi + echo "================================================" echo "Reimaging ZFS ports tree on ${builddir}/ports" echo "================================================" - destroy_fs a/portbuild/${arch} ${buildid} /ports || exit 1 now=$(now) zfs snapshot a/snap/ports@${now} @@ -132,15 +141,24 @@ do_srcupdate() { buildid=$3 builddir=$4 shift 4 + if [ $# -gt 0 ]; then + arg=$1 + shift + fi srcfs=a/portbuild/${arch}/${buildid}/src + destroy_fs a/portbuild/${arch} ${buildid} /src || exit 1 + + if [ "${arg}" = "-umount" ]; then + return + fi + echo "================================================" echo "Reimaging ZFS src tree on ${builddir}/src" echo "================================================" - destroy_fs a/portbuild/${arch} ${buildid} /src || exit 1 - + case ${branch} in 8|8-exp) srcbranch=HEAD @@ -173,11 +191,8 @@ cleanup_client() { test -f ${pb}/${arch}/portbuild.${mach} && . 
${pb}/${arch}/portbuild.${mach} # Kill off builds and clean up chroot - ${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${mach} -nocopy -queue - - if [ "${arg}" = "-full" ]; then - ${ssh_cmd} ${client_user}@${mach} ${sudo_cmd} rm -rf ${pb}/${arch}/${branch}/builds/${buildid}/.ready ${pb}/${arch}/${branch}/builds/${buildid} /tmp/.setup-${buildid} - fi + ${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${mach} -nocopy -queue -full + echo "Finished cleaning up ${arch}/${branch} build ID ${buildid} on ${mach}" } @@ -211,7 +226,7 @@ do_upload() { test_fs() { local fs=$1 - zfs list -Ht filesystem | awk '{print $1}' | grep -q "$fs" + zfs list -Ht filesystem ${fs} > /dev/null 2>&1 } @@ -286,22 +301,22 @@ destroy_fs() { # We might have snapshots on the target filesystem, e.g. if it # is both the head and tail of its clone tree. They should be # unreferenced. - (zfs list -H -o name | grep "^${fullfs}@" | xargs -n 1 zfs destroy) || return 1 + # We have to grep because zfs list -H returns an error instead of + # a null list if no snapshots exist + if ! (zfs list -r -H -o name -t snapshot ${fullfs} | grep "^${fullfs}@" | xargs -n 1 zfs destroy); then + return 1 + fi # The target filesystem should now be unreferenced - zfs destroy -f "${fullfs}" || return 1 - - # Clean up the initial snapshot(s) that were promoted onto a - # cloned filesystem. It could have been propagated several - # times so we don't know where it ended up. Therefore we - # can't match for the ${buildid} part of ${fullfs}. - # - # XXX might be doing a substring match of subfs but we can't - # prepend / because a null subfs will not match + if ! zfs destroy -f "${fullfs}"; then + return 1 + fi # Destroy the origin snapshot, which should be unreferenced if [ ! -z "${parent}" ]; then - zfs destroy -f ${parent} || return 1 + if ! zfs destroy -f ${parent}; then + return 1 + fi fi fi } @@ -374,7 +389,7 @@ proxy_user() { eval "do_${cmd} ${arch} ${branch} ${buildid} \"${builddir}\" ${args}" error=$? 
else - su ports-${arch} -c "build ${cmd} ${arch} ${branch} ${buildid} \"${builddir}\" ${args}" + su ports-${arch} -c "/var/portbuild/scripts/build ${cmd} ${arch} ${branch} ${buildid} \"${builddir}\" ${args}" error=$? fi diff --git a/Tools/portbuild/scripts/buildproxy-client b/Tools/portbuild/scripts/buildproxy-client index e5ebcfed05cf..f8de6fe878e0 100755 --- a/Tools/portbuild/scripts/buildproxy-client +++ b/Tools/portbuild/scripts/buildproxy-client @@ -25,8 +25,18 @@ try: s.close() sys.exit(int(code)) -except: - raise # XXX debug +except Exception, e: + print "buildproxy-client: exception:" + print e + try: + if code == None: + print "buildproxy-client: error: code was None" + else: + print "buildproxy-client: error: code was '" + code + "'" + except Exception, e2: + print "buildproxy-client: exception 2:" + print e2 + raise e # XXX debug sys.exit(254) diff --git a/Tools/portbuild/scripts/buildscript b/Tools/portbuild/scripts/buildscript index ec05c6594347..fc32d7d42b02 100755 --- a/Tools/portbuild/scripts/buildscript +++ b/Tools/portbuild/scripts/buildscript @@ -126,11 +126,15 @@ restr=$(make -V RESTRICTED) # Keep restricted distfiles in a subdirectory for extra protection # against leakage if [ ! 
-z "$restr" ]; then + echo "DISTDIR=${DISTDIR}" export DISTDIR=${DISTDIR}/RESTRICTED + echo "DISTDIR=${DISTDIR}" + mkdir -p ${DISTDIR} fi if [ $phase = 1 ]; then + # note: if you change this header, also change processonelog and processlogs2 cd $dir || exit 1 echo "building for: $(uname -mr)" echo "maintained by: $(make maintainer)" diff --git a/Tools/portbuild/scripts/dopackages b/Tools/portbuild/scripts/dopackages index 29e7f3398dfe..e2fc7676f37a 100755 --- a/Tools/portbuild/scripts/dopackages +++ b/Tools/portbuild/scripts/dopackages @@ -71,12 +71,14 @@ mailexit () { } srctar() { - tar cfCj ${builddir}/src-${buildid}.tbz ${builddir} src/ + rm -f ${builddir}/src-2*.tbz* + tar cfCj ${builddir}/src-${buildid}.tbz ${builddir} src/ 2>/dev/null md5 ${builddir}/src-${buildid}.tbz > ${builddir}/src-${buildid}.tbz.md5 } portstar() { - tar cfCj ${builddir}/ports-${buildid}.tbz ${builddir} ports/ + rm -f ${builddir}/ports-2*.tbz* + tar cfCj ${builddir}/ports-${buildid}.tbz ${builddir} ports/ 2>/dev/null md5 ${builddir}/ports-${buildid}.tbz > ${builddir}/ports-${buildid}.tbz.md5 } @@ -204,24 +206,16 @@ dobuild() { arch=$2 branch=$3 builddir=$4 - phase=$5 - - count=0 - for i in `cat ${pb}/${arch}/mlist`; do - . ${pb}/${arch}/portbuild.conf - test -f ${pb}/${arch}/portbuild.${i} && . 
${pb}/${arch}/portbuild.${i} - count=$((${count}+${maxjobs})) - done - + echo "================================================" - echo "building packages (phase ${phase})" + echo "building packages" echo "================================================" echo "started at $(date)" - phasestart=$(date +%s) - make -k -j${count} quickports all > ${builddir}/make.${phase} 2>&1 </dev/null + start=$(date +%s) + /var/portbuild/evil/qmanager/packagebuild ${arch} ${branch} ${buildid} > ${builddir}/make 2>&1 < /dev/null echo "ended at $(date)" - phaseend=$(date +%s) - echo "phase ${phase} took $(date -u -j -r $(($phaseend - $phasestart)) | awk '{print $4}')" + end=$(date +%s) + echo "Build took $(date -u -j -r $((end - start)) | awk '{print $4}')" echo $(echo $(ls -1 ${builddir}/packages/All | wc -l) - 2 | bc) "packages built" echo $(wc -l ${PORTSDIR}/${INDEXFILE} | awk '{print $1}') "lines in INDEX" @@ -230,9 +224,9 @@ dobuild() { echo $(echo $(du -sk ${builddir}/distfiles | awk '{print $1}') / 1024 | bc) "MB of distfiles" cd ${builddir} - if grep -qE '(ptimeout|pnohang): killing' make.${phase}; then + if grep -qE '(ptimeout|pnohang): killing' make; then echo "The following port(s) timed out:" - grep -E '(ptimeout|pnohang): killing' make.${phase} | sed -e 's/^.*ptimeout:/ptimeout:/' -e 's/^.*pnohang:/pnohang:/' + grep -E '(ptimeout|pnohang): killing' make | sed -e 's/^.*ptimeout:/ptimeout:/' -e 's/^.*pnohang:/pnohang:/' fi } @@ -259,7 +253,7 @@ restart=0 cont=0 finish=0 nofinish=0 -dodistfiles=1 +nodistfiles=0 fetch_orig=0 trybroken=0 incremental=0 @@ -300,7 +294,7 @@ while [ $# -gt 0 ]; do noplistcheck=1 ;; x-nodistfiles) - dodistfiles=0 + nodistfiles=1 ;; x-fetch-original) fetch_orig=1 @@ -349,9 +343,8 @@ fi echo | mail -s "$(basename $0) started for ${arch}-${branch} ${buildid} at $(date)" ${mailto} -if [ "$dodistfiles" = 1 ]; then - # XXX flip default to always collect - export WANT_DISTFILES=1 +if [ "$nodistfiles" = 1 ]; then + export NO_DISTFILES=1 fi if [ 
"$noplistcheck" = 1 ]; then @@ -384,6 +377,11 @@ df -k | grep ${buildid} # Set up our environment variables buildenv ${pb} ${arch} ${branch} ${builddir} +# bomb out if build clone failed +if [ ! -d ${builddir} ]; then + mailexit 1 +fi + if [ "${keep}" -eq 1 ]; then touch ${builddir}/.keep fi @@ -498,8 +496,6 @@ if [ "$skipstart" = 0 ]; then cdromlist ${pb} ${arch} ${branch} ${builddir} & fi - ${pb}/scripts/makeparallel ${arch} ${branch} ${buildid} & - cd ${builddir} mv distfiles/ .distfiles~ rm -rf .distfiles~ & @@ -518,7 +514,7 @@ if [ "$skipstart" = 0 ]; then # Create new log directories for archival rm -rf ${newerrors} - mkdir -p ${newerrors}/old-errors + mkdir -p ${newerrors} ln -sf ${newerrors} ${builddir}/errors rm -rf ${newlogs} mkdir -p ${newlogs} @@ -533,6 +529,10 @@ if [ "$skipstart" = 0 ]; then fi cp -p ${builddir}/duds ${newerrors}/duds cp -p ${builddir}/duds ${newlogs}/duds + if [ -f "${builddir}/duds.verbose" ]; then + cp -p ${builddir}/duds.verbose ${newerrors}/duds.verbose + cp -p ${builddir}/duds.verbose ${newlogs}/duds.verbose + fi cp -p ${builddir}/ports/${INDEXFILE} ${newerrors}/INDEX cp -p ${builddir}/ports/${INDEXFILE} ${newlogs}/INDEX @@ -594,24 +594,7 @@ if [ "$nobuild" = 0 ]; then cp duds.orig duds fi - cd ${builddir}/packages/All - ln -sf ../../Makefile . - - dobuild ${pb} ${arch} ${branch} ${builddir} 1 - - ls -asFlrt ${builddir}/packages/All > ${builddir}/logs/ls-lrt - - cd ${builddir}/errors/ - find . 
-name '*.log' -depth 1 | cpio -dumpl ${builddir}/errors/old-errors - - # Clean up the clients - ${pb}/scripts/build cleanup ${arch} ${branch} ${buildid} - - wait - echo "setting up of nodes ended at $(date)" - - cd ${builddir}/packages/All - dobuild ${pb} ${arch} ${branch} ${builddir} 2 + dobuild ${pb} ${arch} ${branch} ${builddir} fi @@ -666,7 +649,7 @@ if [ "$nofinish" = 0 ]; then ${pb}/scripts/chopindex ${builddir}/ports/${INDEXFILE} ${builddir}/packages > ${builddir}/packages/INDEX ls -asFlrt ${builddir}/packages/All > ${builddir}/logs/ls-lrt - cp -p ${builddir}/make.[12] ${builddir}/logs + cp -p ${builddir}/make ${builddir}/logs echo "================================================" echo "copying distfiles" @@ -712,4 +695,4 @@ echo "all done at $(date)" echo "entire process took $(date -u -j -r $(($endtime - $starttime)) | awk '{print $4}')" echo "================================================" -mailexit 0
\ No newline at end of file +mailexit 0 diff --git a/Tools/portbuild/scripts/dosetupnode b/Tools/portbuild/scripts/dosetupnode index 33a6044a627f..243ce2ae3664 100755 --- a/Tools/portbuild/scripts/dosetupnode +++ b/Tools/portbuild/scripts/dosetupnode @@ -5,8 +5,6 @@ # -norsync|-nocopy : Don't copy files, just clean up builds # -# -queue : update queue once we finish setting up -# # -force : force file copying/extraction even if it appears it is # up-to-date # @@ -99,9 +97,16 @@ setup() { ${client_setup} post-copy ${args} || (echo "post-copy for ${node} failed"; return 1) if [ "${queue}" -eq 1 ]; then - lockf -k ${pb}/${arch}/queue/.lock \ - ${pb}/scripts/pollmachine ${arch}/${node} -queue + jobs=$(python /var/portbuild/evil/qmanager/qclient jobs | grep "${node}" | grep "${arch}/${branch}/${buildid} package" | awk '{print $1}' | tail +1) + for j in ${jobs}; do + python /var/portbuild/evil/qmanager/qclient release $j + done + fi + + if [ "${full}" -eq 1 ]; then + ${ssh_cmd} ${client_user}@${node} ${sudo_cmd} rm -rf ${pb}/${arch}/${branch}/builds/${buildid}/.ready ${pb}/${arch}/${branch}/builds/${buildid} /tmp/.setup-${buildid} fi + echo "setting up of $node ended at $(date)" } @@ -109,6 +114,7 @@ pbab=${pb}/${arch}/${branch} norsync=0 queue=0 +full=0 while [ $# -ge 1 ]; do case $1 in @@ -122,6 +128,9 @@ while [ $# -ge 1 ]; do -force) force=-force ;; + -full) + full=1 + ;; esac shift done diff --git a/Tools/portbuild/scripts/makeduds b/Tools/portbuild/scripts/makeduds index a087ebf69a6c..809f73821d07 100755 --- a/Tools/portbuild/scripts/makeduds +++ b/Tools/portbuild/scripts/makeduds @@ -40,9 +40,14 @@ export PKG_DBDIR=/nonexistentpkg export PORT_DBDIR=/nonexistentport cd ${PORTSDIR} -make -j${DUDSJOBS} ignorelist ECHO_MSG=true > ${duds} || exit 1 -sort ${duds} > ${duds}.tmp -mv -f ${duds}.tmp ${duds} +make -j${DUDSJOBS} ignorelist-verbose ECHO_MSG=true > ${duds}.verbose 2> /dev/null || exit 1 +sort ${duds}.verbose > ${duds}.verbose.tmp +mv -f ${duds}.verbose.tmp 
${duds}.verbose +cut -f 1 -d \| ${duds}.verbose > ${duds} cp ${duds} ${duds}.orig grep -Ff ${duds}.orig ${index} | cut -f 1 -d \| > ${duds}.full + +cat ${duds} ${duds}.full | sort | uniq -u | sed -e "s@\$@|IGNORE: dependent port@" > ${duds}.full.verbose.tmp +cat ${duds}.verbose ${duds}.full.verbose.tmp | sort > ${duds}.full.verbose +rm ${duds}.full.verbose.tmp diff --git a/Tools/portbuild/scripts/makerestr b/Tools/portbuild/scripts/makerestr index 4fe8ec944703..9da8db96edce 100755 --- a/Tools/portbuild/scripts/makerestr +++ b/Tools/portbuild/scripts/makerestr @@ -22,7 +22,8 @@ builddir=${pb}/${arch}/${branch}/builds/${buildid} buildenv ${pb} ${arch} ${branch} ${builddir} -duds=${builddir}/duds +# XXX MCL 20080908 test removal of this +#duds=${builddir}/duds unset DISPLAY diff --git a/Tools/portbuild/scripts/makeworld b/Tools/portbuild/scripts/makeworld index 953f9d02a421..91845fff1e3c 100755 --- a/Tools/portbuild/scripts/makeworld +++ b/Tools/portbuild/scripts/makeworld @@ -23,6 +23,8 @@ builddir=${pb}/${arch}/${branch}/builds/${buildid} # confuses cross-builds export TARGET_ARCH=${arch} +# Workaround needed for zfs - 20090321 erwin +export NO_FSCHG=1 case "$branch" in 6*) diff --git a/Tools/portbuild/scripts/packagebuild b/Tools/portbuild/scripts/packagebuild index 9beeeb151d57..e0b73e0624b5 100755 --- a/Tools/portbuild/scripts/packagebuild +++ b/Tools/portbuild/scripts/packagebuild @@ -10,511 +10,571 @@ # where the entire cluster waits for a deep part of the tree to # build on a small number of machines # -# Other advantages are that this system is easily customizable and -# will let us customize things like the matching policy of jobs to -# machines. +# We can dynamically respond to changes in build machine availability, +# since the queue manager will block jobs that cannot be immediately +# satisfied and will unblock us when a job slot becomes available. 
+# +# When a package build fails, it is requeued with a lower priority +# such that it will rebuild again as soon as no "phase 1" packages +# are available to build. This prevents the cluster staying idle +# until the last phase 1 package builds. +# +# Other advantages are that this system is easily customizable and in +# the future will let us customize things like the matching policy of +# jobs to machines. For example, we could avoid dispatching multiple +# openoffice builds to the same system. # # TODO: -# * External queue manager -# * Mark completed packages instead of deleting them +# * Combine build prep stages? +# - initial check for file up-to-date # * check mtime for package staleness (cf make) -# * Check for parent mtimes after finishing child - -import os, sys, threading, time, subprocess, fcntl, operator -#from itertools import ifilter, imap -from random import choice - -def parseindex(indexfile): - - tmp={} - pkghash={} - for i in file(indexfile): - line=i.rstrip().split("|") - pkg = line[0] - tmp[pkg] = line[1:] - - # XXX hash category names too - - # Trick python into storing package names by reference instead of copying strings and wasting 60MB - pkghash[pkg] = pkg - - index=dict.fromkeys(tmp.keys()) - for pkg in tmp.iterkeys(): - line = tmp[pkg] - data={'name': pkg, 'path':line[0], - #'prefix':line[1], - #'comment':line[2], - #'descr':line[3], - #'maintainer':line[4], - 'categories':line[5], # XXX duplicates strings - 'bdep':[pkghash[i] for i in line[6].split(None)], - 'rdep':[pkghash[i] for i in line[7].split(None)], - #'www':line[8], - 'edep':[pkghash[i] for i in line[9].split(None)], - 'pdep':[pkghash[i] for i in line[10].split(None)], - 'fdep':[pkghash[i] for i in line[11].split(None)], - 'height':None} - if index[pkg] is None: - index[pkg] = data - else: - index[pkg].update(data) - if not index[pkg].has_key('parents'): - index[pkg]['parents'] = [] - - # XXX iter? 
- deps=set() - for j in ['bdep','rdep','edep','fdep','pdep']: - deps.update(set(index[pkg][j])) - index[pkg]['deps'] = [pkghash[i] for i in deps] - - for j in deps: - # This grossness is needed to avoid a second pass through - # the index, because we might be about to refer to - # packages that have not yet been processed - if index[j] is not None: - if index[j].has_key('parents'): - index[j]['parents'].append(pkghash[pkg]) - else: - index[j]['parents'] = [pkghash[pkg]] - else: - index[j] = {'parents':[pkghash[pkg]]} +# * option to skip phase 2 - return index +from qmanagerclient import * -def gettargets(index, targets): - """ split command line arguments into list of packages to build. Returns set or iterable """ - # XXX make this return the full recursive list and use this later for processing wqueue +import os, sys, threading, time, subprocess +#import random +from itertools import chain +#import gc +from stat import * - plist = set() - if len(targets) == 0: - targets = ["all"] - for i in targets: - if i == "all": - plist = index.iterkeys() - break - if i.endswith("-all"): - cat = i.rpartition("-")[0] - plist.update(j for j in index.iterkeys() if cat in index[j]['categories']) - elif i.rstrip(".tbz") in index.iterkeys(): - plist.update([i.rstrip(".tbz")]) +from Queue import Queue +from heapq import * - return plist +categories = {} +ports = {} -def heightindex(index, targets): - """ Initial population of height tree """ +# When a build fails we requeue it with a lower priority such that it +# will never preempt a phase 1 build but will build when spare +# capacity is available. 
+PHASE2_BASE_PRIO=1000 - for i in targets: - heightdown(index, i) +# Process success quickly so other jobs are started +SUCCESS_PRIO = -1000 -def heightdown(index, pkgname): - """ - Recursively populate the height tree down from a given package, - assuming empty values on entries not yet visited +# Failure should be a less common event :) +FAILURE_PRIO = -900 + +# Port status codes +PENDING = 1 # Yet to build +PHASE2 = 2 # Failed once + +class PriorityQueue(Queue): + """Variant of Queue that retrieves open entries in + priority order (lowest first). + Entries are typically tuples of the form: (priority number, + data) + This class can be found at: Python-2.6a3/Lib/Queue.py """ + maxsize = 0 - pkg=index[pkgname] - if pkg['height'] is None: - if len(pkg['deps']) > 0: - max = 0 - for i in pkg['deps']: - w = heightdown(index, i) - if w > max: - max = w - pkg['height'] = max + 1 - else: - pkg['height'] = 1 - return pkg['height'] + def _init(self, maxsize): + self.queue = [] -def heightup(index, pkgname): - """ Recalculate the height tree going upwards from a package """ + def _qsize(self, len=len): + return len(self.queue) - if not index.has_key(pkgname): - raise KeyError + def _put(self, item, heappush=heappush): + heappush(self.queue, item) - parents=set(index[pkgname]['parents']) + def _get(self, heappop=heappop): + return heappop(self.queue) - while len(parents) > 0: - # XXX use a deque? - pkgname = parents.pop() - if not index.has_key(pkgname): - # XXX can this happen? 
- continue - pkg=index[pkgname] - oldheight=pkg['height'] - if oldheight is None: - # Parent is in our build target list - continue - if len(pkg['deps']) == 0: - newheight = 1 - else: - newheight=max(index[j]['height'] for j in pkg['deps']) + 1 - if newheight > oldheight: - print "%s height increasing: %d -> %d", pkg, oldheight, newheight - assert(False) - if newheight != oldheight: - pkg['height'] = newheight - parents.update(pkg['parents']) - -def deleteup(index, pkgname): - if not index.has_key(pkgname): - raise KeyError - - parents=set([pkgname]) - - children=[] - removed=[] - while len(parents) > 0: - pkgname = parents.pop() - if not index.has_key(pkgname): - # Parent was already deleted via another path - # XXX can happen? - print "YYYYYYYYYYYYYYYYYYYYYY %s deleted" % pkgname - continue - if index[pkgname]['height'] is None: - # parent is not in our list of build targets - continue - pkg=index[pkgname] - - children.extend(pkg['deps']) - parents.update(pkg['parents']) - removed.append(pkgname) - del index[pkgname] - - removed = set(removed) - children = set(children) -# print "Removed %d packages, touching %d children" % (len(removed), len(children)) - - for i in children.difference(removed): - par=index[i]['parents'] - index[i]['parents'] = list(set(par).difference(removed)) - -# XXX return an iter -def selectheights(index, level): - return [i for i in index.iterkeys() if index[i]['height'] == level] - -def rank(index, ready, sortd, max = None): - """ rank the list of ready packages according to those listed as - dependencies in successive entries of the sorted list """ - - input=set(ready) - output = [] - count = 0 - print "Working on depth ", - for i in sortd: - deps = set(index[i]['deps']) - both = deps.intersection(input) - if len(both) > 0: - print "%d " % index[i]['height'], - input.difference_update(both) - output.extend(list(both)) - if len(input) == 0: - break - if max: - count+=len(both) - if count > max: - return output - print - 
output.extend(list(input)) +class Index(object): - return output + def __init__(self, indexfile): + self.indexfile = indexfile -def jobsuccess(index, job): + def parse(self, targets = None): - pkg = index[job] - # Build succeeded - for i in pkg['parents']: - index[i]['deps'].remove(job) + print "[MASTER] Read index" + f = file(self.indexfile) + index = f.readlines() + f.close() + f = None + del f - # deps/parents tree now partially inconsistent but this is - # what we need to avoid counting the height of the entry - # we are about to remove (which would make it a NOP) - heightup(index, job) + lines=[] + print "[MASTER] Phase 1" + for i in index: + (name, path, prefix, comment, descr, maintainer, categories, bdep, + rdep, www, edep, pdep, fdep) = i.rstrip().split("|") - del index[job] + if targets is None or name in targets: + lines.append((name, bdep, rdep, edep, pdep, fdep)) -def jobfailure(index, job): + Port(name, path, "", "", "", "", + categories, "") + index = None + del index - # Build failed - deleteup(index, job) - -class worker(threading.Thread): + print "[MASTER] Phase 2" + for (name, bdep, rdep, edep, pdep, fdep) in lines: + ports[name].setdeps(bdep, rdep, edep, pdep, fdep) - lock = threading.Lock() + lines = None + del lines + print "[MASTER] Done" - # List of running threads - tlist = [] +def depthindex(targets): + """ Initial population of depth tree """ - # List of running jobs - running = [] + for i in targets: + i.depth_recursive() - # Used to signal dispatcher when we finish a job - event = threading.Event() +class Port(object): - def __init__(self, mach, job, queue, arch, branch): - threading.Thread.__init__(self) - self.job = job - self.mach = mach - self.queue = queue - self.arch = arch - self.branch = branch + def __init__(self, name, path, prefix, comment, descr, maintainer, + cats, www): - def run(self): - global index + __slots__ = ["name", "path", "prefix", "comment", "descr", + "maintainer", "www", "bdep", "rdep", "edep", "pdep", + 
"fdep", "alldep", "parents", "depth", "categories"] - pkg = index[self.job] + self.name = name + self.path = path + self.prefix = prefix + self.comment = comment + self.descr = descr + self.maintainer = maintainer + self.www = www + + # Populated later + self.bdep = [] + self.rdep = [] + self.edep = [] + self.pdep = [] + self.fdep = [] + + self.alldep = [] + self.parents = [] + self.id = None # XXX + + self.status = PENDING + + # Whether the package build has completed and is hanging around + # to resolve dependencies for others XXX use status + self.done = False + + # Depth is the maximum length of the dependency chain of this port + self.depth = None + + self.categories=[] + scats = cats.split() + if len(scats) != len(set(scats)): + print "[MASTER] Warning: port %s includes duplicated categories: %s" % (name, cats) + + for c in set(scats): + try: + cat = categories[c] + except KeyError: + cat = Category(c) - if len(pkg['deps']) != 0: - print "Running job with non-empty deps: %s" % pkg - assert(False) + self.categories.append(cat) + cat.add(self) - print "Running job %s" % (self.job) - while True: - retcode = subprocess.call(["/usr/bin/env", "FD=%s" % " ".join(["%s.tbz" % i for i in pkg['fdep']]), "ED=%s" % " ".join(["%s.tbz" % i for i in pkg['edep']]), "PD=%s" % " ".join(["%s.tbz" % i for i in pkg['pdep']]), "BD=%s" % " ".join(["%s.tbz" % i for i in pkg['bdep']]), "RD=%s" % " ".join(["%s.tbz" % i for i in pkg['rdep']]), "/var/portbuild/scripts/pdispatch2", self.mach, self.arch, self.branch, "/var/portbuild/scripts/portbuild", "%s.tbz" % self.job, pkg['path']]) - self.queue.release(self.mach) - if retcode != 254: - break + ports[name] = self - # Failed to obtain job slot - time.sleep(15) - (self.mach, dummy) = self.queue.pick() - print "Retrying on %s" % self.mach + def remove(self): + """ Clean ourselves up but don't touch references in other objects; +they still need to know about us as dependencies etc """ - print "Finished job %s" % self.job, + self.fdep = 
None + self.edep = None + self.pdep = None + self.bdep = None + self.rdep = None + self.alldep = None + self.parents = None - if retcode == 0: - status = True - print - else: - status = False - print " with status %d" % retcode + for cat in self.categories: + cat.remove(self) - worker.lock.acquire() - worker.running.remove(self.job) - worker.tlist.remove(self) - if status == True: - jobsuccess(index, self.job) + ports[self.name] = None + del ports[self.name] + del self + + def destroy(self): + """ Remove a package and all references to it """ + + for pkg in self.alldep: + if pkg.parents is not None: + # Already removed but not destroyed + try: + pkg.parents.remove(self) + except ValueError: + continue + + for pkg in self.parents: + try: + pkg.fdep.remove(self) + except ValueError: + pass + try: + pkg.edep.remove(self) + except ValueError: + pass + try: + pkg.pdep.remove(self) + except ValueError: + pass + try: + pkg.bdep.remove(self) + except ValueError: + pass + try: + pkg.rdep.remove(self) + except ValueError: + pass + pkg.alldep.remove(self) + + sys.exc_clear() + + self.remove() + + def setdeps(self, bdep, rdep, edep, pdep, fdep): + self.fdep = [ports[p] for p in fdep.split()] + self.edep = [ports[p] for p in edep.split()] + self.pdep = [ports[p] for p in pdep.split()] + self.bdep = [ports[p] for p in bdep.split()] + self.rdep = [ports[p] for p in rdep.split()] + + self.alldep = list(set(chain(self.fdep, self.edep, self.pdep, + self.bdep, self.rdep))) + + for p in self.alldep: + p.parents.append(self) + + def depth_recursive(self): + + """ + Recursively populate the depth tree up from a given package + through dependencies, assuming empty values on entries not yet + visited + """ + + if self.depth is None: + if len(self.parents) > 0: + max = 0 + for i in self.parents: + w = i.depth_recursive() + if w > max: + max = w + self.depth = max + 1 + else: + self.depth = 1 + for port in ["openoffice", "kde-3"]: + if self.name.startswith(port): + # Artificial boost to try 
and get it building earlier + self.depth = 100 + return self.depth + + def destroy_recursive(self): + """ Remove a port and everything that depends on it """ + + parents=set([self]) + + while len(parents) > 0: + pkg = parents.pop() + assert pkg.depth is not None + parents.update(pkg.parents) + pkg.destroy() + + def success(self): + """ Build succeeded and possibly uncovered some new leaves """ + + parents = self.parents[:] + self.done = True + self.remove() + + newleafs = [p for p in parents if all(c.done for c in p.alldep)] + return newleafs + + def failure(self): + """ Build failed """ + + self.destroy_recursive() + + def packagename(self, arch, branch, buildid): + """ Return the path where a package may be found""" + + return "/var/portbuild/%s/%s/builds/%s/packages/All/%s.tbz" \ + % (arch, branch, buildid, self.name) + + def is_stale(self, arch, branch, buildid): + """ Does a package need to be (re)-built? + + Returns: False: if it exists and has newer mtime than all of + its dependencies. + True: otherwise + """ + + my_pkgname = self.packagename(arch, branch, buildid) + pkg_exists = os.path.exists(my_pkgname) + + if pkg_exists: + my_mtime = os.stat(my_pkgname)[ST_MTIME] + + dep_packages = [pkg.packagename(arch, branch, buildid) + for pkg in self.alldep] + deps_exist = all(os.path.exists(pkg) for pkg in dep_packages) + return not (pkg_exists and deps_exist and + all(os.stat(pkg)[ST_MTIME] <= my_mtime + for pkg in dep_packages)) + +class Category(object): + def __init__(self, name): + self.name = name + self.ports = {} + categories[name] = self + + def add(self, port): + self.ports[port] = port + + def remove(self, port): + self.ports[port]=None + del self.ports[port] + +def gettargets(targets): + """ split command line arguments into list of packages to build. 
+ Returns set or iterable of all ports that will be built including + dependencies """ + + plist = set() + if len(targets) == 0: + targets = ["all"] + for i in targets: + if i == "all": + return ports.itervalues() + + if i.endswith("-all"): + cat = i.rpartition("-")[0] + plist.update(p.name for p in categories[cat].ports) + elif i.rstrip(".tbz") in ports: + plist.update([ports[i.rstrip(".tbz")].name]) else: - jobfailure(index, self.job) + raise KeyError, i - # Wake up dispatcher in case it was blocked - worker.event.set() - worker.event.clear() + # Compute transitive closure of all dependencies + pleft=plist.copy() + while len(pleft) > 0: + pkg = pleft.pop() + new = [p.name for p in ports[pkg].alldep] + plist.update(new) + pleft.update(new) - worker.lock.release() + for p in set(ports.keys()).difference(plist): + ports[p].destroy() - @staticmethod - def dispatch(mach, job, queue, arch, branch): - worker.lock.acquire() - wrk = worker(mach, job, queue, arch, branch) - worker.tlist.append(wrk) - worker.lock.release() - wrk.start() + return [ports[p] for p in plist] -class machqueue(object): - path = ''; - fd = -1; +class worker(threading.Thread): - # fcntl locks are per-process, so the fcntl lock acquisition will - # succeed if another thread already holds it. We need the fcntl - # lock for external visibility between processes but also need an - # internal lock for protecting against out own threads. 
- ilock = threading.Lock() + # Protects threads + lock = threading.Lock() - def __init__(self, path): - super(machqueue, self).__init__() - self.path = path - self.fd = os.open("%s.lock" % self.path, os.O_RDWR|os.O_CREAT) - -# print "Initializing with %s %d" % (self.path, self.fd) - - def lock(self): - print "Locking...", -# ret = fcntl.lockf(self.fd, fcntl.LOCK_EX) - self.ilock.acquire() - print "success" - - def unlock(self): - print "Unlocking fd" - self.ilock.release() -# ret = fcntl.lockf(self.fd, fcntl.LOCK_UN) - - def poll(self): - """ Return currently available machines """ - - mfile = file(self.path + "../mlist", "r") - mlist = mfile.readlines() - mfile.close() - mlist = [i.rstrip() for i in mlist] # Chop \n - - list = os.listdir(self.path) - special = [] - machines = [] - for i in list: - if i.startswith('.'): - special.append(i) - else: - if i in mlist: - machines.append(i) - else: - os.unlink(self.path + i) - - print "Found machines %s" % machines - return (machines, special) - - def pick(self): - """ Choose a random machine from the queue """ - - min = 999 - while min == 999: - while True: - self.lock() - (machines, special) = self.poll() - if len(machines): - break - else: - self.unlock() - time.sleep(15) - # XXX Use kqueue to monitor for changes - - list = [] - # XXX Choose as fraction of capacity - for i in machines: - f = file(self.path + i, "r") - out = f.readline().rstrip() - try: - load = int(out) - except ValueError: - print "Bad value for %s: %s" % (i, out) - load = 999 - f.close() - if load < min: - min = load - list=[] - if load == min: - list.append(i) - print "(%s, %d)" % (list, load) - - if min == 999: - print "Bad queue length for %s" % list - self.unlock() - - machine = choice(list) - # XXX hook up config files - if min == 2: - # Queue full - os.unlink(self.path + machine) + # Running threads, used for collecting status + threads = {} + + def __init__(self, mach, job, arch, branch, buildid, queue): + threading.Thread.__init__(self) + 
self.machine = mach + self.job = job + self.arch = arch + self.branch = branch + self.buildid = buildid + self.queue = queue + + self.setDaemon(True) + + def run(self): + pkg = self.job + + print "[MASTER] Running job %s" % (pkg.name), + if pkg.status == PHASE2: + print " (phase 2)" else: - f = file(self.path + machine, "w") - f.write("%d\n" % (min + 1)) - f.flush() - f.close() - - self.unlock() - return (machine, special) - - def release(self, mach): - self.lock() - print "Releasing %s" % mach, - if os.path.exists(self.path + mach): - f = file(self.path + mach, "r+") - out = f.readline().rstrip() + print + try: + build = subprocess.Popen( + ["/bin/sh", "/var/portbuild/scripts/pdispatch", + self.arch, self.branch, self.buildid, self.machine, + "/var/portbuild/scripts/portbuild", "%s.tbz" % pkg.name, + pkg.path], + env={'HOME':"/root", + 'PATH':'/sbin:/bin:/usr/sbin:/usr/bin:/usr/games:/usr/local/sbin:/usr/local/bin:/var/portbuild/scripts', + 'FD':" ".join(["%s.tbz" % p.name for p in pkg.fdep]), + 'ED':" ".join(["%s.tbz" % p.name for p in pkg.edep]), + 'PD':" ".join(["%s.tbz" % p.name for p in pkg.pdep]), + 'BD':" ".join(["%s.tbz" % p.name for p in pkg.bdep]), + 'RD':" ".join(["%s.tbz" % p.name for p in pkg.rdep])}, + stderr=subprocess.STDOUT, stdout=subprocess.PIPE, bufsize=0) + except OSError, e: + print >>sys.stderr, "[%s:%s]: Execution failed: %s" % \ + (pkg.id, pkg.name, e) + while True: try: - load = int(out) - except ValueError: - print "Queue error on release of %s: %s" % (mach, out) - load = 3 #XXX + line = build.stdout.readline() + except: + print "[%s:%s]: Failed reading from build script" % \ + (pkg.id, pkg.name) + break + if line == "": + break + print "[%s:%s] %s" % (pkg.id, pkg.name, line.rstrip()) + + retcode = build.wait() + +# time.sleep(random.randint(0,60)) +# +# r = random.random() +# if r < 0.1: +# retcode = 1 +# elif r < 0.15: +# retcode = 254 +# else: +# retcode = 0 + + conn = QManagerClientConn(stderr = sys.stderr) + try: + (code, vars) = 
conn.command("release", {'id':pkg.id}) + except RequestError, e: + print "[MASTER] Error releasing job %s (%s): %s" % (pkg.name, pkg.id, e.value) + + if retcode == 254: + # Requeue soft failure at original priority + # XXX exponential backoff? + time.sleep(60) +# print "Requeueing %s" % pkg.id + self.queue.put((-pkg.depth, pkg)) + elif retcode == 253: + # setting up a machine, we should immediately retry + self.queue.put((-pkg.depth, pkg)) + elif retcode == 0: + self.queue.put((SUCCESS_PRIO, pkg)) else: - f = file(self.path + mach, "w") - load = 3 #XXX + self.queue.put((FAILURE_PRIO, pkg)) -# f.truncate(0) - f.write("%d\n" % (load - 1)) - print "...now %d" % (load - 1) - f.flush() - f.close() - self.unlock() + # Clean up + worker.lock.acquire() + worker.threads[self]=None + del worker.threads[self] + worker.lock.release() + + @staticmethod + def dispatch(mach, job, arch, branch, buildid, queue): + wrk = worker(mach, job, arch, branch, buildid, queue) + + worker.lock.acquire() + worker.threads[wrk] = wrk + worker.lock.release() + + wrk.start() -def main(arch, branch, args): +def main(arch, branch, buildid, args): global index - basedir="/var/portbuild/"+arch+"/"+branch + basedir="/var/portbuild/"+arch+"/"+branch+"/builds/"+buildid portsdir=basedir+"/ports" - indexfile=portsdir+"/INDEX-"+branch - indexfile="/var/portbuild/i386/7-exp/ports/INDEX-7" - qlen = 100 + indexfile=portsdir+"/INDEX-"+branch[0] - q = machqueue("/var/portbuild/%s/queue/" % arch) + print "[MASTER] parseindex..." + index = Index(indexfile) + index.parse() + print "[MASTER] length = %s" % len(ports) - print "parseindex..." - index=parseindex(indexfile) - print "length = %s" % len(index) + print "[MASTER] Finding targets..." + targets = gettargets(args) - targets = gettargets(index, args) + print "[MASTER] Calculating depth..." + depthindex(targets) - print "heightindex..." - heightindex(index, targets) + print "[MASTER] Pruning duds..." 
+ dudsfile=basedir+"/duds" + for line in file(dudsfile): + try: + dud = ports[line.rstrip()] + except KeyError: + continue + print "[MASTER] Skipping %s (duds)" % dud.name + dud.destroy_recursive() - sortd = sorted(((key, val["height"]) for (key, val) in index.iteritems() if val["height"] is not None), key=operator.itemgetter(1), reverse=True) - wqueue = rank(index, selectheights(index, 1), (i[0] for i in sortd), qlen) + queue = PriorityQueue() + # XXX can do this while parsing index if we prune targets/duds + # first + for pkg in ports.itervalues(): + if len(pkg.alldep) == 0: + queue.put((-pkg.depth, pkg)) - # Main work loop - while len(sortd) > 0: - worker.lock.acquire() - print "Remaining %s" % len(sortd) - while len(wqueue) > 0: - job = wqueue.pop(0) + # XXX check osversion, pool + mdl=["arch = %s" % arch] - if os.path.exists("/var/portbuild/%s/%s/packages/All/%s.tbz" % (arch, branch, job)): - print "Skipping %s since it already exists" % job - jobsuccess(index, job) - else: - worker.running.append(job) # Protect against a queue - # rebalance adding this - # back during build - worker.lock.release() - (machine, specials) = q.pick() - worker.dispatch(machine, job, q, arch, branch) - worker.lock.acquire() - - if len(wqueue) == 0: - if len(sortd) == 0: - # All jobs in progress, wait for children to exit - break - print "Rebalancing queue...", - sortd = sorted(((key, val["height"]) for (key, val) in index.iteritems() if val["height"] is not None), key=operator.itemgetter(1), reverse=True) - if len(sortd) == 0: - break + # Main work loop + while len(ports) > 0: + print "[MASTER] Ports remaining=%s, Queue length=%s" % (len(ports), queue.qsize()) - print sortd[0:3] - if sortd[0][0] == 1: - # Everything left is depth 1, no need to waste time rebalancing further - qlen = len(index) + if len(ports) < 10: + print "[MASTER] Remaining ports: %s" % ports.keys() - # Don't add too many deps at once (e.g. 
after we build a - # package like gmake), or we will switch to buildinglots - # of shallow packages - ready = [i for i in selectheights(index, 1) if i not in worker.running] - wqueue = rank(index, ready, (i[0] for i in sortd), qlen)[:2*qlen] - print "now %s (%s ready)" % (wqueue, len(ready)) + (prio, job) = queue.get() + if prio == SUCCESS_PRIO: + print "[MASTER] Job %s succeeded" % job.name + for new in job.success(): + queue.put((-new.depth, new)) + continue + elif prio == FAILURE_PRIO: + if job.status == PHASE2: + print "[MASTER] Job %s failed" % job.name + job.failure() + continue + else: + # Requeue at low priority + print "[MASTER] Job %s failed (requeued for phase 2)" % job.name + job.status = PHASE2 + queue.put((PHASE2_BASE_PRIO-job.depth, job)) + continue + elif job.status == PHASE2: + depth = -(prio - PHASE2_BASE_PRIO) + else: + depth = -prio + + print "[MASTER] Working on job %s, depth %d" % (job.name, depth) + if job.is_stale(arch, branch, buildid): + conn = QManagerClientConn(stderr = sys.stderr) + (code, vars) = conn.command("acquire", + {"name":job.name, + "type":"%s/%s/%s package" % \ + (arch, branch, buildid), + "priority":10, "mdl":mdl}) + + if code[0] == "2": + machine=vars['machine'] + job.id=vars['id'] +# print "Got ID %s" % job.id + + worker.dispatch(machine, job, arch, branch, buildid, queue) + else: + print "[MASTER] Error acquiring job %s: %s" % (pkg.name, code) + else: + print "[MASTER] Skipping %s since it already exists" % job.name + for new in job.success(): + queue.put((-new.depth, new)) - worker.lock.release() + print "[MASTER] Waiting for threads" + threads = worker.threads.copy() - if len(wqueue) == 0: - # Ran out of work, wait for workers to free up some more - print "No work to do, sleeping on workers" - worker.event.wait() + for t in threads: + print "[MASTER] Outstanding thread: %s" % t.job.name - for i in worker.tlist: - i.join() + for t in threads: + print "[MASTER] Waiting for thread %s" % t.job.name + t.join() - print 
"Finished" + print "[MASTER] Finished" if __name__ == "__main__": # from guppy import hpy; h = hpy() - main(sys.argv[1], sys.argv[2], sys.argv[3:]) - -# index = parseindex("/var/portbuild/i386/7-exp/ports/INDEX-7") -# print index['gmake-3.81_2'] + main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4:]) diff --git a/Tools/portbuild/scripts/pdispatch b/Tools/portbuild/scripts/pdispatch index 82a3af9ada01..893b6ade6fa2 100755 --- a/Tools/portbuild/scripts/pdispatch +++ b/Tools/portbuild/scripts/pdispatch @@ -1,16 +1,16 @@ #!/bin/sh # -# pdispatch <arch> <branch> <buildid> <command> <package.tbz> [<args> ...] +# pdispatch <arch> <branch> <buildid> <host> <command> <package.tbz> [<args> ...] # -# Choose a random machine from ${buildroot}/ulist and dispatch the -# job to it via the ptimeout script. +# Dispatch the job to a host via the ptimeout script. pb=/var/portbuild arch=$1 branch=$2 buildid=$3 -command=$4 -shift 4 +host=$4 +command=$5 +shift 5 pbab=${pb}/${arch}/${branch} @@ -23,13 +23,19 @@ timeout=360000 loglength=1000 hdrlength=6 +buildid=$(resolve ${pb} ${arch} ${branch} ${buildid}) +if [ -z "${buildid}" ]; then + echo "Invalid build ID ${buildid}" + exit 1 +fi + builddir=${pbab}/builds/${buildid} buildenv ${pb} ${arch} ${branch} ${builddir} -# ssh -x doesn't work on some machines +# XXX needed still? 
unset DISPLAY -# Use HPN-SSH for performance +# Allow override by HPN-SSH for performance if [ -z "${ssh_cmd}" ]; then ssh_cmd=ssh fi @@ -39,11 +45,6 @@ fi pkgname=$(basename $1 ${PKGSUFFIX}) -if grep -qxF ${pkgname} ${builddir}/duds; then - echo "skipping ${pkgname}" - exit 1 -fi - if [ -z "${pkgname}" ]; then echo "null packagename" exit 1 @@ -62,7 +63,7 @@ fi if [ "x$NOPLISTCHECK" != "x" ]; then flags="${flags} -noplistcheck" fi -if [ "x$WANT_DISTFILES" != "x" ]; then +if [ "x$NO_DISTFILES" = "x" ]; then flags="${flags} -distfiles" fi if [ "x$FETCH_ORIGINAL" != "x" ]; then @@ -72,140 +73,123 @@ if [ "x$TRYBROKEN" != "x" ]; then flags="${flags} -trybroken" fi -while `true`; do - host= - chroot= - while [ -z "${host}" -o -z "${chroot}" ]; do - chroot= - host=$(lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/getmachine ${pb} ${arch} ${branch}) - # If ulist is empty, then all build machines are busy, so try - # again in 15 seconds. - if [ -z "${host}" ]; then - sleep 15 - else - . ${pb}/${arch}/portbuild.conf - test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host} - chrootdata=$(${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/claim-chroot ${arch} ${branch} ${buildid} ${pkgname} 2>&1) - if [ -z "${chrootdata}" ]; then - echo "Failed to claim chroot on ${host}" - fi - - case "${chrootdata}" in - */var/portbuild/scripts/claim-chroot*) - # Error executing script, assume system is booting - chrootdata="wait boot" - ;; - esac - - echo "Got ${chrootdata} from ${host}" - - set -- ${chrootdata} - if [ $# -ge 2 ]; then - case $1 in - chroot) - chroot=$2 - ;; - setup) - echo "Setting up ${arch}/${branch} build ID ${buildid} on ${host}" - - # Run in the background so we can potentially - # claim a slot on another machine. In - # practise I think we often end up trying - # again on the same machine though. 
- - # Make sure to close stdin/stderr in the child - # or make will hang until the child process - # exits - # XXX Revert to >&- once this is confirmed as working - ${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${host} >/tmp/setupnode.$$ 2>&1 & - ;; - error) - echo "Error reported by ${host}: $2" - sleep 60 - ;; - wait) - echo "Waiting for setup to finish" - sleep 60 - ;; - esac - shift 2 - fi - - if [ -z "${chroot}" ]; then - lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host} - fi - fi - done - - . ${pb}/${arch}/portbuild.conf - test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host} - - rm -f ${builddir}/logs/${pkgname}.log ${builddir}/logs/${pkgname}.log.bz2 - rm -f ${builddir}/errors/${pkgname}.log ${builddir}/errors/${pkgname}.log.bz2 - - echo "dispatching: ${ssh_cmd} -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args}" - ${pb}/scripts/ptimeout.host $timeout ${ssh_cmd} -a -t -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args} 2>&1 - error=$? +chroot= +. ${pb}/${arch}/portbuild.conf +test -f ${pb}/${arch}/portbuild.${host} && . 
${pb}/${arch}/portbuild.${host} +chrootdata=$(${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/claim-chroot ${arch} ${branch} ${buildid} ${pkgname} 2>&1) +if [ -z "${chrootdata}" ]; then + echo "Failed to claim chroot on ${host}" + exit 254 +fi + +case "${chrootdata}" in + */var/portbuild/scripts/claim-chroot*) + # Error executing script, assume system is booting + chrootdata="wait boot" + ;; +esac + +# echo "Got ${chrootdata} from ${host}" + +set -- ${chrootdata} +if [ $# -ge 2 ]; then + case $1 in + chroot) + chroot=$2 + ;; + setup) + echo "Setting up ${arch}/${branch} build ID ${buildid} on ${host}" + + # Run in the background so we can potentially + # claim a slot on another machine. In + # practise I think we often end up trying + # again on the same machine though. + + # Make sure to close stdin/stderr in the child + # or make will hang until the child process + # exits + ${pb}/scripts/dosetupnode ${arch} ${branch} ${buildid} ${host} > /tmp/setupnode.$$ 2>&1 & + exit 253 + ;; + error) + echo "Error reported by ${host}: $2" + ;; + wait) + echo "Waiting for setup of ${host} to finish" + ;; + esac + shift 2 +fi + +if [ -z "${chroot}" ]; then + exit 254 +fi + +. ${pb}/${arch}/portbuild.conf +test -f ${pb}/${arch}/portbuild.${host} && . ${pb}/${arch}/portbuild.${host} - #if grep -q " failed unexpectedly on " ${builddir}/logs/${pkgname}.pre.log; then - # cat ${builddir}/logs/${pkgname}.pre.log | mail -s "${pkgname} failed uncleanly on ${arch} ${branch}" ${mailto} - #else - # rm ${builddir}/logs/${pkgname}.pre.log - #fi +rm -f ${builddir}/logs/${pkgname}.log ${builddir}/logs/${pkgname}.log.bz2 +rm -f ${builddir}/errors/${pkgname}.log ${builddir}/errors/${pkgname}.log.bz2 - # Pull in the results of the build from the client +${pb}/scripts/ptimeout.host $timeout ${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${command} ${arch} ${branch} ${buildid} ${chroot} ${flags} \"$ED\" \"$PD\" \"$FD\" \"$BD\" \"$RD\" ${args} 2>&1 +error=$? 
+ +# Pull in the results of the build from the client - ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${builddir}/logs/${pkgname}.log - (${ssh_cmd} -a -n ${client_user}@${host} test -f ${chroot}/tmp/work.tbz ) && ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/work.tbz ${builddir}/wrkdirs/${pkgname}.tbz +${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${builddir}/logs/${pkgname}.log +(${ssh_cmd} -a -n ${client_user}@${host} test -f ${chroot}/tmp/work.tbz ) && ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/work.tbz ${builddir}/wrkdirs/${pkgname}.tbz - # XXX Set dirty flag if any of the scp's fail +# XXX Set dirty flag if any of the scp's fail - mkdir -p ${builddir}/distfiles/.pbtmp/${pkgname} - ${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp/distfiles -cf - . | \ - tar --unlink -C ${builddir}/distfiles/.pbtmp/${pkgname} -xvf - +mkdir -p ${builddir}/distfiles/.pbtmp/${pkgname} +${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp/distfiles -cf - . 
| \ + tar --unlink -C ${builddir}/distfiles/.pbtmp/${pkgname} -xvf - && \ touch ${builddir}/distfiles/.pbtmp/${pkgname}/.done - if [ "${error}" = 0 ]; then - ${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp -cf - packages | \ - tar --unlink -C ${builddir} -xvf - - test -f ${builddir}/packages/All/${pkgname}${PKGSUFFIX} && \ - touch ${builddir}/packages/All/${pkgname}${PKGSUFFIX} - rm -f ${builddir}/errors/${pkgname}.log && \ - touch ${builddir}/errors/.force - lockf -k ${pbab}/failure.lock ${pb}/scripts/buildsuccess ${arch} ${branch} ${buildid} ${pkgname} - log=${builddir}/logs/$pkgname.log - if grep -q "even though it is marked BROKEN" ${log}; then - echo | mail -s "${pkgname} BROKEN but built on ${arch} ${branch}" ${mailto} - fi - if grep -q "^list of .*file" ${log}; then - buildlogdir=$(realpath ${builddir}/logs/) - baselogdir=$(basename ${buildlogdir}) - (sed -e '/^build started/,$d' $log;echo;echo "For the full build log, see"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '1,/^=== Checking filesystem state/d' $log) | mail -s "${pkgname} pkg-plist errors on ${arch} ${branch}" ${mailto} - fi - else - log=${builddir}/errors/${pkgname}.log - ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${log} || (echo ${chroot}@${host}; ${ssh_cmd} -a -n ${client_user}@${host} ls -laR ${chroot}/tmp) | mail -s "${pkgname} logfile not found" ${mailto} - if ! 
grep -q "even though it is marked BROKEN" ${log}; then - buildlogdir=$(realpath ${builddir}/logs/) - baselogdir=$(basename ${buildlogdir}) - if [ `wc -l ${log} | awk '{print $1}'` -le `expr ${loglength} + ${hdrlength}` ]; then - (echo "You can also find this build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;cat ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto} - else - (echo "Excerpt from the build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '/^build started/,$d' $log;echo;echo " [... lines trimmed ...]";echo;tail -${loglength} ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto} - fi +if [ "${error}" = 0 ]; then + ${ssh_cmd} -a -n ${client_user}@${host} tar -C ${chroot}/tmp -cf - packages | \ + tar --unlink -C ${builddir} -xvf - + + # XXX why is this needed? + test -f ${builddir}/packages/All/${pkgname}${PKGSUFFIX} && \ + touch ${builddir}/packages/All/${pkgname}${PKGSUFFIX} + + if [ -f ${builddir}/errors/${pkgname}.log ]; then + rm -f ${builddir}/errors/${pkgname}.log + # Force rebuild of html page to remove this package from list + touch ${builddir}/errors/.force + fi + lockf -k ${pbab}/failure.lock ${pb}/scripts/buildsuccess ${arch} ${branch} ${buildid} ${pkgname} + log=${builddir}/logs/$pkgname.log + if grep -q "even though it is marked BROKEN" ${log}; then + echo | mail -s "${pkgname} BROKEN but built on ${arch} ${branch}" ${mailto} + fi + if grep -q "^list of .*file" ${log}; then + buildlogdir=$(realpath ${builddir}/logs/) + baselogdir=$(basename ${buildlogdir}) + (sed -e '/^build started/,$d' $log;echo;echo "For the full build log, see"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '1,/^=== Checking filesystem state/d' $log) | mail -s "${pkgname} pkg-plist errors on ${arch} ${branch}" ${mailto} + fi +else + 
log=${builddir}/errors/${pkgname}.log + ${scp_cmd} ${client_user}@${host}:${chroot}/tmp/${pkgname}.log ${log} || (echo ${chroot}@${host}; ${ssh_cmd} -a -n ${client_user}@${host} ls -laR ${chroot}/tmp) | mail -s "${pkgname} logfile not found" ${mailto} + if ! grep -q "even though it is marked BROKEN" ${log}; then + buildlogdir=$(realpath ${builddir}/logs/) + baselogdir=$(basename ${buildlogdir}) + if [ $(wc -l ${log} | awk '{print $1}') -le $((loglength + hdrlength)) ]; then + (echo "You can also find this build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;cat ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto} + else + (echo "Excerpt from the build log at"; echo; echo " http://${master}/errorlogs/${arch}-errorlogs/${baselogdir}/$(basename $log)";echo;sed -e '/^build started/,$d' $log;echo;echo " [... lines trimmed ...]";echo;tail -${loglength} ${log}) | mail -s "${pkgname} failed on ${arch} ${branch}" ${mailto} fi - lockf -k ${pbab}/failure.lock ${pb}/scripts/buildfailure ${arch} ${branch} ${buildid} ${pkgname} fi - - ${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/clean-chroot ${arch} ${branch} ${buildid} ${chroot} ${clean} - - lockf -k ${pb}/${arch}/queue/.lock ${pb}/scripts/releasemachine ${arch} ${host} - + lockf -k ${pbab}/failure.lock ${pb}/scripts/buildfailure ${arch} ${branch} ${buildid} ${pkgname} +fi + +${ssh_cmd} -a -n ${client_user}@${host} ${sudo_cmd} ${pb}/scripts/clean-chroot ${arch} ${branch} ${buildid} ${chroot} ${clean} + # XXX Set a dirty variable earlier and check here - if grep -q "^build of .*ended at" ${builddir}/logs/${pkgname}.log; then - exit ${error} - else - echo "Build of ${pkgname} in ${host}:/${chroot} failed uncleanly, rebuilding" - sleep 120 - fi -done +if grep -q "^build of .*ended at" ${builddir}/logs/${pkgname}.log; then + exit ${error} +else + echo "Build of ${pkgname} in ${host}:/${chroot} failed uncleanly" + exit 254 +fi diff --git 
a/Tools/portbuild/scripts/pollmachine b/Tools/portbuild/scripts/pollmachine index c3438041c881..ddc7100a94d1 100755 --- a/Tools/portbuild/scripts/pollmachine +++ b/Tools/portbuild/scripts/pollmachine @@ -13,24 +13,20 @@ # # options are: # -daemon : poll repeatedly -# -queue : update queue entries (XXX racy) # # TODO: # XXX qmgr notification of new/removed machines -# XXX log state changes in daemon mode -# XXX clean up inactive builds -# XXX test thread shutdown -# XXX needed an explicit way to request setup? -# XXX signal handler +# XXX counter before declaring a machine as dead +# Declares a machine as online if it reports 0 data from infoseek? # * Deal with machines change OS/kernel version # - ACL list might change! # - take machine offline, update ACL/arch/etc, reboot, bring online import sys, threading, socket -from popen2 import * from time import sleep +import os, subprocess, logging if len(sys.argv) < 1: print "Usage: %s <arch> [<arch> ...]" % sys.argv[0] @@ -39,14 +35,9 @@ if len(sys.argv) < 1: arches=set() mlist={} polldelay=0 -queue=0 for i in sys.argv[1:]: if i == "-daemon": - polldelay = 30 - continue - - if i == "-queue": - queue = 1 + polldelay = 180 continue if "/" in i: @@ -82,9 +73,6 @@ class MachinePoll(threading.Thread): host = None port = 414 - # Should we update queue entry? 
- queue = None - timeout = None # How often to poll shutdown = False # Exit at next poll wakeup @@ -94,17 +82,21 @@ class MachinePoll(threading.Thread): # Dictionary of variables reported by the client vars = None - def __init__(self, mach, arch, timeout, host, port, queue): + def __init__(self, mach, arch, timeout, host, port): super(MachinePoll, self).__init__() self.mach = mach self.arch = arch self.timeout = timeout self.host = host self.port = port - self.queue = queue + + # How many times the connection timed out since last success + self.timeouts = 0 self.vars = {} + self.setDaemon(True) + def run(self): while True: if self.shutdown: @@ -124,63 +116,104 @@ class MachinePoll(threading.Thread): lines = [] try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(60) s.connect((self.host, self.port)) - f = s.makefile() - - lines = f.readlines() + + data = "" + while len(data) < 65536: + chunk = s.recv(8192) + if not chunk: + break + data += chunk + nowonline = True + self.timeouts = 0 + lines = data.split("\n") + except socket.timeout: + if self.online: + logging.info("[%s] Connection timeout" % self.mach) + self.timeouts += 1 + if self.timeouts < 3: + nowonline = self.online except: pass - finally: + finally: try: s.close() except: pass if nowonline != self.online: - print "State change: %s %s -> %s" % (self.mach, self.online, nowonline) + logging.info("[%s] Now %s" % (self.mach, "online" if nowonline else "OFFLINE")) self.online = nowonline + if self.online: + self.timeouts = 0 # XXX inform qmgr of state change - if self.online and not lines: + if self.online and not lines and not self.timeouts: # reportload script is missing dosetup=1 else: dosetup=0 for line in lines: + if line == "": + continue line=line.rstrip() part=line.partition('=') if part[1] != '=' or not part[0]: # if "No such file or directory" in line: # # Client may require setting up post-boot # dosetup=1 - print "Bad input from %s: %s" % (self.mach, line) + logging.info("[%s] 
Bad input: %s" % (self.mach, line)) # Assume client needs setting up dosetup=1 - try: old = self.vars[part[0]] except KeyError: old = "" if old != part[2]: self.vars[part[0]] = part[2] -# print "%s@%s: \"%s\" -> \"%s\"" % (part[0], self.mach, old, part[2]) +# logging.info("%s@%s: \"%s\" -> \"%s\"" % (part[0], self.mach, old, part[2])) # XXX update qmgr + try: + envs = self.vars['buildenvs'] + for e in envs.split(): + (arch, branch, buildid) = e.split("/") + f = "/var/portbuild/%s/%s/builds/%s/.active" % \ + (arch, branch, buildid) + if os.path.exists(f): + continue + # Clean up a stale buildenv + logging.info("[%s] Cleaning up stale build: %s" % (self.mach, e)) + (err, out) = self.setup(branch, buildid, "-nocopy -full") + if err: + logging.info("[%s] Error from cleanup" % (self.mach)) + for l in out.split("\n"): + if l == "": + continue + logging.info("[%s] %s" % (self.mach, l)) + + except KeyError: + pass + if dosetup: - print "Setting up %s" % (self.mach) - (err, out) = self.setup() + logging.info("[%s] Setting up machine" % (self.mach)) + (err, out) = self.setup("-", "-") if err: - print "Error from setup of %s:" % (self.mach) - print out - print "Setup of %s complete" % (self.mach) - return + logging.info("[%s] Error from setup" % (self.mach)) + for l in out.split("\n"): + if l == "": + continue + logging.info("[%s] %s" % (self.mach, l)) + logging.info("[%s] Setup complete" % (self.mach)) # Validate that arch has not changed (e.g. 
i386 -> amd64) try: if self.arch != self.vars['arch']: - print "Machine %s reporting unexpected arch: %s -> %s" % (self.mach, self.arch, self.vars['arch']) + logging.info("[%s] Unexpected arch: %s -> %s" % \ + (self.mach, self.arch, self.vars['arch'])) except KeyError: pass @@ -195,24 +228,27 @@ class MachinePoll(threading.Thread): pass f.close() - if self.queue: - try: - f = file("%s/%s/queue/%s" % (pb, self.arch, self.mach), "w") - except: - return - - try: - f.write("%s\n" % self.vars['jobs']) - except: - pass - f.close() - - def setup(self): - child = Popen4("su ports-%s -c \"/var/portbuild/scripts/dosetupnode %s - - %s\"" % (self.arch, self.arch, self.mach), 0) + def setup(self, branch, buildid, args = ""): + cmd = "su ports-%s -c \"/var/portbuild/scripts/dosetupnode %s %s %s %s %s\""\ + % (self.arch, self.arch, branch, buildid, self.mach, args) + child = subprocess.Popen(cmd, shell=True, stderr = subprocess.STDOUT, + stdout = subprocess.PIPE) err = child.wait() - out = "".join(child.fromchild.readlines()) + out = "".join(child.stdout.readlines()) return (err, out) +logging.basicConfig(level=logging.INFO, + format='[%(asctime)s] %(message)s', + datefmt='%d %b %Y %H:%M:%S', + filename='/var/log/pollmachine.log', filemode='w') + +log_console = logging.StreamHandler() +log_console.setLevel(logging.INFO) +formatter = logging.Formatter('[%(asctime)s] %(message)s', + datefmt = '%d %b %Y %H:%M:%S') +log_console.setFormatter(formatter) +logging.getLogger('').addHandler(log_console) + while True: for arch in arches: try: @@ -233,28 +269,30 @@ while True: machines[arch]=now for mach in gone: - print "Removing machine %s" % mach + logging.info("Removing machine %s/%s" % (arch, mach)) # XXX disable from qmgr pollthreads[mach].shutdown=True del pollthreads[mach] for mach in new: - print "Adding machine %s" % mach + logging.info("Adding machine %s/%s" % (arch, mach)) # XXX set up qmgr pc="%s/%s/portbuild.conf" % (pb, arch) pch="%s/%s/portbuild.%s" % (pb, arch, mach) - 
config = Popen4("test -f %s && . %s; test -f %s && . %s; echo $infoseek_host; echo $infoseek_port" % (pc, pc, pch, pch)) - host=config.fromchild.readline().rstrip() + cmd = "test -f %s && . %s; test -f %s && . %s; echo $infoseek_host; echo $infoseek_port" % (pc, pc, pch, pch) + config = subprocess.Popen(cmd, shell = True, + stdout = subprocess.PIPE) + host=config.stdout.readline().rstrip() if not host: host = mach - port=config.fromchild.readline().rstrip() + port=config.stdout.readline().rstrip() try: port = int(port) except (TypeError, ValueError): port = 414 - pollthreads[mach] = MachinePoll(mach, arch, polldelay, host, port, queue) + pollthreads[mach] = MachinePoll(mach, arch, polldelay, host, port) pollthreads[mach].start() if not polldelay: diff --git a/Tools/portbuild/scripts/ptimeout.c b/Tools/portbuild/scripts/ptimeout.c index 33e9ecc4f3a9..915024ce15df 100644 --- a/Tools/portbuild/scripts/ptimeout.c +++ b/Tools/portbuild/scripts/ptimeout.c @@ -47,7 +47,7 @@ main(int argc, char *argv[]) /*printf("exited child is %d, status is %d\n", child, status);*/ if (pid1 = child) { /*printf("killing process %d\n", pid2);*/ - kill(pid2, SIGTERM); + kill(pid2, SIGKILL); } else { /*printf("killing process %d\n", pid1);*/ kill(pid1, SIGTERM); diff --git a/Tools/portbuild/scripts/reportload b/Tools/portbuild/scripts/reportload index 130348eceb5c..56ec0df5196d 100755 --- a/Tools/portbuild/scripts/reportload +++ b/Tools/portbuild/scripts/reportload @@ -16,7 +16,7 @@ fi error= for i in squid disk; do if [ -f ${scratchdir}/.${i} ]; then - error=${i} "${error}" + error="${i} ${error}" fi done @@ -36,4 +36,4 @@ done echo "buildenvs=${buildenvs}" echo -n "load=" uptime -echo "error=${error}"
\ No newline at end of file +echo "error=${error}" diff --git a/Tools/portbuild/scripts/straslivy.py b/Tools/portbuild/scripts/straslivy.py index b67b75dd2a8d..8a899399a4f2 100755 --- a/Tools/portbuild/scripts/straslivy.py +++ b/Tools/portbuild/scripts/straslivy.py @@ -26,7 +26,7 @@ def getpdispatch(): pid = fields[0] arch = fields[3] branch = fields[4] - port = fields[7].replace('/usr/ports/','') + port = fields[9].replace('/usr/ports/','') line = ' '.join(fields) if len(arch) > archwidth: diff --git a/Tools/portbuild/scripts/zbackup b/Tools/portbuild/scripts/zbackup index c51b9342b5fe..18f806fddbb6 100755 --- a/Tools/portbuild/scripts/zbackup +++ b/Tools/portbuild/scripts/zbackup @@ -16,7 +16,7 @@ backuplist=["a", "a/nfs", "a/src", "a/local", "a/ports", "a/portbuild", backupdir="/dumpster/pointyhat/backup" # How many days between full backups -fullinterval=14 +fullinterval=3 def validate(): fslist = zfs.getallfs() diff --git a/Tools/portbuild/scripts/zclient b/Tools/portbuild/scripts/zclient new file mode 100755 index 000000000000..636aa265598c --- /dev/null +++ b/Tools/portbuild/scripts/zclient @@ -0,0 +1,124 @@ +#!/usr/bin/env python + +# ZFS snapshot client + +import socket, os, sys + +ZSERVER = ('gohan10.freebsd.org', 8888) +ZFSLOCAL = '/tmp/.zserver' + +def connect(): + """ Connects to service, returns (socket, islocal) """ + + if os.path.exists(ZFSLOCAL): + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + try: + s.connect(ZFSLOCAL) + return (s, True) + except: + s.close() + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + s.connect(ZSERVER) + return (s, False) + except: + s.close() + + return (None, None) + +def send(sock, cmd): + """ Send a command, retrieve single line of reply """ + + sock.write(cmd) + sock.flush() + + res= sock.readline() + #print "res = %s" % res + return res + +def error(msg): + print >>sys.stderr, "%s: %s" % (sys.argv[0], msg.rstrip()) + sys.exit(1) + +def do_list(sockfile, islocal, args): + res = 
send(sockfile, "LIST\n") + if res[0] == "2": + for i in sockfile: + print i.rstrip() + else: + error(res[4:]) + +def do_get(sockfile, islocal, args): + res = send(sockfile, "GET %s %s\n" % (args[0], args[1])) + if res[0] == "2": + while True: + block = sockfile.read(32*1024) + if not block: + break + sys.stdout.write(block) + else: + error(res[4:]) + +def do_diff(sockfile, islocal, args): + res = send(sockfile, "DIFF %s %s %s\n" % (args[0], args[1], args[2])) + if res[0] == "2": + while True: + block = sockfile.read(32*1024) + if not block: + break + sys.stdout.write(block) + else: + error(res[4:]) + +def do_reg(sockfile, islocal, args): + if not sock[1]: + error("must register on local machine") + res = send(sockfile, "REGISTER %s\n" % args[0]) + if res[0] == "2": + print res[4:] + else: + error(res[4:]) + +def do_unreg(sockfile, islocal, args): + if not sock[1]: + error("must register on local machine") + res = send(sockfile, "UNREGISTER %s\n" % args[0]) + + if res[0] == "2": + print res[4:] + else: + error(res[4:]) + +def do_help(sockfile, islocal, args): + for (i, val) in sorted(cmddict.iteritems()): + print "%15s - %s" % (i, val[1]) + +cmddict = {'list':(do_list, 'List available filesystem/snapshot pairs'), + 'get':(do_get, 'Get a snapshot'), + 'diff':(do_diff, 'Get the diffs between two snapshots'), + 'register':(do_reg, 'Register a new filesystem (privileged)'), + 'reg':(do_reg, 'Alias for register'), + 'unregister':(do_unreg, 'Register a new filesystem (privileged)'), + 'unreg':(do_unreg, 'Alias for register'), + 'help':(do_help, 'Display this help')} + +if __name__ == "__main__": + + try: + sock = connect() + except: + raise + sys.exit(1) + + args = sys.argv + + try: + cmd = args[1] + arg = args[2:] +# print "cmd = %s, arg = %s" % (cmd, arg) + cmddict[cmd][0](sock[0].makefile(), sock[1], arg) + except (KeyError, IndexError): + raise + error("No such command\n") + diff --git a/Tools/portbuild/scripts/zsync b/Tools/portbuild/scripts/zsync new file mode 
100755 index 000000000000..62f8e2b27e31 --- /dev/null +++ b/Tools/portbuild/scripts/zsync @@ -0,0 +1,38 @@ +#!/bin/sh + +base=/a/cache +zbase=a/cache + +fs=$1 + +rsnap=$(zclient list | grep "^$fs " | tail -1 | awk '{print $2}') +if [ -z "$rsnap" ]; then + echo "No such filesystem $fs" + exit 1 +fi + +lsnap=$(zfs list -Ht snapshot | grep "^$zbase/$fs@" | tail -1 | sed -e "s,^$zbase/$fs@,," | awk '{print $1}') +if [ -z "$lsnap" ]; then + echo "No local snapshot found" + dofull=1 +else + if [ "$lsnap" = "$rsnap" ]; then + exit 0 + fi + # Check for remotve snapshot + if ! (zclient list | grep "^$fs $lsnap " > /dev/null); then + echo "Local snapshot not found, removing and resyncing" + zfs destroy $zbase/$fs@$lsnap + dofull=0 + else + dofull=1 + fi +fi + +if [ "$dofull" = "1" ]; then + zfs destroy -r ${zbase}/${fs} + zclient get ${fs} ${rsnap} | zcat | zfs receive ${zbase}/${fs} +else + zclient diff ${fs} ${lsnap} ${rsnap} | zcat | zfs receive -F ${zbase}/${fs} +fi + |