diff options
author | David W. Chapman Jr. <dwcjr@FreeBSD.org> | 2001-06-23 16:09:53 +0000 |
---|---|---|
committer | David W. Chapman Jr. <dwcjr@FreeBSD.org> | 2001-06-23 16:09:53 +0000 |
commit | 302ddeb73e4485411c6ccb4dbc8d317879222dba (patch) | |
tree | 821fd4b1cec4d66d83d4ac4c891d8fc73127f184 /www | |
parent | cb1d95a5762937e7d8a54e4d881ec7d453e843c0 (diff) | |
download | ports-302ddeb73e4485411c6ccb4dbc8d317879222dba.tar.gz ports-302ddeb73e4485411c6ccb4dbc8d317879222dba.zip |
Notes
Diffstat (limited to 'www')
-rw-r--r-- | www/Makefile | 1 | ||||
-rw-r--r-- | www/crawl/Makefile | 25 | ||||
-rw-r--r-- | www/crawl/distinfo | 1 | ||||
-rw-r--r-- | www/crawl/files/patch-configure.in | 17 | ||||
-rw-r--r-- | www/crawl/pkg-comment | 1 | ||||
-rw-r--r-- | www/crawl/pkg-descr | 23 | ||||
-rw-r--r-- | www/crawl/pkg-plist | 1 |
7 files changed, 69 insertions, 0 deletions
diff --git a/www/Makefile b/www/Makefile index d754a85572be..340e3c0652c6 100644 --- a/www/Makefile +++ b/www/Makefile @@ -47,6 +47,7 @@ SUBDIR += chimera2 SUBDIR += comclear SUBDIR += comline + SUBDIR += crawl SUBDIR += css-mode.el SUBDIR += decss SUBDIR += demoroniser diff --git a/www/crawl/Makefile b/www/crawl/Makefile new file mode 100644 index 000000000000..e25ae90ec559 --- /dev/null +++ b/www/crawl/Makefile @@ -0,0 +1,25 @@ +# New ports collection makefile for: crawl +# Date created: 20 June 2001 +# Whom: Pete Fritchman <petef@databits.net> +# +# $FreeBSD$ +# + +PORTNAME= crawl +PORTVERSION= 0.1 +CATEGORIES= www +MASTER_SITES= http://www.monkey.org/~provos/ + +MAINTAINER= petef@databits.net + +BUILD_DEPENDS= ${LOCALBASE}/lib/libevent.a:${PORTSDIR}/devel/libevent + +WRKSRC= ${WRKDIR}/${PORTNAME} + +USE_AUTOCONF= yes +GNU_CONFIGURE= yes +CONFIGURE_ARGS= --with-libevent=${LOCALBASE} + +MAN1= crawl.1 + +.include <bsd.port.mk> diff --git a/www/crawl/distinfo b/www/crawl/distinfo new file mode 100644 index 000000000000..80abbc95dafe --- /dev/null +++ b/www/crawl/distinfo @@ -0,0 +1 @@ +MD5 (crawl-0.1.tar.gz) = 93df9d0e6534bc4fc462950c023ec2e7 diff --git a/www/crawl/files/patch-configure.in b/www/crawl/files/patch-configure.in new file mode 100644 index 000000000000..9de236fb45a9 --- /dev/null +++ b/www/crawl/files/patch-configure.in @@ -0,0 +1,17 @@ +--- configure.in.orig Wed Jun 20 14:41:44 2001 ++++ configure.in Wed Jun 20 17:30:07 2001 +@@ -38,11 +38,11 @@ + ;; + *) + AC_MSG_RESULT($withval) +- if test -f $withval/event.h -a -f $withval/libevent.a; then ++ if test -f $withval/include/event.h -a -f $withval/lib/libevent.a; then + owd=`pwd` + if cd $withval; then withval=`pwd`; cd $owd; fi +- EVENTINC="-I$withval" +- EVENTLIB="-L$withval -levent" ++ EVENTINC="-I$withval/include" ++ EVENTLIB="-L$withval/lib -levent" + else + AC_ERROR(event.h or libevent.a not found in $withval) + fi diff --git a/www/crawl/pkg-comment b/www/crawl/pkg-comment new file mode 100644 
index 000000000000..16dd9e5120d0 --- /dev/null +++ b/www/crawl/pkg-comment @@ -0,0 +1 @@ +A small, efficient web crawler with advanced features diff --git a/www/crawl/pkg-descr b/www/crawl/pkg-descr new file mode 100644 index 000000000000..96361c6087a2 --- /dev/null +++ b/www/crawl/pkg-descr @@ -0,0 +1,23 @@ +The crawl utility starts a depth-first traversal of the web at the +specified URLs. It stores all JPEG images that match the configured +constraints. Crawl is fairly fast and allows for graceful termination. +After terminating crawl, it is possible to restart it at exactly +the same spot where it was terminated. Crawl keeps a persistent +database that allows multiple crawls without revisiting sites. + +The main reason for writing crawl was the lack of simple open source +web crawlers. Crawl is only a few thousand lines of code and fairly +easy to debug and customize. + +Some of the main features: + - Saves encountered JPEG images + - Image selection based on regular expressions and size constraints + - Resume previous crawl after graceful termination + - Persistent database of visited URLs + - Very small and efficient code + - Supports robots.txt + +WWW: http://www.monkey.org/~provos/crawl/ + +- Pete +petef@databits.net diff --git a/www/crawl/pkg-plist b/www/crawl/pkg-plist new file mode 100644 index 000000000000..1cdd09ea5311 --- /dev/null +++ b/www/crawl/pkg-plist @@ -0,0 +1 @@ +bin/crawl |