diff options
author | Jun Kuriyama <kuriyama@FreeBSD.org> | 2009-03-05 22:55:11 +0000 |
---|---|---|
committer | Jun Kuriyama <kuriyama@FreeBSD.org> | 2009-03-05 22:55:11 +0000 |
commit | 1283458c3a863b2cd68cc37e98b3e574c0ce0f03 (patch) | |
tree | 31f39fe53fa59a1e925c29080db2afe0b23b586b /www/p5-HTML-ExtractContent | |
parent | 93c8f25a7645d01ede0bf1d880e679bfc8094ad5 (diff) | |
download | ports-1283458c3a863b2cd68cc37e98b3e574c0ce0f03.tar.gz ports-1283458c3a863b2cd68cc37e98b3e574c0ce0f03.zip |
Notes
Diffstat (limited to 'www/p5-HTML-ExtractContent')
-rw-r--r-- | www/p5-HTML-ExtractContent/Makefile | 27 | ||||
-rw-r--r-- | www/p5-HTML-ExtractContent/distinfo | 3 | ||||
-rw-r--r-- | www/p5-HTML-ExtractContent/pkg-descr | 11 | ||||
-rw-r--r-- | www/p5-HTML-ExtractContent/pkg-plist | 5 |
4 files changed, 46 insertions, 0 deletions
diff --git a/www/p5-HTML-ExtractContent/Makefile b/www/p5-HTML-ExtractContent/Makefile new file mode 100644 index 000000000000..504e59fef3e5 --- /dev/null +++ b/www/p5-HTML-ExtractContent/Makefile @@ -0,0 +1,27 @@ +# New ports collection makefile for: HTML::ExtractContent +# Date created: 05 Mar 2009 +# Whom: Jun Kuriyama <kuriyama@FreeBSD.org> +# +# $FreeBSD$ +# + +PORTNAME= HTML-ExtractContent +PORTVERSION= 0.05 +CATEGORIES= www perl5 +MASTER_SITES= CPAN +PKGNAMEPREFIX= p5- + +MAINTAINER= kuriyama@FreeBSD.org +COMMENT= Perl extension for HTML content extractor with scoring heuristics + +RUN_DEPENDS= \ + p5-Class-Accessor-Lvalue>0:${PORTSDIR}/devel/p5-Class-Accessor-Lvalue \ + p5-Exporter-Lite>0:${PORTSDIR}/devel/p5-Exporter-Lite \ + p5-HTML-Parser>0:${PORTSDIR}/www/p5-HTML-Parser +BUILD_DEPENDS= ${RUN_DEPENDS} + +PERL_CONFIGURE= yes + +MAN3= HTML::ExtractContent.3 + +.include <bsd.port.mk> diff --git a/www/p5-HTML-ExtractContent/distinfo b/www/p5-HTML-ExtractContent/distinfo new file mode 100644 index 000000000000..1c41ee285394 --- /dev/null +++ b/www/p5-HTML-ExtractContent/distinfo @@ -0,0 +1,3 @@ +MD5 (HTML-ExtractContent-0.05.tar.gz) = 95c0f8be7624a4e71de6b7b3a0fe362b +SHA256 (HTML-ExtractContent-0.05.tar.gz) = 973950b6445b9644d71caa79787cb4753ed75ec296d31ee5d6df9494491ac85f +SIZE (HTML-ExtractContent-0.05.tar.gz) = 25899 diff --git a/www/p5-HTML-ExtractContent/pkg-descr b/www/p5-HTML-ExtractContent/pkg-descr new file mode 100644 index 000000000000..16155a386c75 --- /dev/null +++ b/www/p5-HTML-ExtractContent/pkg-descr @@ -0,0 +1,11 @@ +HTML::ExtractContent is a module for extracting content from HTML with +scoring heuristics. + +It guesses which block of HTML looks like content according to scores +depending on the amount of punctuation marks and the lengths of non-tag +texts. + +It also guesses whether content end in the block or continue to the next +block. + +WWW: http://search.cpan.org/dist/HTML-ExtractContent/ diff --git a/www/p5-HTML-ExtractContent/pkg-plist b/www/p5-HTML-ExtractContent/pkg-plist new file mode 100644 index 000000000000..b78c74786f45 --- /dev/null +++ b/www/p5-HTML-ExtractContent/pkg-plist @@ -0,0 +1,5 @@ +%%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent/.packlist +%%SITE_PERL%%/HTML/ExtractContent.pm +%%SITE_PERL%%/HTML/ExtractContent/Util.pm +@dirrm %%SITE_PERL%%/HTML/ExtractContent +@dirrm %%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent |