aboutsummaryrefslogtreecommitdiff
path: root/www/p5-HTML-ExtractContent
diff options
context:
space:
mode:
authorJun Kuriyama <kuriyama@FreeBSD.org>2009-03-05 22:55:11 +0000
committerJun Kuriyama <kuriyama@FreeBSD.org>2009-03-05 22:55:11 +0000
commit1283458c3a863b2cd68cc37e98b3e574c0ce0f03 (patch)
tree31f39fe53fa59a1e925c29080db2afe0b23b586b /www/p5-HTML-ExtractContent
parent93c8f25a7645d01ede0bf1d880e679bfc8094ad5 (diff)
downloadports-1283458c3a863b2cd68cc37e98b3e574c0ce0f03.tar.gz
ports-1283458c3a863b2cd68cc37e98b3e574c0ce0f03.zip
Notes
Diffstat (limited to 'www/p5-HTML-ExtractContent')
-rw-r--r--www/p5-HTML-ExtractContent/Makefile27
-rw-r--r--www/p5-HTML-ExtractContent/distinfo3
-rw-r--r--www/p5-HTML-ExtractContent/pkg-descr11
-rw-r--r--www/p5-HTML-ExtractContent/pkg-plist5
4 files changed, 46 insertions, 0 deletions
diff --git a/www/p5-HTML-ExtractContent/Makefile b/www/p5-HTML-ExtractContent/Makefile
new file mode 100644
index 000000000000..504e59fef3e5
--- /dev/null
+++ b/www/p5-HTML-ExtractContent/Makefile
@@ -0,0 +1,27 @@
+# New ports collection makefile for: HTML::ExtractContent
+# Date created: 05 Mar 2009
+# Whom: Jun Kuriyama <kuriyama@FreeBSD.org>
+#
+# $FreeBSD$
+#
+
+PORTNAME= HTML-ExtractContent
+PORTVERSION= 0.05
+CATEGORIES= www perl5
+MASTER_SITES= CPAN
+PKGNAMEPREFIX= p5-
+
+MAINTAINER= kuriyama@FreeBSD.org
+COMMENT= Perl extension for HTML content extractor with scoring heuristics
+
+RUN_DEPENDS= \
+ p5-Class-Accessor-Lvalue>0:${PORTSDIR}/devel/p5-Class-Accessor-Lvalue \
+ p5-Exporter-Lite>0:${PORTSDIR}/devel/p5-Exporter-Lite \
+ p5-HTML-Parser>0:${PORTSDIR}/www/p5-HTML-Parser
+BUILD_DEPENDS= ${RUN_DEPENDS}
+
+PERL_CONFIGURE= yes
+
+MAN3= HTML::ExtractContent.3
+
+.include <bsd.port.mk>
diff --git a/www/p5-HTML-ExtractContent/distinfo b/www/p5-HTML-ExtractContent/distinfo
new file mode 100644
index 000000000000..1c41ee285394
--- /dev/null
+++ b/www/p5-HTML-ExtractContent/distinfo
@@ -0,0 +1,3 @@
+MD5 (HTML-ExtractContent-0.05.tar.gz) = 95c0f8be7624a4e71de6b7b3a0fe362b
+SHA256 (HTML-ExtractContent-0.05.tar.gz) = 973950b6445b9644d71caa79787cb4753ed75ec296d31ee5d6df9494491ac85f
+SIZE (HTML-ExtractContent-0.05.tar.gz) = 25899
diff --git a/www/p5-HTML-ExtractContent/pkg-descr b/www/p5-HTML-ExtractContent/pkg-descr
new file mode 100644
index 000000000000..16155a386c75
--- /dev/null
+++ b/www/p5-HTML-ExtractContent/pkg-descr
@@ -0,0 +1,11 @@
+HTML::ExtractContent is a module for extracting content from HTML with
+scoring heuristics.
+
+It guesses which block of HTML looks like content according to scores
+based on the number of punctuation marks and the length of non-tag
+text.
+
+It also guesses whether content ends in the block or continues to the next
+block.
+
+WWW: http://search.cpan.org/dist/HTML-ExtractContent/
diff --git a/www/p5-HTML-ExtractContent/pkg-plist b/www/p5-HTML-ExtractContent/pkg-plist
new file mode 100644
index 000000000000..b78c74786f45
--- /dev/null
+++ b/www/p5-HTML-ExtractContent/pkg-plist
@@ -0,0 +1,5 @@
+%%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent/.packlist
+%%SITE_PERL%%/HTML/ExtractContent.pm
+%%SITE_PERL%%/HTML/ExtractContent/Util.pm
+@dirrm %%SITE_PERL%%/HTML/ExtractContent
+@dirrm %%SITE_PERL%%/%%PERL_ARCH%%/auto/HTML/ExtractContent