diff options
author | Andrew Pantyukhin <sat@FreeBSD.org> | 2007-04-23 11:15:06 +0000 |
---|---|---|
committer | Andrew Pantyukhin <sat@FreeBSD.org> | 2007-04-23 11:15:06 +0000 |
commit | 6ffd6b1352cb39afe6d838f198f5084a139fe346 (patch) | |
tree | 536853aed8ae6a855d84264c179881bf34cbd893 | |
parent | d1fb8e2940ef546c53422986f4b9c0efe8b92cbe (diff) | |
download | ports-6ffd6b1352cb39afe6d838f198f5084a139fe346.tar.gz ports-6ffd6b1352cb39afe6d838f198f5084a139fe346.zip |
Notes
-rw-r--r-- | textproc/Makefile | 1 | ||||
-rw-r--r-- | textproc/py-html2text/Makefile | 24 | ||||
-rw-r--r-- | textproc/py-html2text/distinfo | 3 | ||||
-rw-r--r-- | textproc/py-html2text/files/patch-html2text.py | 28 | ||||
-rw-r--r-- | textproc/py-html2text/pkg-descr | 6 |
5 files changed, 62 insertions, 0 deletions
diff --git a/textproc/Makefile b/textproc/Makefile index 44d7418c6ea9..e11675959b39 100644 --- a/textproc/Makefile +++ b/textproc/Makefile @@ -754,6 +754,7 @@ SUBDIR += py-expat SUBDIR += py-feedparser SUBDIR += py-genshi + SUBDIR += py-html2text SUBDIR += py-hyperestraier SUBDIR += py-hyperestraier-python SUBDIR += py-jaxml diff --git a/textproc/py-html2text/Makefile b/textproc/py-html2text/Makefile new file mode 100644 index 000000000000..74bdc623ae12 --- /dev/null +++ b/textproc/py-html2text/Makefile @@ -0,0 +1,24 @@ +# New ports collection makefile for: html2text +# Date created: 23 April 2007 +# Whom: Andrew Pantyukhin <infofarmer@FreeBSD.org> +# +# $FreeBSD$ +# + +PORTNAME= html2text +PORTVERSION= 2.2.8 +CATEGORIES= textproc python +MASTER_SITES= CENKES +PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} + +MAINTAINER= infofarmer@FreeBSD.org +COMMENT= Convert HTML into clean plain ASCII text + +USE_PYTHON= yes +USE_PYDISTUTILS=yes +PLIST_FILES= bin/${PORTNAME}.py + +post-patch: + @${REINPLACE_CMD} -e '1s|.*|#!${PYTHON_CMD}|' ${WRKSRC}/${PORTNAME}.py + +.include <bsd.port.mk> diff --git a/textproc/py-html2text/distinfo b/textproc/py-html2text/distinfo new file mode 100644 index 000000000000..a6f653d2b39b --- /dev/null +++ b/textproc/py-html2text/distinfo @@ -0,0 +1,3 @@ +MD5 (html2text-2.2.8.tar.gz) = 8f84bd9456972ee1cccd2489f8b2535a +SHA256 (html2text-2.2.8.tar.gz) = 81a2304eeb7006f351343e8be59214eac8352ece6ff020fd11028b5a6e10d890 +SIZE (html2text-2.2.8.tar.gz) = 3998 diff --git a/textproc/py-html2text/files/patch-html2text.py b/textproc/py-html2text/files/patch-html2text.py new file mode 100644 index 000000000000..0797a37472fe --- /dev/null +++ b/textproc/py-html2text/files/patch-html2text.py @@ -0,0 +1,28 @@ +--- html2text.py.orig 2007-01-18 19:06:49.000000000 -0500 ++++ html2text.py +@@ -150,7 +150,7 @@ class _html2text(sgmllib.SGMLParser): + self.lastWasNL = 0 + + def outtextf(self, s): +- if type(s) is type(''): s = codecs.utf_8_decode(s)[0] ++ if type(s) is 
type(''): s = codecs.utf_8_decode(s, "replace")[0] + self.outtext += s + + def close(self): +@@ -259,6 +259,7 @@ class _html2text(sgmllib.SGMLParser): + if attrs.has_key('src'): + attrs['href'] = attrs['src'] + alt = attrs.get('alt', '') ++ alt = re.sub('\n', ' ', alt) + i = self.previousIndex(attrs) + if i is not None: + attrs = self.a[i] +@@ -279,7 +280,7 @@ class _html2text(sgmllib.SGMLParser): + if tag in ["ol", "ul"]: + if start: + self.list.append({'name':tag, 'num':0}) +- else: ++ elif self.list: + if self.list: self.list.pop() + + self.p() diff --git a/textproc/py-html2text/pkg-descr b/textproc/py-html2text/pkg-descr new file mode 100644 index 000000000000..63c320470635 --- /dev/null +++ b/textproc/py-html2text/pkg-descr @@ -0,0 +1,6 @@ +html2text is a Python script that converts a page of HTML into clean, +easy-to-read plain ASCII text. Better yet, that ASCII also happens to +be valid Markdown (a text-to-HTML format). + +WWW: http://www.aaronsw.com/2002/html2text/ +Author: Aaron Swartz <me@aaronsw.com> |