From 42e9a79423d9837b343f3833971a6ca643d3a327 Mon Sep 17 00:00:00 2001 From: Michael Scheidell Date: Sat, 7 Apr 2012 16:34:45 +0000 Subject: - Modern revamping japanese/p5-Mail-SpamAssassin to work as a slave port [1] - Bump PORTREVISION to follow mail/p5-Mail-SpamAssassin [1] - tweak files/*.plist to check for and optionally save tokenizer.pre [2] PR: ports/165765 [1] Submitted by: Masaki TAGAWA (maintainer) Reviewed by: scheidell (me) [2] Feature safe: yes --- japanese/p5-Mail-SpamAssassin/Makefile | 340 +----- japanese/p5-Mail-SpamAssassin/distinfo | 8 - .../files/patch-rules-local.cf | 15 - .../p5-Mail-SpamAssassin/files/patch-sa-learn.raw | 27 - japanese/p5-Mail-SpamAssassin/files/sa-spamd.sh.in | 43 - .../files/spamassassin-3.3.2-ja-1.patch | 1148 ++++++++++++++++++++ .../files/spamassassin-3.3.2-ja-1.plist | 7 + japanese/p5-Mail-SpamAssassin/files/tokenizer.pre | 8 + japanese/p5-Mail-SpamAssassin/pkg-deinstall | 17 - japanese/p5-Mail-SpamAssassin/pkg-descr | 14 - japanese/p5-Mail-SpamAssassin/pkg-install | 66 -- japanese/p5-Mail-SpamAssassin/pkg-message | 4 +- japanese/p5-Mail-SpamAssassin/pkg-plist | 161 --- 13 files changed, 1187 insertions(+), 671 deletions(-) delete mode 100644 japanese/p5-Mail-SpamAssassin/distinfo delete mode 100644 japanese/p5-Mail-SpamAssassin/files/patch-rules-local.cf delete mode 100644 japanese/p5-Mail-SpamAssassin/files/patch-sa-learn.raw delete mode 100644 japanese/p5-Mail-SpamAssassin/files/sa-spamd.sh.in create mode 100644 japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch create mode 100644 japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist create mode 100644 japanese/p5-Mail-SpamAssassin/files/tokenizer.pre delete mode 100644 japanese/p5-Mail-SpamAssassin/pkg-deinstall delete mode 100644 japanese/p5-Mail-SpamAssassin/pkg-descr delete mode 100644 japanese/p5-Mail-SpamAssassin/pkg-install delete mode 100644 japanese/p5-Mail-SpamAssassin/pkg-plist (limited to 'japanese') diff --git 
a/japanese/p5-Mail-SpamAssassin/Makefile b/japanese/p5-Mail-SpamAssassin/Makefile index bad670021dac..45575daa95cb 100644 --- a/japanese/p5-Mail-SpamAssassin/Makefile +++ b/japanese/p5-Mail-SpamAssassin/Makefile @@ -5,339 +5,43 @@ # $FreeBSD$ # -PORTNAME= Mail-SpamAssassin -PORTVERSION= 3.3.2 -PORTREVISION= 1 +PORTREVISION= 2 CATEGORIES= japanese mail perl5 -MASTER_SITES= ${MASTER_SITE_APACHE:S/$/:apache/} ${MASTER_SITE_PERL_CPAN:S/$/:cpan/} \ - ${PATCH_SITES} -MASTER_SITE_SUBDIR= spamassassin/source/:apache Mail/:cpan PKGNAMEPREFIX= ja-p5- -DISTFILES= ${DISTNAME}${EXTRACT_SUFX}:apache,cpan \ - ${TOKENIZER_PRE}:JA ${DOCJA}:JA -DIST_SUBDIR= ja-spamassassin -EXTRACT_ONLY= ${DISTNAME}${EXTRACT_SUFX} - -PATCH_SITES= http://spamassassin.emaillab.jp/pub/ja-patch/sa${PORTVERSION:R}/:JA -PATCHFILES= spamassassin-${PORTVERSION}-ja-${PATCHLEVEL}.patch:JA -PATCHLEVEL= 1 -PATCH_DIST_STRIP= -p1 MAINTAINER= masaki@club.kyutech.ac.jp -COMMENT= SpamAssassin with Japanese tokenizer - -RUN_DEPENDS= p5-NetAddr-IP>=4.00.7:${PORTSDIR}/net-mgmt/p5-NetAddr-IP \ - p5-Net-DNS>=0.63:${PORTSDIR}/dns/p5-Net-DNS \ - p5-HTML-Parser>=3.46:${PORTSDIR}/www/p5-HTML-Parser \ - p5-libwww>=0:${PORTSDIR}/www/p5-libwww \ - p5-Encode-Detect>=0:${PORTSDIR}/converters/p5-Encode-Detect \ - p5-Mail-Tools>=0:${PORTSDIR}/mail/p5-Mail-Tools \ - ja-p5-MeCab>=0.98:${PORTSDIR}/japanese/p5-MeCab -BUILD_DEPENDS= p5-NetAddr-IP>=4.00.7:${PORTSDIR}/net-mgmt/p5-NetAddr-IP \ - p5-Net-DNS>=0.63:${PORTSDIR}/dns/p5-Net-DNS \ - p5-HTML-Parser>=3.46:${PORTSDIR}/www/p5-HTML-Parser \ - p5-libwww>=0:${PORTSDIR}/www/p5-libwww \ - p5-Encode-Detect>=0:${PORTSDIR}/converters/p5-Encode-Detect \ - p5-Mail-Tools>=0:${PORTSDIR}/mail/p5-Mail-Tools - -CONFLICTS= p5-Mail-SpamAssassin-[0-9]* - -PERL_CONFIGURE= yes -USE_PERL5_RUN= 5.8.8+ -USE_LDCONFIG= yes -CONFIGURE_ARGS= SYSCONFDIR="${PREFIX}/etc" \ - CONTACT_ADDRESS="${CONTACT_ADDRESS}" \ - LOCALSTATEDIR="/var/db/spamassassin" - -USERS= spamd -GROUPS= spamd - -# You can override it if you 
like -CONTACT_ADDRESS?= The administrator of that system - -OPTIONS= AS_ROOT "Run spamd as root (recommended)" on \ - SPAMC "Build spamd/spamc (not for amavisd)" on \ - SACOMPILE "sa-compile" off \ - DKIM "DKIM/DomainKeys Identified Mail" on \ - SSL "Build with SSL support for spamd/spamc" on \ - GNUPG "Install GnuPG (for sa-update)" on \ - MYSQL "Add MySQL support" off \ - PGSQL "Add PostreSQL support" off \ - RAZOR "Add Vipul's Razor support" on \ - SPF_QUERY "Add SPF query support" off \ - RELAY_COUNTRY "Relay country support" off \ - DCC "Add DCC support (see LICENSE)" off - -.if !defined(WITHOUT_SSL) -USE_OPENSSL= yes -.endif - -.include - -.if ${PERL_LEVEL} < 500903 -RUN_DEPENDS+= p5-IO-Compress>=2.017:${PORTSDIR}/archivers/p5-IO-Compress -.endif - -.if ${PERL_LEVEL} < 501000 -RUN_DEPENDS+= p5-Archive-Tar>=1.23:${PORTSDIR}/archivers/p5-Archive-Tar \ - p5-IO-Zlib>=1.04:${PORTSDIR}/archivers/p5-IO-Zlib \ - p5-Test-Harness>=3.16:${PORTSDIR}/devel/p5-Test-Harness -.endif - -.if defined (WITH_SPAMC) -CONFIGURE_ARGS+= BUILD_SPAMC=yes -.else -CONFIGURE_ARGS+= BUILD_SPAMC=no -WITH_AS_ROOT= -WITHOUT_SSL=1 -.endif - -.if defined(WITH_SPF_QUERY) -RUN_DEPENDS+= ${SITE_PERL}/Mail/SPF.pm:${PORTSDIR}/mail/p5-Mail-SPF -.endif -.if !defined(WITHOUT_IPV6) -RUN_DEPENDS+= ${SITE_PERL}/IO/Socket/INET6.pm:${PORTSDIR}/net/p5-IO-Socket-INET6 -.endif - -.if !defined(WITHOUT_SSL) -.include "${PORTSDIR}/Mk/bsd.openssl.mk" -RUN_DEPENDS+= ${SITE_PERL}/IO/Socket/SSL.pm:${PORTSDIR}/security/p5-IO-Socket-SSL -CFLAGS+= -I${OPENSSLINC} -LDFLAGS+= -L${OPENSSLLIB} -CONFIGURE_ARGS+= ENABLE_SSL=yes -PLIST_SUB+= SSL="" -.else -CONFIGURE_ARGS+= ENABLE_SSL=no -PLIST_SUB+= SSL="@comment " -.endif - -.if !defined(WITHOUT_GNUPG) -RUN_DEPENDS+= gnupg>=1.4.7:${PORTSDIR}/security/gnupg -.endif +COMMENT= SpamAssassin with patches to handle multibyte characters -.if defined(WITH_MYSQL) -RUN_DEPENDS+= ${SITE_PERL}/${PERL_ARCH}/DBD/mysql.pm:${PORTSDIR}/databases/p5-DBD-mysql -.endif +LICENSE= AL2 -.if 
defined(WITH_PGSQL) -RUN_DEPENDS+= ${SITE_PERL}/${PERL_ARCH}/DBD/Pg.pm:${PORTSDIR}/databases/p5-DBD-Pg -.endif +MASTERDIR= ${.CURDIR}/../../mail/p5-Mail-SpamAssassin -.if defined(WITH_RAZOR) -RUN_DEPENDS+= razor-agents>=2.84:${PORTSDIR}/mail/razor-agents -.else -.if ${PERL_LEVEL} < 501000 -.if !defined(WITH_DKIM) -RUN_DEPENDS+= p5-Digest-SHA1>=2.11:${PORTSDIR}/security/p5-Digest-SHA1 -.endif -.endif -.endif +RUN_DEPENDS+= ja-p5-MeCab>=0.98:${PORTSDIR}/japanese/p5-MeCab -.if defined(WITH_DKIM) -RUN_DEPENDS+= ${SITE_PERL}/IO/Socket/SSL.pm:${PORTSDIR}/security/p5-IO-Socket-SSL -. if ${PERL_LEVEL} < 501000 -RUN_DEPENDS+= ${SITE_PERL}/${PERL_ARCH}/Digest/SHA.pm:${PORTSDIR}/security/p5-Digest-SHA -. endif -RUN_DEPENDS+= p5-Mail-DKIM>=0.37:${PORTSDIR}/mail/p5-Mail-DKIM -RUN_DEPENDS+= p5-Crypt-OpenSSL-RSA>=0.26_1:${PORTSDIR}/security/p5-Crypt-OpenSSL-RSA -.endif - -.if defined(WITH_SACOMPILE) -RUN_DEPENDS+= re2c>=.12.0:${PORTSDIR}/devel/re2c -.endif - -.if defined(WITH_RELAY_COUNTRY) -RUN_DEPENDS+= ${SITE_PERL}/IP/Country/Fast.pm:${PORTSDIR}/net/p5-IP-Country -.endif - -.if defined(WITH_DCC) -RUN_DEPENDS+= dcc-dccd>=1.3.111:${PORTSDIR}/mail/dcc-dccd -.endif +CONFLICTS= p5-Mail-SpamAssassin-[0-9]* -MAN3= Mail::SpamAssassin.3 \ - Mail::SpamAssassin::AICache.3 \ - Mail::SpamAssassin::ArchiveIterator.3 \ - Mail::SpamAssassin::AsyncLoop.3 \ - Mail::SpamAssassin::AutoWhitelist.3 \ - Mail::SpamAssassin::Bayes.3 \ - Mail::SpamAssassin::BayesStore.3 \ - Mail::SpamAssassin::BayesStore::BDB.3 \ - Mail::SpamAssassin::BayesStore::MySQL.3 \ - Mail::SpamAssassin::BayesStore::PgSQL.3 \ - Mail::SpamAssassin::BayesStore::SQL.3 \ - Mail::SpamAssassin::Client.3 \ - Mail::SpamAssassin::Conf.3 \ - Mail::SpamAssassin::Conf::LDAP.3 \ - Mail::SpamAssassin::Conf::Parser.3 \ - Mail::SpamAssassin::Conf::SQL.3 \ - Mail::SpamAssassin::DnsResolver.3 \ - Mail::SpamAssassin::Logger.3 \ - Mail::SpamAssassin::Logger::File.3 \ - Mail::SpamAssassin::Logger::Stderr.3 \ - Mail::SpamAssassin::Logger::Syslog.3 \ 
- Mail::SpamAssassin::Message.3 \ - Mail::SpamAssassin::Message::Metadata.3 \ - Mail::SpamAssassin::Message::Node.3 \ - Mail::SpamAssassin::PerMsgLearner.3 \ - Mail::SpamAssassin::PerMsgStatus.3 \ - Mail::SpamAssassin::PersistentAddrList.3 \ - Mail::SpamAssassin::Plugin.3 \ - Mail::SpamAssassin::Plugin::ASN.3 \ - Mail::SpamAssassin::Plugin::AWL.3 \ - Mail::SpamAssassin::Plugin::AccessDB.3 \ - Mail::SpamAssassin::Plugin::AntiVirus.3 \ - Mail::SpamAssassin::Plugin::AutoLearnThreshold.3 \ - Mail::SpamAssassin::Plugin::Bayes.3 \ - Mail::SpamAssassin::Plugin::BodyRuleBaseExtractor.3 \ - Mail::SpamAssassin::Plugin::Check.3 \ - Mail::SpamAssassin::Plugin::DCC.3 \ - Mail::SpamAssassin::Plugin::DKIM.3 \ - Mail::SpamAssassin::Plugin::Hashcash.3 \ - Mail::SpamAssassin::Plugin::MIMEHeader.3 \ - Mail::SpamAssassin::Plugin::OneLineBodyRuleType.3 \ - Mail::SpamAssassin::Plugin::PhishTag.3 \ - Mail::SpamAssassin::Plugin::Pyzor.3 \ - Mail::SpamAssassin::Plugin::Razor2.3 \ - Mail::SpamAssassin::Plugin::RelayCountry.3 \ - Mail::SpamAssassin::Plugin::ReplaceTags.3 \ - Mail::SpamAssassin::Plugin::Reuse.3 \ - Mail::SpamAssassin::Plugin::Rule2XSBody.3 \ - Mail::SpamAssassin::Plugin::SPF.3 \ - Mail::SpamAssassin::Plugin::Shortcircuit.3 \ - Mail::SpamAssassin::Plugin::SpamCop.3 \ - Mail::SpamAssassin::Plugin::Test.3 \ - Mail::SpamAssassin::Plugin::TextCat.3 \ - Mail::SpamAssassin::Plugin::Tokenizer.3 \ - Mail::SpamAssassin::Plugin::Tokenizer::MeCab.3 \ - Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA.3 \ - Mail::SpamAssassin::Plugin::URIDNSBL.3 \ - Mail::SpamAssassin::Plugin::URIDetail.3 \ - Mail::SpamAssassin::Plugin::VBounce.3 \ - Mail::SpamAssassin::Plugin::WhiteListSubject.3 \ - Mail::SpamAssassin::PluginHandler.3 \ - Mail::SpamAssassin::SQLBasedAddrList.3 \ - Mail::SpamAssassin::SubProcBackChannel.3 \ - Mail::SpamAssassin::Timeout.3 \ - Mail::SpamAssassin::Util.3 \ - Mail::SpamAssassin::Util::Charset.3 \ - Mail::SpamAssassin::Util::DependencyInfo.3 \ - 
Mail::SpamAssassin::Util::Progress.3 \ - Mail::SpamAssassin::Util::RegistrarBoundaries.3 \ - spamassassin-run.3 +EXTRA_PATCHES= ${.CURDIR}/files/spamassassin-3.3.2-ja-1.patch -MAN1= spamd.1 spamassassin.1 spamc.1 sa-learn.1 sa-update.1 \ - spamassassin-run.1 sa-compile.1 sa-awl.1 +PKGMESSAGE= ${.CURDIR}/pkg-message +PLIST= ${WRKDIR}/pkg-plist -DOCSDIR= ${PREFIX}/share/doc/${PKGNAMEPREFIX}${PORTNAME} -DATADIR= ${PREFIX}/share/spamassassin -DOCS= CREDITS Changes INSTALL LICENSE NOTICE PACKAGING README TRADEMARK UPGRADE USAGE procmailrc.example -DOCSSQL= README README.awl README.bayes awl_mysql.sql awl_pg.sql bayes_mysql.sql bayes_pg.sql userpref_mysql.sql userpref_pg.sql -DOCSLDAP= README README.testing sa_test.ldif -PORTDOCS= ${DOCS} sql ldap ${DOCJA} -DOCJA= ${PATCHFILES:S/.patch:JA/.txt/} TOKENIZER_PRE= tokenizer.pre -USE_RC_SUBR= sa-spamd.sh - -.if defined(WITH_MYSQL) || defined(WITH_PGSQL) -SUB_LIST+= SQL_FLAG="-Q" -.else -SUB_LIST+= SQL_FLAG="" -.endif -.if !defined(WITH_AS_ROOT) -SUB_LIST+= RUN_AS_USER="-u ${USERS} -H /var/spool/spamd" -.else -SUB_LIST+= RUN_AS_USER="" -.endif - -post-patch: - @${FIND} ${WRKSRC} -name \*.orig -delete - @${REINPLACE_CMD} -e 's#B_CONFDIR)/local.cf#B_CONFDIR)/local.cf.sample#g' \ - -e 's#B_CONFDIR)/init.pre#B_CONFDIR)/init.pre.sample#g' \ - -e 's#B_CONFDIR)/v310.pre#B_CONFDIR)/v310.pre.sample#g' \ - -e 's#B_CONFDIR)/v312.pre#B_CONFDIR)/v312.pre.sample#g' \ - -e 's#B_CONFDIR)/v320.pre#B_CONFDIR)/v320.pre.sample#g' \ - -e 's#B_CONFDIR)/v330.pre#B_CONFDIR)/v330.pre.sample#g' \ - -e 's/require DBI/0/' \ - ${WRKSRC}/Makefile.PL - @${REINPLACE_CMD} -e '/^CC =/d; \ - s|@SSLCFLAGS@|& $${CFLAGS}|g' ${WRKSRC}/spamc/Makefile.in +PLIST_SUB+= TOKENIZER_PRE=${TOKENIZER_PRE} -.if defined(WITH_RAZOR) - ${REINPLACE_CMD} -e '/Razor2/s/^#loadplugin/loadplugin/' ${WRKSRC}/rules/v312.pre -.endif -.if defined(WITH_RELAY_COUNTRY) - ${REINPLACE_CMD} -e '/RelayCountry/s/^# ?loadplugin/loadplugin/' ${WRKSRC}/rules/init.pre -.endif -.if !defined(WITH_DKIM) 
- ${REINPLACE_CMD} -e '/DKIM/s/^loadplugin/#loadplugin/' ${WRKSRC}/rules/v312.pre -.endif -.if !defined(WITH_SPF_QUERY) - ${REINPLACE_CMD} -e '/SPF/s/^loadplugin/#loadplugin/' ${WRKSRC}/rules/init.pre -.endif -.if defined(WITH_DCC) - ${REINPLACE_CMD} -e '/DCC/s/^#loadplugin/loadplugin/' ${WRKSRC}/rules/v310.pre -.endif -.if !defined(WITH_AWL) - ${REINPLACE_CMD} -e '/AWL/s/^loadplugin/#loadplugin/' ${WRKSRC}/rules/v310.pre -.endif -.if defined(WITH_SACOMPILE) - ${REINPLACE_CMD} -e '/Rule2XSBody/s/^# loadplugin/loadplugin/' ${WRKSRC}/rules/v320.pre -.endif +MAN3= Mail::SpamAssassin::Util::Charset.3 \ + Mail::SpamAssassin::Plugin::Tokenizer::MeCab.3 \ + Mail::SpamAssassin::Plugin::Tokenizer.3 \ + Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA.3 pre-install: - @${MKDIR} ${DATADIR} - -post-build: - @(cd ${BUILD_WRKSRC}; ${SETENV} ${MAKE_ENV} ${MAKE} ${MAKE_FLAGS} ${MAKEFILE} ${MAKE_ARGS} spamc/libspamc.so) -.if !defined(WITHOUT_SSL) - @(cd ${BUILD_WRKSRC}; ${SETENV} ${MAKE_ENV} ${MAKE} ${MAKE_FLAGS} ${MAKEFILE} ${MAKE_ARGS} spamc/libsslspamc.so) -.endif - -pre-su-install: - @USER=${USERS} GROUP=${GROUPS} ${SH} ${PKGINSTALL} ${PKGNAME} PRE-INSTALL - @${INSTALL_PROGRAM} ${WRKSRC}/spamc/libspamc.so ${PREFIX}/lib/libspamc.so.0 - @${LN} -sf libspamc.so.0 ${PREFIX}/lib/libspamc.so -.if !defined(WITHOUT_SSL) - @${INSTALL_PROGRAM} ${WRKSRC}/spamc/libsslspamc.so ${PREFIX}/lib/libsslspamc.so.0 - @${LN} -sf libsslspamc.so.0 ${PREFIX}/lib/libsslspamc.so -.endif - @${INSTALL_DATA} ${WRKSRC}/spamc/libspamc.h ${PREFIX}/include - -post-install: -.if defined (WITH_SPAMC) - @${STRIP_CMD} ${PREFIX}/bin/spamc -.endif - @[ -f ${PREFIX}/etc/mail/spamassassin/init.pre ] || \ - ${CP} ${PREFIX}/etc/mail/spamassassin/init.pre.sample \ - ${PREFIX}/etc/mail/spamassassin/init.pre - @[ -f ${PREFIX}/etc/mail/spamassassin/v310.pre ] || \ - ${CP} ${PREFIX}/etc/mail/spamassassin/v310.pre.sample \ - ${PREFIX}/etc/mail/spamassassin/v310.pre - @[ -f ${PREFIX}/etc/mail/spamassassin/v312.pre ] || \ - ${CP} 
${PREFIX}/etc/mail/spamassassin/v312.pre.sample \ - ${PREFIX}/etc/mail/spamassassin/v312.pre - @[ -f ${PREFIX}/etc/mail/spamassassin/v320.pre ] || \ - ${CP} ${PREFIX}/etc/mail/spamassassin/v320.pre.sample \ - ${PREFIX}/etc/mail/spamassassin/v320.pre - @PKG_PREFIX=${PREFIX} BATCH=${BATCH} SU_CMD="${SU_CMD}" USER=${USERS} GROUP=${GROUPS} ${SH} ${PKGDIR}/pkg-install ${PKGNAME} POST-INSTALL - @[ -f ${PREFIX}/etc/mail/spamassassin/v330.pre ] || \ - ${CP} ${PREFIX}/etc/mail/spamassassin/v330.pre.sample \ - ${PREFIX}/etc/mail/spamassassin/v330.pre - - @${CP} ${DISTDIR}/${DIST_SUBDIR}/${TOKENIZER_PRE} ${PREFIX}/etc/mail/spamassassin/${TOKENIZER_PRE}.sample - @[ -f ${PREFIX}/etc/mail/spamassassin/${TOKENIZER_PRE} ] || \ - ${CP} ${PREFIX}/etc/mail/spamassassin/${TOKENIZER_PRE}.sample \ - ${PREFIX}/etc/mail/spamassassin/${TOKENIZER_PRE} - -.if !defined(NOPORTDOCS) - @${MKDIR} ${DOCSDIR} ${DOCSDIR}/sql ${DOCSDIR}/ldap - @${INSTALL_DATA} ${DOCS:S|^|${WRKSRC}/|} ${DOCSDIR} - @${INSTALL_DATA} ${DOCSSQL:S|^|${WRKSRC}/sql/|} ${DOCSDIR}/sql - @${INSTALL_DATA} ${DOCSLDAP:S|^|${WRKSRC}/ldap/|} ${DOCSDIR}/ldap - @${INSTALL_DATA} ${DISTDIR}/${DIST_SUBDIR}/${DOCJA} ${DOCSDIR} + @${CAT} ${EXTRA_PATCHES:S/.patch/.plist/} > ${PLIST} + @${CAT} ${PKGDIR}/pkg-plist >> ${PLIST} -.endif - @${SED} -e 's#PREFIX#${PREFIX}#' ${PKGMESSAGE} +post-install:: + @${CP} ${.CURDIR}/files/${TOKENIZER_PRE} ${ETCDIR}/${TOKENIZER_PRE}.sample + @[ -f ${ETCDIR}/${TOKENIZER_PRE} ] || \ + ${INSTALL_DATA} ${ETCDIR}/${TOKENIZER_PRE}.sample \ + ${ETCDIR}/${TOKENIZER_PRE} -.include +.include "${MASTERDIR}/Makefile" diff --git a/japanese/p5-Mail-SpamAssassin/distinfo b/japanese/p5-Mail-SpamAssassin/distinfo deleted file mode 100644 index b9842b70795a..000000000000 --- a/japanese/p5-Mail-SpamAssassin/distinfo +++ /dev/null @@ -1,8 +0,0 @@ -SHA256 (ja-spamassassin/Mail-SpamAssassin-3.3.2.tar.gz) = 5323038939a0ef9fc97d5264defce3ae1d95e98b3a94c4c3b583341c927f32df -SIZE (ja-spamassassin/Mail-SpamAssassin-3.3.2.tar.gz) = 
1208182 -SHA256 (ja-spamassassin/tokenizer.pre) = 9f8e30a8449fd13d571427ea30a252b4b275f153bde5345c50427a7aee3c90e0 -SIZE (ja-spamassassin/tokenizer.pre) = 163 -SHA256 (ja-spamassassin/spamassassin-3.3.2-ja-1.txt) = 6d818b246d2655abb260de83b4735c4f433808de7c19c4f905474c78c1ccbebd -SIZE (ja-spamassassin/spamassassin-3.3.2-ja-1.txt) = 7246 -SHA256 (ja-spamassassin/spamassassin-3.3.2-ja-1.patch) = 073e9eaebf0dc2bf9e90f894c171a7654dad8444ed269528045e547302b7136a -SIZE (ja-spamassassin/spamassassin-3.3.2-ja-1.patch) = 33740 diff --git a/japanese/p5-Mail-SpamAssassin/files/patch-rules-local.cf b/japanese/p5-Mail-SpamAssassin/files/patch-rules-local.cf deleted file mode 100644 index 29979a03ee9f..000000000000 --- a/japanese/p5-Mail-SpamAssassin/files/patch-rules-local.cf +++ /dev/null @@ -1,15 +0,0 @@ ---- rules/local.cf.orig 2008-01-06 06:11:39.000000000 +0900 -+++ rules/local.cf 2008-05-27 22:07:58.000000000 +0900 -@@ -51,3 +51,12 @@ - # bayes_ignore_header X-Spam-Flag - # bayes_ignore_header X-Spam-Status - -+ -+# Normalize charset to UTF-8 (default:0) -+# -+# normalize_charset 1 -+# -+# score FROM_EXCESS_BASE64 0 -+# score SUBJ_ILLEGAL_CHARS 0 -+# score MIME_BASE64_TEXT 1.0 -+# score TVD_SPACE_RATIO 0 diff --git a/japanese/p5-Mail-SpamAssassin/files/patch-sa-learn.raw b/japanese/p5-Mail-SpamAssassin/files/patch-sa-learn.raw deleted file mode 100644 index 635cc317adc9..000000000000 --- a/japanese/p5-Mail-SpamAssassin/files/patch-sa-learn.raw +++ /dev/null @@ -1,27 +0,0 @@ ---- sa-learn.raw.orig Wed Aug 8 06:19:47 2007 -+++ sa-learn.raw Mon Aug 20 19:05:06 2007 -@@ -101,6 +101,7 @@ GetOptions( - 'local|L' => \$opt{'local'}, - 'no-sync|nosync' => \$opt{'nosync'}, - 'showdots' => \$opt{'showdots'}, -+ 'quiet|q' => \$opt{'quiet'}, - 'progress' => \$opt{'progress'}, - 'use-ignores' => \$opt{'use-ignores'}, - 'no-rebuild|norebuild' => sub { $opt{'nosync'} = 1; warn "The --no-rebuild option has been deprecated. 
Please use --no-sync instead.\n" }, -@@ -433,7 +434,7 @@ eval { - $progress->final() if ($opt{progress} && $progress); - - my $phrase = defined $forget ? "Forgot" : "Learned"; -- print "$phrase tokens from $learnedcount message(s) ($messagecount message(s) examined)\n"; -+ print "$phrase tokens from $learnedcount message(s) ($messagecount message(s) examined)\n" if (!$opt{quiet}); - - # If we needed to make a tempfile, go delete it. - if ( defined $tempfile ) { -@@ -601,6 +602,7 @@ Options: - (default: /etc/mail/spamassassin) - --cf='config line' Additional line of configuration - -D, --debug [area=n,...] Print debugging messages -+ -q, --quiet Reduce amount of information printed out - -V, --version Print version - -h, --help Print usage message - diff --git a/japanese/p5-Mail-SpamAssassin/files/sa-spamd.sh.in b/japanese/p5-Mail-SpamAssassin/files/sa-spamd.sh.in deleted file mode 100644 index 5398ea6fa7e7..000000000000 --- a/japanese/p5-Mail-SpamAssassin/files/sa-spamd.sh.in +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/sh -# -# $FreeBSD$ -# - -# PROVIDE: spamd -# REQUIRE: LOGIN -# BEFORE: mail -# KEYWORD: shutdown - -# -# Add the following lines to /etc/rc.conf to enable spamd: -# -#spamd_enable="YES" -# -# See spamd(8) for flags -# - -. 
/etc/rc.subr - -name=spamd -rcvar=spamd_enable - -extra_commands="reload" -load_rc_config $name - -# Set defaults -: ${spamd_enable:="NO"} -: ${spamd_flags="-c %%SQL_FLAG%% %%RUN_AS_USER%%"} - -pidfile=${spamd_pidfile:-"/var/run/spamd/spamd.pid"} -command=%%PREFIX%%/bin/spamd -command_args="-d -r ${pidfile}" -required_dirs=%%PREFIX%%/share/spamassassin - -stop_postcmd=stop_postcmd - -stop_postcmd() -{ - rm -f $pidfile -} - -run_rc_command "$1" diff --git a/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch b/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch new file mode 100644 index 000000000000..9ce06cfe2d9f --- /dev/null +++ b/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.patch @@ -0,0 +1,1148 @@ +diff -uNr lib/Mail/SpamAssassin/HTML.pm lib/Mail/SpamAssassin/HTML.pm +--- lib/Mail/SpamAssassin/HTML.pm 2011-06-07 08:59:17.000000000 +0900 ++++ lib/Mail/SpamAssassin/HTML.pm 2011-07-14 22:35:46.000000000 +0900 +@@ -84,7 +84,7 @@ + $ok_attributes{span}{$_} = 1 for qw( style ); + + sub new { +- my ($class) = @_; ++ my ($class, $opts) = @_; + my $self = $class->SUPER::new( + api_version => 3, + handlers => [ +@@ -97,6 +97,7 @@ + declaration => ["html_declaration", "self,text"], + ], + marked_sections => 1); ++ $self->{normalize} = $opts->{'normalize'} || 0; + + $self; + } +@@ -672,7 +673,14 @@ + } + } + else { +- $text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g; ++ if ($self->{normalize}) { ++ $text =~ s/\xc2\xa0/ /g; # no-break space ++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace ++ $text =~ s/[ \t\n\r\f\x0b]+/ /g; ++ } ++ else { ++ $text =~ s/[ \t\n\r\f\x0b\xa0]+/ /g; ++ } + # trim leading whitespace if previous element was whitespace + # and current element is not invisible + if (@{ $self->{text} } && !$display{invisible} && +diff -uNr lib/Mail/SpamAssassin/Message/Node.pm lib/Mail/SpamAssassin/Message/Node.pm +--- lib/Mail/SpamAssassin/Message/Node.pm 2011-06-07 08:59:17.000000000 +0900 ++++ lib/Mail/SpamAssassin/Message/Node.pm 
2011-07-14 22:35:46.000000000 +0900 +@@ -42,6 +42,7 @@ + use Mail::SpamAssassin::Constants qw(:sa); + use Mail::SpamAssassin::HTML; + use Mail::SpamAssassin::Logger; ++use Mail::SpamAssassin::Util::Charset; + + =item new() + +@@ -387,27 +388,10 @@ + + sub _normalize { + my ($self, $data, $charset) = @_; +- return $data unless $self->{normalize}; ++ return wantarray ? ($data, $charset) : $data unless $self->{normalize}; + +- my $detected = Encode::Detect::Detector::detect($data); +- +- my $converter; +- +- if ($charset && $charset !~ /^us-ascii$/i && +- ($detected || 'none') !~ /^(?:UTF|EUC|ISO-2022|Shift_JIS|Big5|GB)/i) { +- dbg("message: Using labeled charset $charset"); +- $converter = Encode::find_encoding($charset); +- } +- +- $converter = Encode::find_encoding($detected) unless $converter || !defined($detected); +- +- return $data unless $converter; +- +- dbg("message: Converting..."); +- +- my $rv = $converter->decode($data, 0); +- utf8::downgrade($rv, 1); +- return $rv ++ my ($decoded_data, $detected_charset) = normalize_charset($data, $charset); ++ return wantarray ? ($decoded_data, $detected_charset) : $decoded_data; + } + + =item rendered() +@@ -430,8 +414,12 @@ + # text/x-aol is ignored here, but looks like text/html ... 
+ return(undef,undef) unless ( $self->{'type'} =~ /^text\/(?:plain|html)$/i ); + +- my $text = $self->_normalize($self->decode(), $self->{charset}); ++ my ($text, $charset) = $self->_normalize($self->decode(), $self->{charset}); + my $raw = length($text); ++ if ($self->{normalize}) { ++ $self->{charset} = $charset; ++ $self->{language} = get_language($text, $charset); ++ } + + # render text/html always, or any other text|text/plain part as text/html + # based on a heuristic which simulates a certain common mail client +@@ -441,7 +429,7 @@ + { + $self->{rendered_type} = 'text/html'; + +- my $html = Mail::SpamAssassin::HTML->new(); # object ++ my $html = Mail::SpamAssassin::HTML->new({normalize=>$self->{normalize}}); # object + $html->parse($text); # parse+render text + $self->{rendered} = $html->get_rendered_text(); + $self->{visible_rendered} = $html->get_rendered_text(invisible => 0); +diff -uNr lib/Mail/SpamAssassin/Message.pm lib/Mail/SpamAssassin/Message.pm +--- lib/Mail/SpamAssassin/Message.pm 2011-06-07 08:59:17.000000000 +0900 ++++ lib/Mail/SpamAssassin/Message.pm 2011-07-14 22:35:46.000000000 +0900 +@@ -559,6 +559,8 @@ + delete $self->{'pristine_headers'}; + delete $self->{'line_ending'}; + delete $self->{'missing_head_body_separator'}; ++ delete $self->{'charset'}; ++ delete $self->{'language'}; + + my @toclean = ( $self ); + +@@ -585,6 +587,8 @@ + delete $part->{'invisible_rendered'}; + delete $part->{'type'}; + delete $part->{'rendered_type'}; ++ delete $part->{'charset'}; # charset/language are stored per node; clear them on every $part, not only the root ++ delete $part->{'language'}; + + # if there are children nodes, add them to the queue of nodes to clean up + if (exists $part->{'body_parts'}) { +@@ -1014,7 +1018,14 @@ + + # whitespace handling (warning: small changes have large effects!) 
+ $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed +- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ if ($self->{normalize}) { ++ $text =~ s/\xc2\xa0/ /g; # no-break space => space ++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space ++ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space ++ } ++ else { ++ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ } + $text =~ tr/\f/\n/; # form feeds => newline + + # warn "message: $text"; +@@ -1071,7 +1082,14 @@ + + # whitespace handling (warning: small changes have large effects!) + $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed +- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ if ($self->{normalize}) { ++ $text =~ s/\xc2\xa0/ /g; # no-break space => space ++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space ++ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space ++ } ++ else { ++ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ } + $text =~ tr/\f/\n/; # form feeds => newline + + my @textary = split_into_array_of_short_lines ($text); +@@ -1122,7 +1140,14 @@ + + # whitespace handling (warning: small changes have large effects!) 
+ $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed +- $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ if ($self->{normalize}) { ++ $text =~ s/\xc2\xa0/ /g; # no-break space => space ++ $text =~ s/\xe3\x80\x80/ /g; # ideographicspace => space ++ $text =~ tr/ \t\n\r\x0b/ /s; # whitespace => space ++ } ++ else { ++ $text =~ tr/ \t\n\r\x0b\xa0/ /s; # whitespace => space ++ } + $text =~ tr/\f/\n/; # form feeds => newline + + my @textary = split_into_array_of_short_lines ($text); +@@ -1198,6 +1223,28 @@ + + # --------------------------------------------------------------------------- + ++sub get_language { ++ my ($self) = @_; ++ ++ if (defined $self->{language}) { return $self->{language}; } ++ my @parts = $self->find_parts(qr/^(?:text|message)\b/i,1); ++ return '' unless @parts; ++ ++ # Go through each part ++ my @langs; ++ for(my $pt = 0 ; $pt <= $#parts ; $pt++ ) { ++ my $p = $parts[$pt]; ++ my $lang = $p->{language}; ++ next unless ($lang); ++ push(@langs, $lang) unless (grep { $_ eq $lang } @langs); # exact string compare: $lang is data, not a regex ++ } ++ $self->{language} = scalar(@langs) ? join(' ', @langs) : ''; ++ return $self->{language}; ++} ++ ++# --------------------------------------------------------------------------- ++ ++ + 1; + + =back +diff -uNr lib/Mail/SpamAssassin/PerMsgStatus.pm lib/Mail/SpamAssassin/PerMsgStatus.pm +--- lib/Mail/SpamAssassin/PerMsgStatus.pm 2011-06-07 08:59:17.000000000 +0900 ++++ lib/Mail/SpamAssassin/PerMsgStatus.pm 2011-07-14 22:35:46.000000000 +0900 +@@ -53,6 +53,7 @@ + use warnings; + use re 'taint'; + ++use Encode; + use Time::HiRes qw(time); + + use Mail::SpamAssassin::Constants qw(:sa); +@@ -733,19 +734,41 @@ + + # the report charset + my $report_charset = "; charset=iso-8859-1"; +- if ($self->{conf}->{report_charset}) { +- $report_charset = "; charset=" . $self->{conf}->{report_charset}; +- } + + # the SpamAssassin report + my $report = $self->get_report(); ++ if ($self->{conf}->{report_charset}) { ++ $report_charset = "; charset=" . 
$self->{conf}->{report_charset}; ++ } + + # If there are any wide characters, need to MIME-encode in UTF-8 + # TODO: If $report_charset is something other than iso-8859-1/us-ascii, then + # we could try converting to that charset if possible +- unless ($] < 5.008 || utf8::downgrade($report, 1)) { ++ my $is_utf8 = 0; ++ if ($self->{conf}->{normalize_charset}) { ++ $report = Encode::decode_utf8($report); ++ $is_utf8 = 1; ++ } ++ else { ++ if ($self->{msg}->{charset}) { ++ eval { ++ my $scratch = $report; ++ $report = Encode::decode($self->{msg}->{charset},$scratch,Encode::FB_CROAK); ++ $is_utf8 = 1; ++ }; ++ } ++ } ++ if ($is_utf8) { ++ $is_utf8 = 1; ++ eval { ++ my $scratch = $report; ++ $report = Encode::encode($self->{conf}->{report_charset},$scratch,Encode::FB_CROAK); ++ $is_utf8 = 0; ++ }; ++ if ($is_utf8) { ++ $report = Encode::encode_utf8($report); + $report_charset = "; charset=utf-8"; +- utf8::encode($report); ++ } + } + + # get original headers, "pristine" if we can do it +diff -uNr lib/Mail/SpamAssassin/Plugin/Bayes.pm lib/Mail/SpamAssassin/Plugin/Bayes.pm +--- lib/Mail/SpamAssassin/Plugin/Bayes.pm 2011-06-07 08:59:17.000000000 +0900 ++++ lib/Mail/SpamAssassin/Plugin/Bayes.pm 2011-07-14 22:35:46.000000000 +0900 +@@ -223,6 +223,15 @@ + # will require a longer token than English ones.) + use constant MAX_TOKEN_LENGTH => 15; + ++# Skip if a token is too short. 
++our $SKIP_UTF8_SHORT_TOKENS_RE = qr{(?: ++ [\x00-\x7F] # 1 byte ++ | [\xC0-\xDF][\x80-\xBF] # 2 bytes ++ | [\xE0-\xEF][\x80-\xBF]{2} # 3 bytes ++ | [\xF0-\xF7][\x80-\xBF]{3} # 4 bytes ++ | (?:\xE3[\x81-\x83][\x80-\xBF]){2} # 2 characters of Hiragana and Katakana ++)}x; ++ + ########################################################################### + + sub new { +@@ -983,9 +992,28 @@ + $msgdata->{bayes_token_body} = $msg->{msg}->get_visible_rendered_body_text_array(); + $msgdata->{bayes_token_inviz} = $msg->{msg}->get_invisible_rendered_body_text_array(); + @{$msgdata->{bayes_token_uris}} = $msg->get_uri_list(); ++ if ($self->{conf}->{normalize_charset}) { ++ my $tokenizer = $self->get_tokenizer($msg); ++ if (ref($tokenizer)) { ++ $msgdata->{bayes_token_body} = $tokenizer->tokenize($msgdata->{bayes_token_body}); ++ $msgdata->{bayes_token_inviz} = $tokenizer->tokenize($msgdata->{bayes_token_inviz}); ++ } ++ } + return $msgdata; + } + ++sub get_tokenizer { ++ my ($self, $msg) = @_; ++ ++ my $tokenizer; ++ my @languages = split(/\s+/, $msg->{msg}->get_language()); ++ foreach my $lang (@languages) { ++ $tokenizer = $self->{'conf'}->{'tokenizer'}->{$lang}; ++ last if (ref($tokenizer)); ++ } ++ return $tokenizer; ++} ++ + ########################################################################### + + # The calling functions expect a uniq'ed array of tokens ... +@@ -1039,7 +1067,7 @@ + # include quotes, .'s and -'s for URIs, and [$,]'s for Nigerian-scam strings, + # and ISO-8859-15 alphas. Do not split on @'s; better results keeping it. + # Some useful tokens: "$31,000,000" "www.clock-speed.net" "f*ck" "Hits!" +- tr/-A-Za-z0-9,\@\*\!_'"\$.\241-\377 / /cs; ++ tr/-A-Za-z0-9,\@\*\!_'"\$.\200-\377 / /cs; + + # DO split on "..." or "--" or "---"; common formatting error resulting in + # hapaxes. Keep the separator itself as a token, though, as long ones can +@@ -1068,6 +1096,11 @@ + # + next if ( defined $magic_re && $token =~ /$magic_re/ ); + ++ # Skip short UTF-8 tokens. 
++ if ($self->{conf}->{normalize_charset}) { ++ next if ($token =~ /^$SKIP_UTF8_SHORT_TOKENS_RE$/o); ++ } ++ + # *do* keep 3-byte tokens; there's some solid signs in there + my $len = length($token); + +@@ -1096,14 +1129,16 @@ + # the domain ".net" appeared in the To header. + # + if ($len > MAX_TOKEN_LENGTH && $token !~ /\*/) { +- if (TOKENIZE_LONG_8BIT_SEQS_AS_TUPLES && $token =~ /[\xa0-\xff]{2}/) { +- # Matt sez: "Could be asian? Autrijus suggested doing character ngrams, +- # but I'm doing tuples to keep the dbs small(er)." Sounds like a plan +- # to me! (jm) +- while ($token =~ s/^(..?)//) { +- push (@rettokens, "8:$1"); +- } +- next; ++ unless ($self->{conf}->{normalize_charset}) { ++ if (TOKENIZE_LONG_8BIT_SEQS_AS_TUPLES && $token =~ /[\xa0-\xff]{2}/) { ++ # Matt sez: "Could be asian? Autrijus suggested doing character ngrams, ++ # but I'm doing tuples to keep the dbs small(er)." Sounds like a plan ++ # to me! (jm) ++ while ($token =~ s/^(..?)//) { ++ push (@rettokens, "8:$1"); ++ } ++ next; ++ } + } + + if (($region == 0 && HDRS_TOKENIZE_LONG_TOKENS_AS_SKIPS) +diff -uNr lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm +--- lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm 1970-01-01 09:00:00.000000000 +0900 ++++ lib/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm 2011-07-14 22:29:19.000000000 +0900 +@@ -0,0 +1,84 @@ ++# <@LICENSE> ++# Copyright 2004 Apache Software Foundation ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
++# See the License for the specific language governing permissions and ++# limitations under the License. ++# ++ ++=head1 NAME ++ ++Tokenizer::MeCab - Japanese tokenizer with MeCab ++ ++=head1 SYNOPSIS ++ ++loadplugin Mail::SpamAssassin::Plugin::Tokenizer::MeCab ++ ++=head1 DESCRIPTION ++ ++This plugin tokenizes a Japanese string with MeCab that is ++the morphological analysis engine. ++ ++Text::MeCab 0.12 or over is required. ++ ++=cut ++ ++package Mail::SpamAssassin::Plugin::Tokenizer::MeCab; ++ ++use strict; ++use warnings; ++use Mail::SpamAssassin::Plugin::Tokenizer; ++ ++use vars qw(@ISA); ++@ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer); ++ ++# Have to do this so that RPM doesn't find these as required perl modules ++BEGIN { require MeCab; } ++our $language = 'ja'; ++our $mecab = new MeCab::Tagger(-Ochasen); ++ ++sub new { ++ my $class = shift; ++ my $mailsaobject = shift; ++ ++ $class = ref($class) || $class; ++ my $self = $class->SUPER::new($mailsaobject, $language); ++ bless ($self, $class); ++ ++ return $self; ++} ++ ++sub tokenize { ++ my $self = shift; ++ my $text_array = shift; ++ ++ my @tokenized_array; ++ foreach my $text (@$text_array) { ++ next unless ($text); ++ $text =~ s/([\x80-\xFF]{3,})/&_tokenize($1)/eg; ++ push(@tokenized_array, $text); ++ } ++ return \@tokenized_array; ++} ++ ++sub _tokenize { ++ my $text = shift; ++ ++ my @buf; ++ for (my $node = $mecab->parseToNode($text); $node->{next}; $node = $node->{next}) { ++ push(@buf, $node->{surface}); ++ } ++ my $tokenized = join(' ', @buf) . 
' '; ++ return $tokenized; ++} ++ ++1; ++ +diff -uNr lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm +--- lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm 1970-01-01 09:00:00.000000000 +0900 ++++ lib/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm 2011-07-14 22:29:19.000000000 +0900 +@@ -0,0 +1,111 @@ ++# <@LICENSE> ++# Copyright 2004 Apache Software Foundation ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++# ++ ++=head1 NAME ++ ++Tokenizer::SimpleJA - simple Japanese tokenizer ++ ++=head1 SYNOPSIS ++ ++loadplugin Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA ++ ++=head1 DESCRIPTION ++ ++This plugin simply tokenizes a Japanese string by characters other than ++the alphabet, the Chinese character, and the katakana. 
++ ++=cut ++ ++package Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA; ++ ++use strict; ++use warnings; ++use Mail::SpamAssassin::Plugin::Tokenizer; ++ ++use vars qw(@ISA); ++@ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer); ++ ++our $language = 'ja'; ++ ++our $RE = qr{( ++ # Hiragana ++ (?: ++ \xE3\x81[\x80-\xBF] ++ | \xE3\x82[\x80-\x9F] ++ )+ ++ # Katakana ++ | (?: ++ \xE3\x82[\xA0-\xBF] ++ | \xE3\x83[\x80-\xBF] ++ )+ ++ # Kanji ++ | (?: ++ \xE3[\x90-\xBF][\x80-\xBF] ++ | [\xE4-\xE9][\x80-\xBF]{2} ++ | \xEF[\xA4-\xAB][\x80-\xBF] ++ )+ ++ # Fullwidth ++ | (?: ++ \xEF\xBC[\x80-\xBF] ++ | \xEF\xBD[\x80-\x9F] ++ )+ ++ # Others ++ | [\xC0-\xDF][\x80-\xBF] ++ | [\xE0-\xE2][\x80-\xBF]{2} ++ | \xE3\x80[\x80-\xBF] ++ | \xE3[\x84-\x8F][\x80-\xBF] ++ | [\xEA-\xEE][\x80-\xBF]{2} ++ | \xEF[\x80-\xA3][\x80-\xBF] ++ | \xEF[\xAC-\xBB][\x80-\xBF] ++ | \xEF\xBD[\xA0-\xBF] ++ | \xEF[\xBE-\xBF][\x80-\xBF] ++ | [\xF0-\xF7][\x80-\xBF]{3} ++)}x; ++ ++sub new { ++ my $class = shift; ++ my $mailsaobject = shift; ++ ++ $class = ref($class) || $class; ++ my $self = $class->SUPER::new($mailsaobject, $language); ++ bless ($self, $class); ++ ++ return $self; ++} ++ ++sub tokenize { ++ my $self = shift; ++ my $text_array = shift; ++ ++ my @tokenized_array; ++ foreach my $text (@$text_array) { ++ next unless ($text); ++ $text =~ s/([\x80-\xFF]{3,})/&_tokenize($1)/eg; ++ push(@tokenized_array, $text); ++ } ++ return \@tokenized_array; ++} ++ ++sub _tokenize { ++ my $text = shift; ++ ++ $text =~ s/$RE/$1 /og; ++ $text = ' ' . 
$text; ++ return $text; ++} ++ ++1; ++ +diff -uNr lib/Mail/SpamAssassin/Plugin/Tokenizer.pm lib/Mail/SpamAssassin/Plugin/Tokenizer.pm +--- lib/Mail/SpamAssassin/Plugin/Tokenizer.pm 1970-01-01 09:00:00.000000000 +0900 ++++ lib/Mail/SpamAssassin/Plugin/Tokenizer.pm 2011-07-14 22:35:46.000000000 +0900 +@@ -0,0 +1,115 @@ ++# <@LICENSE> ++# Copyright 2004 Apache Software Foundation ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. ++# ++ ++=head1 NAME ++ ++Mail::SpamAssassin::Plugin::Tokenizer - Tokenizer plugin base class ++ ++=head1 SYNOPSIS ++ ++=head2 SpamAssassin configuration: ++ ++ loadplugin MyTokenizerPlugin /path/to/MyTokenizerPlugin.pm ++ ++=head2 Perl code: ++ ++ use Mail::SpamAssassin::Plugin::Tokenizer; ++ use vars qw(@ISA); ++ @ISA = qw(Mail::SpamAssassin::Plugin::Tokenizer); ++ # language to use this plugin ++ our $language = 'ja'; ++ ++ # constructor: register language ++ sub new { ++ my $class = shift; ++ my $mailsaobject = shift; ++ ++ # some boilerplate... ++ $class = ref($class) || $class; ++ my $self = $class->SUPER::new($mailsaobject, $language); ++ bless ($self, $class); ++ ++ return $self; ++ } ++ ++ # tokenize function ++ sub tokenize { ++ my $self = shift; ++ my $text_array_ref = shift; ++ ++ ...... ++ ++ return $tokenized_array_ref; ++ } ++ ++ ++=head1 DESCRIPTION ++ ++This plugin is the base class of tokenizer plugin. 
++You must define tokenize() and $language ++ ++=head1 INTERFACE ++ ++ sub tokenize { ++ my $self = shift; ++ my $text_array_ref = shift; ++ ++ ...... ++ ++ return $tokenized_array_ref; ++ } ++ ++=cut ++ ++package Mail::SpamAssassin::Plugin::Tokenizer; ++ ++use Mail::SpamAssassin::Plugin; ++use Mail::SpamAssassin::Logger; ++use strict; ++use warnings; ++use bytes; ++ ++use vars qw(@ISA); ++@ISA = qw(Mail::SpamAssassin::Plugin); ++ ++sub new { ++ my $class = shift; ++ my $mailsaobject = shift; ++ my $language = shift; ++ ++ # some boilerplate... ++ $class = ref($class) || $class; ++ my $self = $class->SUPER::new($mailsaobject); ++ bless ($self, $class); ++ ++ if ($language) { ++ $self->{main}->{conf}->{tokenizer}->{$language} = $self; ++ } ++ else { ++ dbg("plugin: $self: \$language is not defined"); ++ } ++ ++ return $self; ++} ++ ++sub tokenize { ++ my ($self, $ref) = @_; ++ ++ return $ref; ++} ++ ++1; ++ +diff -uNr lib/Mail/SpamAssassin/Util/Charset.pm lib/Mail/SpamAssassin/Util/Charset.pm +--- lib/Mail/SpamAssassin/Util/Charset.pm 1970-01-01 09:00:00.000000000 +0900 ++++ lib/Mail/SpamAssassin/Util/Charset.pm 2011-07-14 22:29:19.000000000 +0900 +@@ -0,0 +1,471 @@ ++# <@LICENSE> ++# Copyright 2006 Apache Software Foundation ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
++# ++ ++ ++=head1 NAME ++ ++ Mail::SpamAssassin::Util::Charset.pm - Utility for charset and language ++ ++=head1 SYNOPSIS ++ ++ my ($decoded, $detected) = Mail::SpamAssassin::Util::Charset::normalize_charset($str, $charset); ++ my $language = Mail::SpamAssassin::Util::Charset::get_language($str, $charset); ++ ++=head1 DESCRIPTION ++ ++This module implements utility methods for charset and language. ++ ++=cut ++ ++package Mail::SpamAssassin::Util::Charset; ++ ++use strict; ++use warnings; ++use Encode; ++use Encode::Guess; ++use Encode::Alias; ++ ++use vars qw ( ++ @ISA @EXPORT ++); ++ ++require Exporter; ++ ++@ISA = qw(Exporter); ++@EXPORT = qw(normalize_charset get_language); ++ ++########################################################################### ++ ++use constant HAS_ENCODE_DETECT => eval { require Encode::Detect::Detector; }; ++use constant HAS_ENCODE_HANEXTRA => eval { require Encode::HanExtra; }; ++use constant HAS_ENCODE_EUCJPMS => eval { require Encode::EUCJPMS; }; ++ ++########################################################################### ++ ++our $KANA_HAN_RE = qr{ ++ # Hiragana and Katakana ++ \xE3[\x81-\x83][\x80-\xBF] ++ # Han ++ | \xE3[\x90-\xBF][\x80-\xBF] ++ | [\xE4-\xE9][\x80-\xBF]{2} ++ | \xEF[\xA4-\xAB][\x80-\xBF] ++}x; ++ ++our %enc2lang; ++our %lang2enc; ++our %scr2lang; ++our %cjkscr2lang; ++our @scrorder; ++ ++BEGIN { ++ ++ # See the following URL about this map: ++ # http://czyborra.com/charsets/iso8859.html ++ # http://czyborra.com/charsets/codepages.html ++ # http://czyborra.com/charsets/cyrillic.html ++ # http://en.wikipedia.org/wiki/ISO_8859 ++ # http://www.w3.org/International/O-charset-lang.html ++ %enc2lang = ( ++ # built-in Encodings and Encode::Byte ++ # N. America ++ 'ascii' => 'en', ++ 'cp437' => 'en', ++ 'cp863' => 'weurope', ++ ++ # W.
Europe (Latin1, Latin9) ++ # fr es ca eu pt it sq rm nl de da sv no fi fo is ga gd en af ++ 'iso-8859-1' => 'weurope', ++ 'iso-8859-15' => 'weurope', ++ 'cp850' => 'weurope', ++ 'cp860' => 'weurope', ++ 'cp1252' => 'weurope', ++ 'MacRoman' => 'weurope', ++ ++ # Cntrl. Europe / Latin2 / Latin10 ++ # hr cs hu pl sr sk sl ++ 'iso-8859-2' => 'ceurope', ++ 'cp852' => 'ceurope', ++ 'cp1250' => 'ceurope', ++ 'MacCentralEurRoman' => 'ceurope', ++ 'MacCroatian' => 'ceurope', ++ 'iso-8859-16' => 'ceurope', ++ 'MacRomanian' => 'ceurope', ++ ++ # Latin3 (Esperanto, Maltese, and Turkish. Turkish is now on 8859-9.) ++ # eo mt ++ 'iso-8859-3' => 'seurope', ++ ++ # Baltics (Latin4, Latin7) ++ # lv lt ++ 'iso-8859-4' => 'neurope', ++ 'iso-8859-13' => 'baltic', ++ 'cp1257' => 'baltic', ++ ++ # Nordics (Latin6) ++ # et kl iu se ++ 'iso-8859-10' => 'nordic', ++ ++ # Cyrillics ++ # bg be uk sr mk ru ++ 'iso-8859-5' => 'ru', ++ 'cp855' => 'ru', ++ 'cp1251' => 'ru', ++ 'cp866' => 'ru', ++ 'MacCyrillic' => 'ru', ++ 'koi8-r' => 'ru', ++ 'MacUkrainian' => 'uk', ++ 'koi8-u' => 'uk', ++ ++ # Arabic ++ 'iso-8859-6' => 'ar', ++ 'cp864' => 'ar', ++ 'cp1256' => 'ar', ++ 'MacArabic' => 'ar', ++ 'cp1006' => 'fa', ++ 'MacFarsi' => 'fa', ++ ++ # Greek ++ 'iso-8859-7' => 'el', ++ 'cp1253' => 'el', ++ 'MacGreek' => 'el', ++ ++ # Hebrew ++ # he yi ++ 'iso-8859-8' => 'he', ++ 'cp862' => 'he', ++ 'cp1255' => 'he', ++ 'MacHebrew' => 'he', ++ ++ # Turkish ++ 'iso-8859-9' => 'tr', ++ 'cp857' => 'tr', ++ 'cp1254' => 'tr', ++ 'MacTurkish' => 'tr', ++ ++ # Thai ++ 'iso-8859-11' => 'th', ++ 'cp874' => 'th', ++ ++ # Celtics (Latin8) ++ # gd cy br ++ 'iso-8859-14' => 'celtic', ++ ++ # Vietnamese ++ 'viscii' => 'vi', ++ 'cp1258' => 'vi', ++ ++ # Encode::CN ++ 'euc-cn' => 'zh', ++ 'cp936' => 'zh', ++ 'hz' => 'zh', ++ ++ # Encode::TW ++ 'big5-eten' => 'zh', ++ 'big5-hkscs' => 'zh', ++ 'cp950' => 'zh', ++ ++ # Encode::JP ++ 'euc-jp' => 'ja', ++ 'shiftjis' => 'ja', ++ '7bit-jis' => 'ja', ++ 'iso-2022-jp' => 'ja', ++ 
'iso-2022-jp-1' => 'ja', ++ 'cp932' => 'ja', ++ ++ # Encode::KR ++ 'euc-kr' => 'ko', ++ 'cp949' => 'ko', ++ 'johab' => 'ko', ++ 'iso-2022-kr' => 'ko', ++ ++ # Encode::HanExtra ++ 'euc-tw' => 'zh', ++ 'gb18030' => 'zh', ++ ++ # Encode::JIS2K ++ 'euc-jisx0213' => 'ja', ++ 'shiftjisx0123' => 'ja', ++ 'iso-2022-jp-3' => 'ja', ++ ++ # Encode::EUCJPMS ++ 'eucJP-ms' => 'ja', ++ 'cp51932' => 'ja', ++ 'cp50220' => 'ja', ++ 'cp50221' => 'ja', ++ ++ ); ++ ++ %lang2enc = ( ++ # Latin1 ++ 'en' => ['ascii'], ++ 'weurope' => ['cp1252'], ++ ++ # Latin2 ++ 'ceurope' => ['cp1250'], ++ ++ # Latin3 ++ 'seurope' => ['iso-8859-3'], ++ ++ # Latin4 ++ 'neurope' => ['iso-8859-4'], ++ ++ # Latin5 ++ 'tr' => ['cp1254'], ++ ++ # Latin6 ++ 'nordic' => ['iso-8859-10'], ++ ++ # Latin7 ++ 'baltic' => ['cp1257'], ++ ++ # Latin8 ++ 'celtic' => ['iso-8859-14'], ++ ++ # Non Latin ++ 'ru' => ['koi8-r', 'cp1251'], ++ 'uk' => ['koi8-u'], ++ ++ 'ar' => ['cp1256'], ++ 'el' => ['cp1253'], ++ 'he' => ['cp1255'], ++ 'th' => ['cp874'], ++ 'vi' => ['viscii', 'cp1258'], ++ 'zh' => ['euc-cn', 'cp950'], ++ 'ja' => ['euc-jp', 'cp932'], ++ 'ko' => ['euc-kr', 'cp949'], ++ ++ ); ++ ++ %scr2lang = ( ++ 'InLatin1Supplement' => ['weurope'], ++ 'InLatinExtendedA' => [ ++ 'ceurope', ++ 'seurope', ++ 'tr', ++ 'vi' ++ ], ++ 'InLatinExtendedB' => [ ++ 'nordic', ++ 'baltic', ++ 'celtic' ++ ], ++ 'Thai' => ['th'], ++ 'Cyrillic' => ['ru', 'uk'], ++ 'Arabic' => ['ar'], ++ 'Greek' => ['el'], ++ 'Hebrew' => ['he'], ++ ); ++ ++ # better detection for CJK ++ @scrorder = ('Hiragana','Katakana','Hangul','Han',keys(%scr2lang)); ++ %cjkscr2lang = ( ++ 'Hiragana' => ['ja'], ++ 'Katakana' => ['ja'], ++ 'Hangul' => ['ko'], ++ 'Han' => ['zh', 'ja', 'ko'], ++ ); ++ ++ unless (HAS_ENCODE_HANEXTRA) { ++ Encode::Alias::define_alias( qr/^gb18030$/i => ' "euc-cn"' ); ++ } ++ Encode::Alias::define_alias( qr/^unicode-1-1-(.+)$/i => ' "$1"' ); ++ Encode::Alias::define_alias( qr/^TIS-620$/i => ' "iso-8859-11"' ); ++ Encode::Alias::define_alias( 
qr/^x-mac-(.+)$/i => ' "Mac$1"' ); ++ Encode::Alias::define_alias( qr/^Shift_JIS$/i => ' "cp932"' ); ++ if (HAS_ENCODE_EUCJPMS) { ++ Encode::Alias::define_alias( qr/^iso-2022-jp$/i => ' "cp50221"' ); ++ } ++} ++ ++sub get_language { ++ my $str = shift; # $str must be UTF-8 encoding ++ my $charset = shift; ++ ++ return 'en' unless $charset; ++ if ($charset !~ /^utf/i) { ++ return $enc2lang{$charset}; ++ } elsif (defined($str)) { ++ $str =~ s/[\x00-\x7F]//g; # remove ASCII characters ++ return 'en' if ($str eq ''); ++ ++ my %handled; ++ $str = Encode::decode_utf8($str) unless (Encode::is_utf8($str)); ++ foreach my $scr (@scrorder) { ++ next if ($str !~ /\p{$scr}/); ++ my $scrlangs = exists($cjkscr2lang{$scr}) ? $cjkscr2lang{$scr} : $scr2lang{$scr}; ++ foreach my $lang (@$scrlangs) { ++ next if (exists($handled{$lang})); ++ foreach my $enc (@{$lang2enc{$lang}}) { ++ my $scratch = $str; ++ Encode::encode($enc, $scratch, Encode::FB_QUIET); ++ return $lang if ($scratch eq ''); ++ } ++ $handled{$lang} = 1; ++ } ++ } ++ } ++ return 'en'; ++} ++ ++# TEST 1: try conversion to use the specified charset. ++# TEST 2: try conversion to use Encode::Detect. ++# TEST 3: try conversion to use Encode::Guess. ++sub normalize_charset { ++ my $str = shift; ++ my $charset = shift; ++ ++ return wantarray ? ($str, 'ascii') : $str unless ($str); ++ ++ my $decoded; ++ my $detected; ++ ++ if ($charset) { ++ ($decoded, $detected) = _specified_encoding($str, $charset); ++ } ++ unless ($detected) { ++ ($decoded, $detected) = _encode_detect($str); ++ } ++ unless ($detected) { ++ ($decoded, $detected) = _encode_guess($str); ++ } ++ unless ($detected) { ++ return ($str, undef); ++ } ++ $decoded =~ s/^\x{feff}//g; ++ $decoded = Encode::encode_utf8($decoded); ++ ++ # unfold hiragana, katakana and han ++ if ($detected =~ /^(?:UTF|EUC|BIG5|GB|SHIFTJIS|ISO-2022|CP969$|CP932$|CP949|CP50221$)/i) { ++ $decoded =~ s/($KANA_HAN_RE)\012($KANA_HAN_RE)/$1$2/og; ++ } ++ return wantarray ? 
($decoded, $detected) : $decoded; ++} ++ ++sub _specified_encoding { ++ my $str = shift; ++ my $encoding = shift; ++ ++ my $detected; ++ my $decoded; ++ ++ return (undef, undef) unless ($encoding); ++ ++ # note: ISO-2022-* cannot be distinguished from US-ASCII ++ return (undef, undef) if ($str =~ /\e/ and $encoding !~ /^ISO-2022/i); ++ ++ # UTF-16|32 encoding without BOM cannot be trusted. ++ return (undef, undef) if ($encoding =~ /^UTF-32$/i and $str !~ /^(?:\xFF\xFE\x00\x00|\x00\x00\xFE\xFF)/); ++ return (undef, undef) if ($encoding =~ /^UTF-16$/i and $str !~ /^(?:\xFF\xFE|\xFE\xFF)/); ++ ++ #$encoding = _get_alias($encoding); ++ my $encoder = Encode::find_encoding($encoding); ++ if (ref($encoder)) { ++ $decoded = $encoder->decode($str,Encode::FB_QUIET); ++ $detected = $encoder->name if ($str eq ''); ++ } ++ return ($decoded, $detected); ++} ++ ++sub _encode_detect { ++ return undef unless HAS_ENCODE_DETECT; ++ my $str = shift; ++ ++ # UTF-16|32 encoding without BOM cannot be trusted. ++ return (undef, undef) if ($str =~ /\x00\x00/ and $str !~ /^(?:\xFF\xFE\x00\x00|\x00\x00\xFE\xFF)/); ++ return (undef, undef) if ($str =~ /\x00/ and $str !~ /^(?:\xFF\xFE|\xFE\xFF)/); ++ ++ my $decoded; ++ my $detected = Encode::Detect::Detector::detect($str); ++ if ($detected) { ++ $detected = _get_alias($detected); ++ my $encoder = Encode::find_encoding($detected); ++ if (ref($encoder)) { ++ $decoded = $encoder->decode($str); ++ $detected = $decoded ? $encoder->name : undef; ++ } ++ else { ++ $detected = undef; ++ } ++ } ++ return ($decoded, $detected); ++} ++ ++sub _encode_guess { ++ my $str = shift; ++ ++ my $detected; ++ my $decoded; ++ my $encoder; ++ ++ # Step 1: Examine ISO-2022-*.
++ if ($str =~ /\e/) { ++ $Encode::Guess::NoUTFAutoGuess = 1; ++ $encoder = Encode::Guess::guess_encoding($str, ++ qw/cp50221 7bit-jis iso-2022-kr/); ++ $Encode::Guess::NoUTFAutoGuess = 0; ++ } ++ ++ # Step 2: Examine US-ASCII/UTF-(8|16|32) ++ unless (ref($encoder)) { ++ $Encode::Guess::NoUTFAutoGuess = 0; ++ $encoder = Encode::Guess::guess_encoding($str); ++ } ++ ++ # Step 3: Examine other encodings ++ unless (ref($encoder)) { ++ $Encode::Guess::NoUTFAutoGuess = 1; ++ eval { ++ if ($str =~ /[\x80-\xFF]{4}/) { ++ $encoder = Encode::Guess::guess_encoding($str, ++ qw/euc-cn big5-eten euc-jp cp932 euc-kr cp949/); ++ } ++ else { ++ $encoder = Encode::Guess::guess_encoding($str, ++ qw/iso-8859-1 cp1252/); ++ } ++ }; ++ $Encode::Guess::NoUTFAutoGuess = 0; ++ } ++ if (ref($encoder)) { ++ $detected = $encoder->name; ++ if ($detected) { ++ $decoded = $encoder->decode($str); ++ } ++ } ++ return ($decoded, $detected); ++} ++ ++sub _get_alias { ++ my $encoding = shift; ++ ++ unless (HAS_ENCODE_HANEXTRA) { ++ $encoding =~ s/^gb18030$/euc-cn/i; ++ } ++ $encoding =~ s/^unicode-1-1-(.+)$/$1/i; ++ $encoding =~ s/^TIS-620$/iso-8859-11/i; ++ $encoding =~ s/x-mac-(.+)$/Mac$1/i; ++ $encoding =~ s/^Shift_JIS$/cp932/i; ++ if (HAS_ENCODE_EUCJPMS) { ++ $encoding =~ s/^iso-2022-jp$/cp50221/i; ++ $encoding =~ s/^euc-jp$/cp51932/i; ++ } ++ ++ return $encoding; ++} ++ ++ ++1; ++ diff --git a/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist b/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist new file mode 100644 index 000000000000..11643c87b058 --- /dev/null +++ b/japanese/p5-Mail-SpamAssassin/files/spamassassin-3.3.2-ja-1.plist @@ -0,0 +1,7 @@ +%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm +%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm +@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer +%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer.pm +%%SITE_PERL%%/Mail/SpamAssassin/Util/Charset.pm +@unexec if cmp -s 
%%PREFIX%%/%%ETCDIR%%/%%TOKENIZER_PRE%%.sample %%PREFIX%%/%%ETCDIR%%/%%TOKENIZER_PRE%%; then rm -f %%PREFIX%%/%%ETCDIR%%/%%TOKENIZER_PRE%%; fi +%%ETCDIR%%/%%TOKENIZER_PRE%%.sample diff --git a/japanese/p5-Mail-SpamAssassin/files/tokenizer.pre b/japanese/p5-Mail-SpamAssassin/files/tokenizer.pre new file mode 100644 index 000000000000..d21410bbadc9 --- /dev/null +++ b/japanese/p5-Mail-SpamAssassin/files/tokenizer.pre @@ -0,0 +1,8 @@ + +# Tokenizer::SimpleJA +# +loadplugin Mail::SpamAssassin::Plugin::Tokenizer::SimpleJA + +# Tokenizer::MeCab +# +#loadplugin Mail::SpamAssassin::Plugin::Tokenizer::MeCab diff --git a/japanese/p5-Mail-SpamAssassin/pkg-deinstall b/japanese/p5-Mail-SpamAssassin/pkg-deinstall deleted file mode 100644 index fb8983183880..000000000000 --- a/japanese/p5-Mail-SpamAssassin/pkg-deinstall +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh - -if [ "$2" != "POST-DEINSTALL" ]; then - exit 0 -fi - -if [ -d /var/db/spamassassin ]; then - echo "To delete /var/db/spamassassin, use 'rm -rf /var/db/spamassassin'" -fi - -USER=spamd - -if pw usershow "${USER}" 2>/dev/null 1>&2; then - echo "To delete ${USER} user permanently, use 'rmuser ${USER}'" -fi - -exit 0 diff --git a/japanese/p5-Mail-SpamAssassin/pkg-descr b/japanese/p5-Mail-SpamAssassin/pkg-descr deleted file mode 100644 index e4807953b7aa..000000000000 --- a/japanese/p5-Mail-SpamAssassin/pkg-descr +++ /dev/null @@ -1,14 +0,0 @@ -SpamAssassin is a mail filter which attempts to identify spam using text -analysis and several internet-based realtime blacklists. - -Using its rule base, it uses a wide range of heuristic tests on mail -headers and body text to identify "spam", also known as unsolicited -commercial email. - -Once identified, the mail can then be optionally tagged as spam for later -filtering using the user's own mail user-agent application. 
- -Additional drop-in rule sets are available at -http://wiki.apache.org/spamassassin/CustomRulesets - -WWW: http://spamassassin.apache.org/ diff --git a/japanese/p5-Mail-SpamAssassin/pkg-install b/japanese/p5-Mail-SpamAssassin/pkg-install deleted file mode 100644 index 568dc5e832e4..000000000000 --- a/japanese/p5-Mail-SpamAssassin/pkg-install +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/sh -PKG_PREFIX=${PKG_PREFIX:-/usr/local} -USER=${USER:-spamd} -GROUP=${GROUP:-spamd} -HOME=/var/spool/${USER} - -if [ "$2" = "POST-INSTALL" ];then -ask() { - local question default answer - - question=$1 - default=$2 - if [ -z "${PACKAGE_BUILDING}" -a -z "${BATCH}" ]; then - read -t120 -p "${question} [${default}]? " answer - fi - echo ${answer:-${default}} -} - -yesno() { - local question default answer - - question=$1 - default=$2 - while :; do - answer=$(ask "${question}" "${default}") - case "${answer}" in - [Yy]*) return 0;; - [Nn]*) return 1;; - esac - echo "Please answer yes or no." - done -} - - # Create pid directory - install -d -o ${USER} -g ${GROUP} /var/run/spamd - /usr/bin/su root -c "${PKG_PREFIX}/bin/spamassassin -x -L --lint" - - if [ ${?} -eq 9 ];then - echo "***********************************************" - echo "*__ ___ ____ _ _ ___ _ _ ____ *" - echo "*\ \ / / \ | _ \| \ | |_ _| \ | |/ ___|*" - echo "* \ \ /\ / / _ \ | |_) | \| || || \| | | _ *" - echo "* \ V V / ___ \| _ <| |\ || || |\ | |_| |*" - echo "* \_/\_/_/ \_\_| \_\_| \_|___|_| \_|\____|*" - echo "* *" - echo "*You must install rules before starting spamd!*" - echo "***********************************************" - if [ -z "${PACKAGE_BUILDING}" -a -z "${BATCH}" ]; then - if yesno "Do you wish to run sa-update to fetch new rules" "N";then - ${PKG_PREFIX}/bin/sa-update || true - else - echo "" - fi - /usr/bin/su root -c "${PKG_PREFIX}/bin/spamassassin -x -L --lint" - if [ ${?} -eq 0 ] && grep '^load.*Rule2XSBody' ${PKG_PREFIX}/etc/mail/spamassassin/v320.pre > /dev/null ;then - if yesno "Do you wish to 
compile rules with re2c (will take a long time)" "N";then - ${PKG_PREFIX}/bin/sa-compile || true - fi - fi - fi - fi - - exit 0 -fi # post-install - -exit 0 diff --git a/japanese/p5-Mail-SpamAssassin/pkg-message b/japanese/p5-Mail-SpamAssassin/pkg-message index 01153919b024..2ba532e27974 100644 --- a/japanese/p5-Mail-SpamAssassin/pkg-message +++ b/japanese/p5-Mail-SpamAssassin/pkg-message @@ -39,8 +39,8 @@ as root. To change this, also add this to rc.conf: spamd_flags="-u spamd -H /var/spool/spamd" ************************************************************************ -For Japanese users, see document in -PREFIX/share/doc/ja-p5-Mail-SpamAssassin/ +For Japanese users, see documents in +http://emaillab.jp/spamassassin/ja-patch/ Tokenizer::MeCab uses UTF-8 encoding. You may have to manually (re)install the following ports with the build options for UTF-8: diff --git a/japanese/p5-Mail-SpamAssassin/pkg-plist b/japanese/p5-Mail-SpamAssassin/pkg-plist deleted file mode 100644 index 43dae0435ff3..000000000000 --- a/japanese/p5-Mail-SpamAssassin/pkg-plist +++ /dev/null @@ -1,161 +0,0 @@ -@stopdaemon sa-spamd -bin/sa-awl -bin/sa-check_spamd -bin/sa-compile -bin/sa-learn -bin/sa-update -bin/spamassassin -bin/spamc -bin/spamd -@unexec rm -rf %D/etc/mail/spamassassin/sa-update-keys || true -etc/mail/spamassassin/local.cf.sample -@unexec if cmp -s %B/init.pre.sample %B/init.pre; then rm -f %B/init.pre; fi -etc/mail/spamassassin/init.pre.sample -@exec [ -f %B/init.pre ] || cp %B/%f %B/init.pre -@unexec if cmp -s %B/tokenizer.pre.sample %B/tokenizer.pre; then rm -f %B/tokenizer.pre; fi -etc/mail/spamassassin/tokenizer.pre.sample -@exec [ -f %B/tokenizer.pre ] || cp %B/%f %B/tokenizer.pre -@unexec if cmp -s %B/v310.pre.sample %B/v310.pre; then rm -f %B/v310.pre; fi -etc/mail/spamassassin/v310.pre.sample -@exec [ -f %B/v310.pre ] || cp %B/%f %B/v310.pre -@unexec if cmp -s %B/v312.pre.sample %B/v312.pre; then rm -f %B/v312.pre; fi -etc/mail/spamassassin/v312.pre.sample -@exec [ -f 
%B/v312.pre ] || cp %B/%f %B/v312.pre -@unexec if cmp -s %B/v320.pre.sample %B/v320.pre; then rm -f %B/v320.pre; fi -etc/mail/spamassassin/v320.pre.sample -@exec [ -f %B/v320.pre ] || cp %B/%f %B/v320.pre -@unexec if cmp -s %B/v330.pre.sample %B/v330.pre; then rm -f %B/v330.pre;fi -etc/mail/spamassassin/v330.pre.sample -@exec [ -f %B/v330.pre ] || cp %B/%f %B/v330.pre -include/libspamc.h -lib/libspamc.so -lib/libspamc.so.0 -%%SSL%%lib/libsslspamc.so -%%SSL%%lib/libsslspamc.so.0 -%%SITE_PERL%%/Mail/SpamAssassin.pm -%%SITE_PERL%%/Mail/SpamAssassin/AICache.pm -%%SITE_PERL%%/Mail/SpamAssassin/ArchiveIterator.pm -%%SITE_PERL%%/Mail/SpamAssassin/AsyncLoop.pm -%%SITE_PERL%%/Mail/SpamAssassin/AutoWhitelist.pm -%%SITE_PERL%%/Mail/SpamAssassin/Bayes.pm -%%SITE_PERL%%/Mail/SpamAssassin/Bayes/CombineChi.pm -%%SITE_PERL%%/Mail/SpamAssassin/Bayes/CombineNaiveBayes.pm -%%SITE_PERL%%/Mail/SpamAssassin/BayesStore.pm -%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/BDB.pm -%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/DBM.pm -%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/MySQL.pm -%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/PgSQL.pm -%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/SDBM.pm -%%SITE_PERL%%/Mail/SpamAssassin/BayesStore/SQL.pm -%%SITE_PERL%%/Mail/SpamAssassin/Client.pm -%%SITE_PERL%%/Mail/SpamAssassin/Conf.pm -%%SITE_PERL%%/Mail/SpamAssassin/Conf/LDAP.pm -%%SITE_PERL%%/Mail/SpamAssassin/Conf/Parser.pm -%%SITE_PERL%%/Mail/SpamAssassin/Conf/SQL.pm -%%SITE_PERL%%/Mail/SpamAssassin/Constants.pm -%%SITE_PERL%%/Mail/SpamAssassin/DBBasedAddrList.pm -%%SITE_PERL%%/Mail/SpamAssassin/Dns.pm -%%SITE_PERL%%/Mail/SpamAssassin/DnsResolver.pm -%%SITE_PERL%%/Mail/SpamAssassin/HTML.pm -%%SITE_PERL%%/Mail/SpamAssassin/Locales.pm -%%SITE_PERL%%/Mail/SpamAssassin/Locker.pm -%%SITE_PERL%%/Mail/SpamAssassin/Locker/Flock.pm -%%SITE_PERL%%/Mail/SpamAssassin/Locker/UnixNFSSafe.pm -%%SITE_PERL%%/Mail/SpamAssassin/Locker/Win32.pm -%%SITE_PERL%%/Mail/SpamAssassin/Logger.pm 
-%%SITE_PERL%%/Mail/SpamAssassin/Logger/File.pm -%%SITE_PERL%%/Mail/SpamAssassin/Logger/Stderr.pm -%%SITE_PERL%%/Mail/SpamAssassin/Logger/Syslog.pm -%%SITE_PERL%%/Mail/SpamAssassin/MailingList.pm -%%SITE_PERL%%/Mail/SpamAssassin/Message.pm -%%SITE_PERL%%/Mail/SpamAssassin/Message/Metadata.pm -%%SITE_PERL%%/Mail/SpamAssassin/Message/Metadata/Received.pm -%%SITE_PERL%%/Mail/SpamAssassin/Message/Node.pm -%%SITE_PERL%%/Mail/SpamAssassin/NetSet.pm -%%SITE_PERL%%/Mail/SpamAssassin/PerMsgLearner.pm -%%SITE_PERL%%/Mail/SpamAssassin/PerMsgStatus.pm -%%SITE_PERL%%/Mail/SpamAssassin/PersistentAddrList.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/ASN.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/AWL.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/AccessDB.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/AntiVirus.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Bayes.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/BodyEval.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Check.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/DCC.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/DKIM.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/DNSEval.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/FreeMail.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/HTMLEval.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Hashcash.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/HeaderEval.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/ImageInfo.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/MIMEEval.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/MIMEHeader.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/OneLineBodyRuleType.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/PhishTag.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Pyzor.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Razor2.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/RelayCountry.pm 
-%%SITE_PERL%%/Mail/SpamAssassin/Plugin/RelayEval.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/ReplaceTags.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Reuse.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Rule2XSBody.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/SPF.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Shortcircuit.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/SpamCop.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Test.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/TextCat.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/MeCab.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer/SimpleJA.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/URIDNSBL.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/URIDetail.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/URIEval.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/VBounce.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/WLBLEval.pm -%%SITE_PERL%%/Mail/SpamAssassin/Plugin/WhiteListSubject.pm -%%SITE_PERL%%/Mail/SpamAssassin/PluginHandler.pm -%%SITE_PERL%%/Mail/SpamAssassin/Reporter.pm -%%SITE_PERL%%/Mail/SpamAssassin/SQLBasedAddrList.pm -%%SITE_PERL%%/Mail/SpamAssassin/SpamdForkScaling.pm -%%SITE_PERL%%/Mail/SpamAssassin/SubProcBackChannel.pm -%%SITE_PERL%%/Mail/SpamAssassin/Timeout.pm -%%SITE_PERL%%/Mail/SpamAssassin/Util.pm -%%SITE_PERL%%/Mail/SpamAssassin/Util/Charset.pm -%%SITE_PERL%%/Mail/SpamAssassin/Util/DependencyInfo.pm -%%SITE_PERL%%/Mail/SpamAssassin/Util/Progress.pm -%%SITE_PERL%%/Mail/SpamAssassin/Util/RegistrarBoundaries.pm -%%SITE_PERL%%/Mail/SpamAssassin/Util/ScopedTimer.pm -%%SITE_PERL%%/Mail/SpamAssassin/Util/TieOneStringHash.pm -%%SITE_PERL%%/%%PERL_ARCH%%/auto/Mail/SpamAssassin/.packlist -%%SITE_PERL%%/spamassassin-run.pod -%%DATADIR%%/languages -%%DATADIR%%/sa-update-pubkey.txt -%%DATADIR%%/user_prefs.template -@unexec rm -rf /var/lib/spamassassin/2* || true -@unexec rmdir /var/lib/spamassassin 2>/dev/null || true -@unexec rmdir /var/lib 2>/dev/null || true -@unexec rmdir /var/db/spamassassin 
|| true -@dirrm %%DATADIR%% -@dirrm %%SITE_PERL%%/%%PERL_ARCH%%/auto/Mail/SpamAssassin -@dirrmtry %%SITE_PERL%%/%%PERL_ARCH%%/auto/Mail -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Util -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Plugin/Tokenizer -@dirrmtry %%SITE_PERL%%/Mail/SpamAssassin/Plugin -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Message/Metadata -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Message -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Logger -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Locker -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Conf -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/BayesStore -@dirrm %%SITE_PERL%%/Mail/SpamAssassin/Bayes -@dirrmtry %%SITE_PERL%%/Mail/SpamAssassin -@dirrmtry %%SITE_PERL%%/Mail -@dirrmtry etc/mail/spamassassin -@dirrmtry etc/mail -@unexec rm -rf /var/run/spamd || true -- cgit v1.2.3