From 07b20beeba542176635d5172ed092f1f8dd8f410 Mon Sep 17 00:00:00 2001
From: Martin Wilke
Date: Sun, 11 Mar 2007 16:42:43 +0000
Subject: Library for automatic morphological analysis of English, Russian and German Languages. Version 2. Finds the lemmas (all forms) of a word. Written in C++.

WWW: http://www.aot.ru/
- Andrei V. Shetuhin
slonik-v-domene@mail.ru
reki@reki.ru

PR:		ports/110137
Submitted by:	Andrei V. Shetuhin
---
 textproc/Makefile              |   1 +
 textproc/lemmatizer2/Makefile  |  54 ++++++++++++++++++++++
 textproc/lemmatizer2/distinfo  |  12 +++++
 textproc/lemmatizer2/pkg-descr |   9 ++++
 textproc/lemmatizer2/pkg-plist | 102 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 178 insertions(+)
 create mode 100644 textproc/lemmatizer2/Makefile
 create mode 100644 textproc/lemmatizer2/distinfo
 create mode 100644 textproc/lemmatizer2/pkg-descr
 create mode 100644 textproc/lemmatizer2/pkg-plist

(limited to 'textproc')

diff --git a/textproc/Makefile b/textproc/Makefile
index bb2781ff1b3d..886e10a6ec69 100644
--- a/textproc/Makefile
+++ b/textproc/Makefile
@@ -243,6 +243,7 @@
     SUBDIR += latex2html
     SUBDIR += latte
     SUBDIR += lemmatizer
+    SUBDIR += lemmatizer2
     SUBDIR += libcroco
     SUBDIR += libebml
     SUBDIR += libextractor
diff --git a/textproc/lemmatizer2/Makefile b/textproc/lemmatizer2/Makefile
new file mode 100644
index 000000000000..8a7c5b2ba003
--- /dev/null
+++ b/textproc/lemmatizer2/Makefile
@@ -0,0 +1,54 @@
+# New ports collection makefile for:	lemmatizer2
+# Date created:		9 Mar 2007
+# Whom:			Andrei V. Shetuhin
+#
+# $FreeBSD$
+#
+
+PORTNAME=	lemmatizer2
+PORTVERSION=	2.0.1
+CATEGORIES=	textproc
+MASTER_SITES=	http://reki.ru/products/lemmatizer/ \
+		http://havoc.ru/products/lemmatizer/
+DISTFILES=	lemmatizer2-2.0.1.tar.gz \
+		rus-src-morph.tar.gz \
+		eng-src-morph.tar.gz \
+		ger-src-morph.tar.gz
+
+MAINTAINER=	reki@reki.ru
+COMMENT=	Russian / German / English lemmatizer library
+
+BUILD_DEPENDS=	cmake:${PORTSDIR}/devel/cmake
+LIB_DEPENDS=	pcre:${PORTSDIR}/devel/pcre
+
+DB_DIR?=	${DESTDIR}/var/db/lemmatizer
+
+DIST_SUBDIR=	${PORTNAME}
+
+USE_LDCONFIG=	${PREFIX}/lib/lemmatizer2
+
+pre-fetch:
+	@${ECHO} ""
+	@${ECHO} "You may use the following build options:"
+	@${ECHO} ""
+	@${ECHO} " DB_DIR=directory Set alternate directory for database files"
+	@${ECHO} " (default is ${DESTDIR}/var/db/lemmatizer)"
+	@${ECHO} ""
+
+do-configure:
+	@cd ${WRKSRC} && ${LOCALBASE}/bin/cmake ${WRKSRC}
+
+post-extract:
+	@cd ${WRKDIR} && ${CP} -r Dicts ${WRKSRC}
+
+generatemorph:
+	@cd ${WRKSRC} && RML=${WRKSRC};export RML && ./GenerateMorph.sh Russian
+	@cd ${WRKSRC} && RML=${WRKSRC};export RML && ./GenerateMorph.sh English
+	@cd ${WRKSRC} && RML=${WRKSRC};export RML && ./GenerateMorph.sh German
+
+installmorph:
+	@${MKDIR} ${DB_DIR} && ${MKDIR} ${DB_DIR}/Bin \
+	&& cd ${WRKSRC} && ${CP} ./Bin/rml.ini ${DB_DIR}/Bin \
+	&& ${CP} -r Dicts ${DB_DIR}
+
+.include <bsd.port.mk>
diff --git a/textproc/lemmatizer2/distinfo b/textproc/lemmatizer2/distinfo
new file mode 100644
index 000000000000..9cc4f1201c8f
--- /dev/null
+++ b/textproc/lemmatizer2/distinfo
@@ -0,0 +1,12 @@
+MD5 (lemmatizer2/lemmatizer2-2.0.1.tar.gz) = 718ca416af9fa3aed6f032f6f48073c1
+SHA256 (lemmatizer2/lemmatizer2-2.0.1.tar.gz) = c5254b288e6bbe447639985e893ab6a5d00be5993575c64352b305df088404d3
+SIZE (lemmatizer2/lemmatizer2-2.0.1.tar.gz) = 440639
+MD5 (lemmatizer2/rus-src-morph.tar.gz) = c7508fd6964e66b7248df0bdfa1671fe
+SHA256 (lemmatizer2/rus-src-morph.tar.gz) = 6ff6caa4771a89c69ce1cd582b7994b87f321bb457b64de8c431d8a434f95992
+SIZE (lemmatizer2/rus-src-morph.tar.gz) = 1162534
+MD5 (lemmatizer2/eng-src-morph.tar.gz) = de917c1f85b3fc108c3d405681370378
+SHA256 (lemmatizer2/eng-src-morph.tar.gz) = 654d4ffe70c036d2e1b7449cac7ac7f0e2df502de51a4b88d90e7bf7fa3396b2
+SIZE (lemmatizer2/eng-src-morph.tar.gz) = 415748
+MD5 (lemmatizer2/ger-src-morph.tar.gz) = 4acd751c3727df23957af8d5c997d752
+SHA256 (lemmatizer2/ger-src-morph.tar.gz) = 145a592327d8c07dd4b4b5b4effe8de1a637a3c35126a372e904c4884b354495
+SIZE (lemmatizer2/ger-src-morph.tar.gz) = 1247810
diff --git a/textproc/lemmatizer2/pkg-descr b/textproc/lemmatizer2/pkg-descr
new file mode 100644
index 000000000000..2efbb5e67dac
--- /dev/null
+++ b/textproc/lemmatizer2/pkg-descr
@@ -0,0 +1,9 @@
+Library for automatic morphological analysis of English,
+Russian and German Languages. Version 2.
+Finds the lemmas (all forms) of a word.
+Written in C++.
+
+WWW: http://www.aot.ru/
+- Andrei V. Shetuhin
+slonik-v-domene@mail.ru
+reki@reki.ru
diff --git a/textproc/lemmatizer2/pkg-plist b/textproc/lemmatizer2/pkg-plist
new file mode 100644
index 000000000000..96867478f890
--- /dev/null
+++ b/textproc/lemmatizer2/pkg-plist
@@ -0,0 +1,102 @@
+bin/FileLem
+bin/LemClientTest
+bin/MorphGen
+bin/StructDictLoader
+bin/TestLem
+include/lemmatizer2/AgramtabLib/EngGramTab.h
+include/lemmatizer2/AgramtabLib/GerGramTab.h
+include/lemmatizer2/AgramtabLib/RusGramTab.h
+include/lemmatizer2/AgramtabLib/StdGramtab.h
+include/lemmatizer2/AgramtabLib/agramtab_.h
+include/lemmatizer2/AgramtabLib/eng_consts.h
+include/lemmatizer2/AgramtabLib/ger_consts.h
+include/lemmatizer2/AgramtabLib/morph_const.h
+include/lemmatizer2/AgramtabLib/rus_consts.h
+include/lemmatizer2/GraphanLib/Consent.h
+include/lemmatizer2/GraphanLib/Descriptors.h
+include/lemmatizer2/GraphanLib/GraphanDicts.h
+include/lemmatizer2/GraphanLib/GraphmatFile.h
+include/lemmatizer2/GraphanLib/HTMLConv.h
+include/lemmatizer2/GraphanLib/StdGraph.h
+include/lemmatizer2/GraphanLib/UnitHolder.h
+include/lemmatizer2/GraphanLib/abbrev.h
+include/lemmatizer2/GraphanLib/graline.h
+include/lemmatizer2/LemClient.hpp
+include/lemmatizer2/LemmatizerLib/Lemmatizers.h
+include/lemmatizer2/LemmatizerLib/MorphAutomBuilder.h
+include/lemmatizer2/LemmatizerLib/MorphAutomat.h
+include/lemmatizer2/LemmatizerLib/MorphDict.h
+include/lemmatizer2/LemmatizerLib/MorphDictBuilder.h
+include/lemmatizer2/LemmatizerLib/PLMLineCollection.h
+include/lemmatizer2/LemmatizerLib/Paradigm.h
+include/lemmatizer2/LemmatizerLib/Predict.h
+include/lemmatizer2/LemmatizerLib/StaticVectorMap.h
+include/lemmatizer2/LemmatizerLib/Statistic.h
+include/lemmatizer2/LemmatizerLib/StdMorph.h
+include/lemmatizer2/MorphWizardLib/FormInfo.h
+include/lemmatizer2/MorphWizardLib/OperationMeter.h
+include/lemmatizer2/MorphWizardLib/wizard.h
+include/lemmatizer2/PCRE/pcre_rml.h
+include/lemmatizer2/StructDictLib/Domen.h
+include/lemmatizer2/StructDictLib/Field.h
+include/lemmatizer2/StructDictLib/ItemsContainer.h
+include/lemmatizer2/StructDictLib/LessDomItem.h
+include/lemmatizer2/StructDictLib/Ross.h
+include/lemmatizer2/StructDictLib/Signat.h
+include/lemmatizer2/StructDictLib/StdRoss.h
+include/lemmatizer2/StructDictLib/StructDictConsts.h
+include/lemmatizer2/StructDictLib/TempArticle.h
+include/lemmatizer2/StructDictLib/TextField.h
+include/lemmatizer2/common/CExpc.h
+include/lemmatizer2/common/COMSyntaxHolder.h
+include/lemmatizer2/common/Chunk.h
+include/lemmatizer2/common/DDCInternalError.h
+include/lemmatizer2/common/DDC_common.h
+include/lemmatizer2/common/DwdsThesaurus.h
+include/lemmatizer2/common/GramInfo.h
+include/lemmatizer2/common/Graspace.h
+include/lemmatizer2/common/MorphologyHolder.h
+include/lemmatizer2/common/PlmLine.h
+include/lemmatizer2/common/SyntaxHolder.h
+include/lemmatizer2/common/cgic.h
+include/lemmatizer2/common/cortege.h
+include/lemmatizer2/common/imports.h
+include/lemmatizer2/common/rus_numerals.h
+include/lemmatizer2/common/string_socket.h
+include/lemmatizer2/common/string_tokenizer.h
+include/lemmatizer2/common/util_classes.h
+include/lemmatizer2/common/utilit.h
+lib/lemmatizer2/libAgramtab-st.a
+lib/lemmatizer2/libAgramtab.so
+lib/lemmatizer2/libAgramtab.so.2
+lib/lemmatizer2/libAgramtab.so.2.0.1
+lib/lemmatizer2/libGraphan-st.a
+lib/lemmatizer2/libGraphan.so
+lib/lemmatizer2/libGraphan.so.2
+lib/lemmatizer2/libGraphan.so.2.0.1
+lib/lemmatizer2/libLemClient-st.a
+lib/lemmatizer2/libLemClient.so
+lib/lemmatizer2/libLemClient.so.2
+lib/lemmatizer2/libLemClient.so.2.0.1
+lib/lemmatizer2/libLemmatizer-st.a
+lib/lemmatizer2/libLemmatizer.so
+lib/lemmatizer2/libLemmatizer.so.2
+lib/lemmatizer2/libLemmatizer.so.2.0.1
+lib/lemmatizer2/libMorphWizard-st.a
+lib/lemmatizer2/libMorphWizard.so
+lib/lemmatizer2/libMorphWizard.so.2
+lib/lemmatizer2/libMorphWizard.so.2.0.1
+lib/lemmatizer2/libStructDict-st.a
+lib/lemmatizer2/libStructDict.so
+lib/lemmatizer2/libStructDict.so.2
+lib/lemmatizer2/libStructDict.so.2.0.1
+@dirrm include/lemmatizer2/AgramtabLib
+@dirrm include/lemmatizer2/GraphanLib
+@dirrm include/lemmatizer2/LemmatizerLib
+@dirrm include/lemmatizer2/MorphWizardLib
+@dirrm include/lemmatizer2/StructDictLib
+@dirrm include/lemmatizer2/PCRE
+@dirrm include/lemmatizer2/common
+@dirrm include/lemmatizer2
+@dirrm lib/lemmatizer2
+@unexec echo "If you going to remove lemmatizer permanently, you should also remove dictionaries directory. E.g.: rm -rf /var/db/lemmatizer" | /usr/bin/fmt
-- 
cgit v1.2.3