about | summary | refs | log | tree | commit | diff
path: root/graphics/ocrad
diff options
context:
space:
mode:
author Sergei Kolobov <sergei@FreeBSD.org> 2003-11-20 21:30:40 +0000
committer Sergei Kolobov <sergei@FreeBSD.org> 2003-11-20 21:30:40 +0000
commit acc9ba9bb2d27546775b7e3f2322cb6b7d008314 (patch)
tree 013c02e02b7924ac4f07966dca68e917b86ccee7 /graphics/ocrad
parent 694d56942864e53da03bc27308a2c4dc676f8488 (diff)
download ports-acc9ba9bb2d27546775b7e3f2322cb6b7d008314.tar.gz
download ports-acc9ba9bb2d27546775b7e3f2322cb6b7d008314.zip
Add ocrad 0.5, OCR program implemented as filter.
GNU Ocrad is an OCR (Optical Character Recognition) program implemented as a filter and based on a feature extraction method. It reads a bitmap image in pbm format and outputs text in ISO-8859-1 (Latin-1) charset. Also includes a layout analyser able to separate the columns or blocks of text normally found on printed pages. It can be used as a stand-alone console application, or as a backend to other programs.

PR: 59232
Submitted by: Ulrich Spoerlein <q@uni.de>
Notes
Notes: svn path=/head/; revision=94543
Diffstat (limited to 'graphics/ocrad')
-rw-r--r-- graphics/ocrad/Makefile 33
-rw-r--r-- graphics/ocrad/distinfo 1
-rw-r--r-- graphics/ocrad/files/patch-Makefile.in 45
-rw-r--r-- graphics/ocrad/files/patch-bitmap.cc 34
-rw-r--r-- graphics/ocrad/files/patch-character.h 10
-rw-r--r-- graphics/ocrad/files/patch-configure 29
-rw-r--r-- graphics/ocrad/files/patch-iso_8859_1.h 14
-rw-r--r-- graphics/ocrad/files/patch-main.cc 21
-rw-r--r-- graphics/ocrad/files/patch-textline.cc 146
-rw-r--r-- graphics/ocrad/pkg-descr 9
-rw-r--r-- graphics/ocrad/pkg-plist 8
11 files changed, 350 insertions, 0 deletions
diff --git a/graphics/ocrad/Makefile b/graphics/ocrad/Makefile
new file mode 100644
index 000000000000..29d747d48b03
--- /dev/null
+++ b/graphics/ocrad/Makefile
@@ -0,0 +1,33 @@
+# New ports collection makefile for: graphics/ocrad
+# Date created: 12.11.2003
+# Whom: Ulrich Spoerlein <q@uni.de>
+#
+# $FreeBSD$
+#
+
+PORTNAME= ocrad
+PORTVERSION= 0.5
+CATEGORIES= graphics
+MASTER_SITES= ${MASTER_SITE_GNU}
+MASTER_SITE_SUBDIR= ${PORTNAME}
+
+MAINTAINER= q@uni.de
+COMMENT= OCR program implemented as filter
+
+USE_BZIP2= yes
+HAS_CONFIGURE= yes
+USE_GETOPT_LONG= yes
+
+CONFIGURE_ARGS= --prefix=${PREFIX}
+MAKE_ENV= CPPFLAGS="${CPPFLAGS}" LDFLAGS="${LDFLAGS}"
+
+DOCS= AUTHORS ChangeLog NEWS README TODO
+INFO= ocrad
+
+.if !defined(NOPORTDOCS)
+post-install:
+ @${MKDIR} ${DOCSDIR}
+ cd ${WRKSRC} && ${INSTALL_DATA} ${DOCS} ${DOCSDIR}
+.endif
+
+.include <bsd.port.mk>
diff --git a/graphics/ocrad/distinfo b/graphics/ocrad/distinfo
new file mode 100644
index 000000000000..f91e66017830
--- /dev/null
+++ b/graphics/ocrad/distinfo
@@ -0,0 +1 @@
+MD5 (ocrad-0.5.tar.bz2) = 75bdfda680ddeede5dafa523a16c7191
diff --git a/graphics/ocrad/files/patch-Makefile.in b/graphics/ocrad/files/patch-Makefile.in
new file mode 100644
index 000000000000..56d780e5a694
--- /dev/null
+++ b/graphics/ocrad/files/patch-Makefile.in
@@ -0,0 +1,45 @@
+--- Makefile.in.orig Sat Oct 18 01:29:16 2003
++++ Makefile.in Sun Nov 16 18:18:58 2003
+@@ -4,13 +4,14 @@
+
+ DISTNAME = ocrad-0.5
+
+-CXX = g++
+-INSTALL = install
+-INSTALL_PROGRAM = $(INSTALL)
+-INSTALL_DATA = $(INSTALL) -m 644
+-SHELL = /bin/sh
+-CXXFLAGS = -Wall -W -O2
+-LDFLAGS =
++CXX?= g++
++INSTALL?= install
++INSTALL_PROGRAM?= $(INSTALL)
++INSTALL_DATA?= $(INSTALL) -m 644
++SHELL?= /bin/sh
++CXXFLAGS?= -Wall -W -O2
++CPPFLAGS?=
++LDFLAGS?=
+
+ objs = common.o rectangle.o iso_8859_1.o bitmap.o block.o blockmap.o \
+ profile.o feats.o character.o recognize1.o \
+@@ -22,10 +23,10 @@
+ all : ocrad
+
+ ocrad : $(objs)
+- $(CXX) $(LDFLAGS) $(CXXFLAGS) -o ocrad $(objs)
++ $(CXX) $(LDFLAGS) $(CXXFLAGS) $(CPPFLAGS) -o ocrad $(objs)
+
+ ocradp : $(objs)
+- $(CXX) $(LDFLAGS) $(CXXFLAGS) -pg -o ocradp $(objs)
++ $(CXX) $(LDFLAGS) $(CXXFLAGS) $(CPPFLAGS) -pg -o ocradp $(objs)
+
+ %.o : %.cc
+ $(CXX) $(CXXFLAGS) -c -o $@ $<
+@@ -42,6 +43,7 @@
+ textline.o : block.h character.h iso_8859_1.h textline.h
+ textblock.o : block.h character.h textline.h textblock.h
+ main.o : block.h blockmap.h bitmap.h character.h textline.h textblock.h
++ $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c main.cc
+
+
+ install : all install-info
diff --git a/graphics/ocrad/files/patch-bitmap.cc b/graphics/ocrad/files/patch-bitmap.cc
new file mode 100644
index 000000000000..4768997c17b1
--- /dev/null
+++ b/graphics/ocrad/files/patch-bitmap.cc
@@ -0,0 +1,34 @@
+--- bitmap.cc.orig Sun Nov 16 17:28:14 2003
++++ bitmap.cc Sun Nov 16 17:33:30 2003
+@@ -24,12 +24,11 @@
+ #include "rectangle.h"
+ #include "bitmap.h"
+
+-
+ namespace {
+
+ char pbm_getrawbyte( FILE * f ) throw( Bitmap::Error )
+ {
+- int ch = std::getc( f );
++ int ch = getc( f );
+
+ if( ch == EOF )
+ throw Bitmap::Error( "end-of-file reading pbm file.\n" );
+@@ -58,7 +57,7 @@
+ char ch;
+ int i = 0;
+
+- do ch = pbm_getc( f ); while( std::isspace( ch ) );
++ do ch = pbm_getc( f ); while( isspace( ch ) );
+ if( !std::isdigit( ch ) )
+ throw Bitmap::Error( "junk in pbm file where an integer should be.\n" );
+ do { i = (i * 10) + (ch - '0'); ch = pbm_getc( f ); }
+@@ -71,7 +70,7 @@
+ {
+ char ch;
+
+- do ch = pbm_getc( f ); while( std::isspace( ch ) );
++ do ch = pbm_getc( f ); while( isspace( ch ) );
+
+ if( ch == '0' ) return false;
+ if( ch == '1' ) return true;
diff --git a/graphics/ocrad/files/patch-character.h b/graphics/ocrad/files/patch-character.h
new file mode 100644
index 000000000000..5dacbaf8555a
--- /dev/null
+++ b/graphics/ocrad/files/patch-character.h
@@ -0,0 +1,10 @@
+--- character.h.orig Sun Nov 16 17:40:28 2003
++++ character.h Sun Nov 16 17:40:44 2003
+@@ -64,6 +64,7 @@
+ void swap_guesses( int i, int j ) throw();
+ const Guess & guess( int i ) const throw();
+ int guesses() const throw() { return _guess.size(); }
++#undef isalnum
+ bool isalnum() const throw();
+
+ void join( Character & c ) throw();
diff --git a/graphics/ocrad/files/patch-configure b/graphics/ocrad/files/patch-configure
new file mode 100644
index 000000000000..802fa316e8ee
--- /dev/null
+++ b/graphics/ocrad/files/patch-configure
@@ -0,0 +1,29 @@
+--- configure.orig Mon Aug 25 00:07:09 2003
++++ configure Wed Nov 12 22:50:24 2003
+@@ -13,7 +13,7 @@
+ while true ; do
+
+ # Break out if there are no more args
+- if [ $# == 0 ]; then break ; fi
++ if [ $# = 0 ]; then break ; fi
+
+ # Get the first arg, and shuffle
+ option=$1
+@@ -48,7 +48,7 @@
+
+ # Find the source files, if location was not specified.
+ srcdirtext=
+-if [ x${srcdir} == x ]; then
++if [ x${srcdir} = x ]; then
+ srcdirtext="or . or .." ; srcdir=.
+ if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi
+ if [ ! -r ${srcdir}/${srctrigger} ] ; then
+@@ -65,7 +65,7 @@
+ fi
+
+ # Set srcdir to . if that's what it is.
+-if [ $(pwd) == $(cd ${srcdir} ; pwd) ] ; then srcdir=. ; fi
++if [ $(pwd) = $(cd ${srcdir} ; pwd) ] ; then srcdir=. ; fi
+
+ # write variables to config file.
+ rm -f Makefile
diff --git a/graphics/ocrad/files/patch-iso_8859_1.h b/graphics/ocrad/files/patch-iso_8859_1.h
new file mode 100644
index 000000000000..9d07257db560
--- /dev/null
+++ b/graphics/ocrad/files/patch-iso_8859_1.h
@@ -0,0 +1,14 @@
+--- iso_8859_1.h.orig Sun Nov 16 17:14:19 2003
++++ iso_8859_1.h Sun Nov 16 17:33:51 2003
+@@ -100,6 +100,11 @@
+ static unsigned char base_letter( unsigned char ch ) throw();
+ static unsigned char compose( unsigned char base_letter,
+ unsigned char accent ) throw();
++#undef isalnum
++#undef isalpha
++#undef isupper
++#undef isvowel
++#undef toupper
+ static bool isalnum( unsigned char ch ) throw();
+ static bool isalpha( unsigned char ch ) throw();
+ static bool isupper( unsigned char ch ) throw();
diff --git a/graphics/ocrad/files/patch-main.cc b/graphics/ocrad/files/patch-main.cc
new file mode 100644
index 000000000000..e0497bf2e2f6
--- /dev/null
+++ b/graphics/ocrad/files/patch-main.cc
@@ -0,0 +1,21 @@
+--- main.cc.orig Sat Oct 18 01:27:29 2003
++++ main.cc Sun Nov 16 18:16:41 2003
+@@ -28,6 +28,7 @@
+ #include <cstring>
+ #include <vector>
+ #include <getopt.h>
++#include <libgen.h>
+ #include "common.h"
+ #include "rectangle.h"
+ #include "bitmap.h"
+@@ -289,8 +290,8 @@
+ {
+ if( infile == stdin )
+ {
+- std::ungetc( std::getc( infile ), infile );
+- if( std::feof( infile ) || std::ferror( infile ) ) infile = 0;
++ ungetc( getc( infile ), infile );
++ if( feof( infile ) || ferror( infile ) ) infile = 0;
+ }
+ while( infile != stdin )
+ {
diff --git a/graphics/ocrad/files/patch-textline.cc b/graphics/ocrad/files/patch-textline.cc
new file mode 100644
index 000000000000..804a281ab3fa
--- /dev/null
+++ b/graphics/ocrad/files/patch-textline.cc
@@ -0,0 +1,146 @@
+--- textline.cc.orig Sun Nov 16 17:39:59 2003
++++ textline.cc Sun Nov 16 17:44:01 2003
+@@ -33,7 +33,7 @@
+ for( ; end < characters(); ++end )
+ {
+ Character & c = character( end );
+- if( c.guesses() && std::isspace( c.guess(0).ch ) ) break;
++ if( c.guesses() && isspace( c.guess(0).ch ) ) break;
+ }
+ return end;
+ }
+@@ -264,11 +264,11 @@
+ if( c1.guesses() == 1 )
+ {
+ unsigned char ch = c1.guess( 0 ).ch;
+- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
++ if( isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( ch != 'c' && ch != 'o' && ch != 's' && ch != 'u' && ch != 'v' &&
+ ch != 'w' && ch != 'x' && ch != 'z' ) continue;
+ if( 4 * c1.height() > 5 * mean_height() )
+- { c1.only_guess( std::toupper( ch ), 0 ); continue; }
++ { c1.only_guess( toupper( ch ), 0 ); continue; }
+ if( 5 * c1.height() < 4 * mean_height() ) continue;
+ for( int j = begin; j < characters(); ++j ) if( j != i )
+ {
+@@ -276,12 +276,12 @@
+ if( c2.guesses() >= 1 )
+ {
+ unsigned char ch2 = c2.guess( 0 ).ch;
+- if( std::isspace( ch2 ) ) break;
+- if( ( std::isalpha( ch2 ) && 4 * c1.height() > 5 * c2.height() ) ||
+- ( ( std::isupper( ch2 ) || ch2 == 'l' ) && ch2 != 'B' &&
++ if( isspace( ch2 ) ) break;
++ if( ( isalpha( ch2 ) && 4 * c1.height() > 5 * c2.height() ) ||
++ ( ( isupper( ch2 ) || ch2 == 'l' ) && ch2 != 'B' &&
+ ( c1.height() >= c2.height() ||
+ Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) )
+- { c1.insert_guess( 0, std::toupper( ch ), 1 ); break; }
++ { c1.insert_guess( 0, toupper( ch ), 1 ); break; }
+ }
+ }
+ }
+@@ -310,7 +310,7 @@
+ if( c1.guesses() >= 1 )
+ {
+ unsigned char ch = c1.guess( 0 ).ch;
+- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
++ if( isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( ch <= 127 || c1.block_vector().size() < 2 ) continue;
+ int chb = ISO_8859_1::base_letter( ch );
+ if( chb != 'o' && chb != 'u' ) continue;
+@@ -323,11 +323,11 @@
+ unsigned char ch2 = c2.guess( 0 ).ch;
+ int ch2b = ISO_8859_1::base_letter( ch2 );
+ if( !ch2b && ch2 > 127 ) continue;
+- if( std::isspace( ch2 ) ) break;
+- if( ( std::isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) ||
+- ( std::isupper( ch2 ) && Ocrad::similar( b1.height(), c2.height(), 10 ) ) ||
+- ( std::isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) ||
+- ( std::isupper( ch2b ) && Ocrad::similar( c1.height(), c2.height(), 10 ) ) )
++ if( isspace( ch2 ) ) break;
++ if( ( isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) ||
++ ( isupper( ch2 ) && Ocrad::similar( b1.height(), c2.height(), 10 ) ) ||
++ ( isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) ||
++ ( isupper( ch2b ) && Ocrad::similar( c1.height(), c2.height(), 10 ) ) )
+ { c1.insert_guess( 0, ISO_8859_1::toupper( ch ), 1 ); break; }
+ }
+ }
+@@ -341,7 +341,7 @@
+ if( c1.guesses() >= 1 )
+ {
+ unsigned char ch = c1.guess( 0 ).ch;
+- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
++ if( isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( ch != 'o' && ch != 'O' && ch != 'l' ) continue;
+ for( int j = begin; j < characters(); ++j ) if( j != i )
+ {
+@@ -349,8 +349,8 @@
+ if( c2.guesses() >= 1 )
+ {
+ unsigned char ch2 = c2.guess( 0 ).ch;
+- if( std::isspace( ch2 ) ) break;
+- if( std::isdigit( ch2 ) )
++ if( isspace( ch2 ) ) break;
++ if( isdigit( ch2 ) )
+ {
+ if( Ocrad::similar( c1.height(), c2.height(), 10 ) )
+ c1.insert_guess( 0, (ch == 'l') ? '1' : '0', c1.guess(0).value + 1 );
+@@ -372,7 +372,7 @@
+ Character & c2 = character( i );
+ if( !c2.guesses() ) continue;
+ unsigned char ch = c2.guess( 0 ).ch;
+- if( !std::isalnum( ch ) && ch != '.' && ch != '|' ) continue;
++ if( !isalnum( ch ) && ch != '.' && ch != '|' ) continue;
+ switch( ch )
+ {
+ case 'g': case 'j': case 'p': case 'q': case 'y':
+@@ -426,14 +426,14 @@
+ if( i < characters() - 1 && character( i + 1 ).guesses() )
+ rch = character( i + 1 ).guess( 0 ).ch;
+ if( ISO_8859_1::isupper( rch ) &&
+- ( !lch || ISO_8859_1::isupper( lch ) || std::isspace( lch ) ) )
++ ( !lch || ISO_8859_1::isupper( lch ) || isspace( lch ) ) )
+ { c.insert_guess( 0, 'I', 1 ); continue; }
+ if( ch == 'l' ) continue;
+ if( ISO_8859_1::isalpha( lch ) || ISO_8859_1::isalpha( rch ) )
+ { c.insert_guess( 0, 'l', 1 ); continue; }
+- if( rch == '|' && ( !lch || std::isspace( lch ) ) &&
++ if( rch == '|' && ( !lch || isspace( lch ) ) &&
+ i < characters() - 2 && character( i + 2 ).guesses() &&
+- std::isalpha( character( i + 2 ).guess( 0 ).ch ) )
++ isalpha( character( i + 2 ).guess( 0 ).ch ) )
+ { c.insert_guess( 0, 'l', 1 ); continue; }
+ }
+ }
+@@ -475,7 +475,7 @@
+ if( c.guesses() )
+ {
+ unsigned char ch = c.guess( 0 ).ch;
+- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
++ if( isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( i == begin && ch == 'a' && c.guesses() == 2 &&
+ c.guess( 1 ).ch == 'Q' && 4 * c.height() > 5 * mean_height() )
+ c.swap_guesses( 0, 1 );
+@@ -501,7 +501,7 @@
+ if( c1.guesses() )
+ {
+ unsigned char ch = c1.guess(0).ch;
+- if( std::isspace( ch ) ) { begin = i + 1 ; continue; }
++ if( isspace( ch ) ) { begin = i + 1 ; continue; }
+ if( c1.guesses() != 2 || ch != 'B' || c1.guess(1).ch != 'a' ) continue;
+ if( 4 * c1.height() > 5 * mean_height() ) continue;
+ for( int j = begin; j < characters(); ++j ) if( j != i )
+@@ -510,9 +510,9 @@
+ if( c2.guesses() >= 1 )
+ {
+ unsigned char ch2 = c2.guess(0).ch;
+- if( std::isspace( ch2 ) ) break;
+- if( ( std::isalpha( ch2 ) && 5 * c1.height() < 4 * c2.height() ) ||
+- ( std::islower( ch2 ) &&
++ if( isspace( ch2 ) ) break;
++ if( ( isalpha( ch2 ) && 5 * c1.height() < 4 * c2.height() ) ||
++ ( islower( ch2 ) &&
+ ( c1.height() <= c2.height() ||
+ Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) )
+ { c1.swap_guesses( 0, 1 ); break; }
diff --git a/graphics/ocrad/pkg-descr b/graphics/ocrad/pkg-descr
new file mode 100644
index 000000000000..844adfc1ce83
--- /dev/null
+++ b/graphics/ocrad/pkg-descr
@@ -0,0 +1,9 @@
+GNU Ocrad is an OCR (Optical Character Recognition) program implemented
+as a filter and based on a feature extraction method. It reads a bitmap
+image in pbm format and outputs text in ISO-8859-1 (Latin-1) charset.
+Also includes a layout analyser able to separate the columns or blocks
+of text normally found on printed pages.
+It can be used as a stand-alone console application, or as a backend to
+other programs.
+
+WWW: http://www.gnu.org/software/ocrad/ocrad.html
diff --git a/graphics/ocrad/pkg-plist b/graphics/ocrad/pkg-plist
new file mode 100644
index 000000000000..c3269db7f09f
--- /dev/null
+++ b/graphics/ocrad/pkg-plist
@@ -0,0 +1,8 @@
+@comment $FreeBSD$
+bin/ocrad
+%%PORTDOCS%%%%DOCSDIR%%/AUTHORS
+%%PORTDOCS%%%%DOCSDIR%%/ChangeLog
+%%PORTDOCS%%%%DOCSDIR%%/NEWS
+%%PORTDOCS%%%%DOCSDIR%%/README
+%%PORTDOCS%%%%DOCSDIR%%/TODO
+%%PORTDOCS%%@dirrm %%DOCSDIR%%