diff options
author | Pav Lucistnik <pav@FreeBSD.org> | 2003-12-31 00:17:41 +0000 |
---|---|---|
committer | Pav Lucistnik <pav@FreeBSD.org> | 2003-12-31 00:17:41 +0000 |
commit | 8b94b4ebc5eae120364b0df0733f6e8ffcda5047 (patch) | |
tree | 0723cf60067434039b4bb64fcd5ad3fbf4f7a660 /graphics/ocrad | |
parent | f1552e21b1179b88102346dc70b8da46ce79322d (diff) | |
download | ports-8b94b4ebc5eae120364b0df0733f6e8ffcda5047.tar.gz ports-8b94b4ebc5eae120364b0df0733f6e8ffcda5047.zip |
Notes
Diffstat (limited to 'graphics/ocrad')
-rw-r--r-- | graphics/ocrad/Makefile | 22 | ||||
-rw-r--r-- | graphics/ocrad/distinfo | 2 | ||||
-rw-r--r-- | graphics/ocrad/files/ocrad.1 | 114 | ||||
-rw-r--r-- | graphics/ocrad/files/patch-Makefile.in | 10 | ||||
-rw-r--r-- | graphics/ocrad/files/patch-bitmap.cc | 34 | ||||
-rw-r--r-- | graphics/ocrad/files/patch-character.h | 10 | ||||
-rw-r--r-- | graphics/ocrad/files/patch-configure | 29 | ||||
-rw-r--r-- | graphics/ocrad/files/patch-iso_8859_1.h | 9 | ||||
-rw-r--r-- | graphics/ocrad/files/patch-main.cc | 21 | ||||
-rw-r--r-- | graphics/ocrad/files/patch-textline.cc | 146 |
10 files changed, 144 insertions, 253 deletions
diff --git a/graphics/ocrad/Makefile b/graphics/ocrad/Makefile index 29d747d48b03..a03f13295923 100644 --- a/graphics/ocrad/Makefile +++ b/graphics/ocrad/Makefile @@ -6,7 +6,7 @@ # PORTNAME= ocrad -PORTVERSION= 0.5 +PORTVERSION= 0.6 CATEGORIES= graphics MASTER_SITES= ${MASTER_SITE_GNU} MASTER_SITE_SUBDIR= ${PORTNAME} @@ -17,17 +17,33 @@ COMMENT= OCR program implemented as filter USE_BZIP2= yes HAS_CONFIGURE= yes USE_GETOPT_LONG= yes +USE_REINPLACE= yes CONFIGURE_ARGS= --prefix=${PREFIX} -MAKE_ENV= CPPFLAGS="${CPPFLAGS}" LDFLAGS="${LDFLAGS}" +MAKE_ENV= CPPFLAGS="${CPPFLAGS}" LDFLAGS="${LDFLAGS}" \ + INSTALL_PROGRAM="${INSTALL_PROGRAM}" DOCS= AUTHORS ChangeLog NEWS README TODO INFO= ocrad +MAN1= ocrad.1 + +STD_PATCH= textline.cc recognize2.cc bitmap.cc main.cc + +.include <bsd.port.pre.mk> +post-patch: +.if (${OSVERSION} < 500000) && ! (defined(USE_GCC) && ${GCCVERSION} > 30000) +.for file in ${STD_PATCH} + @${REINPLACE_CMD} -e 's/std::isspace/isspace/g; s/std::getc/getc/g' \ + -e 's/std::ungetc/ungetc/g; s/std::feof/feof/g' \ + -e 's/std::ferror/ferror/g' ${WRKSRC}/${file} +.endfor +.endif .if !defined(NOPORTDOCS) post-install: @${MKDIR} ${DOCSDIR} cd ${WRKSRC} && ${INSTALL_DATA} ${DOCS} ${DOCSDIR} + @${INSTALL_MAN} ${FILESDIR}/ocrad.1 ${PREFIX}/man/man1 .endif -.include <bsd.port.mk> +.include <bsd.port.post.mk> diff --git a/graphics/ocrad/distinfo b/graphics/ocrad/distinfo index f91e66017830..dcacc5f0109c 100644 --- a/graphics/ocrad/distinfo +++ b/graphics/ocrad/distinfo @@ -1 +1 @@ -MD5 (ocrad-0.5.tar.bz2) = 75bdfda680ddeede5dafa523a16c7191 +MD5 (ocrad-0.6.tar.bz2) = ebcefd3512a4f9d870d302167d8b8ec9 diff --git a/graphics/ocrad/files/ocrad.1 b/graphics/ocrad/files/ocrad.1 new file mode 100644 index 000000000000..a99b2fdfdd73 --- /dev/null +++ b/graphics/ocrad/files/ocrad.1 @@ -0,0 +1,114 @@ +.TH OCRAD 1 "30 December 2003" "0.6" "GNU Ocrad" +.SH NAME +ocrad \- Optical Character Recognition +.SH SYNOPSIS +.I ocrad +\-afhivV \-b NUMBER \-l MODE \-o FILE \-x FILE 
[FILES ...] +.SH DESCRIPTION +.LP +.I ocrad +is an OCR (Optical Character Recognition) program +implemented as a filter and based on a feature extraction method. It +reads a bitmap image in pbm format and outputs text in ISO\-8859\-1 +(Latin\-1) charset. Also includes a layout analyser able to separate +the columns or blocks of text normally found on printed pages. It can +be used as a stand\-alone console application, or as a backend to other +programs. +.SH OPTIONS +.TP +.I "\-a", "\-\-append" +Append generated text to the output file instead of overwriting it. +.TP +.I "\-b NUMBER", "\-\-block=NUMBER" +Process only the specified text block, beginning from 1. +Is only useful when used in conjunction with layout analysis (see below). +.TP +.I "\-D LEVEL", "\-\-debug=LEVEL" +The Levels are: +.nf +100 - Show raw block list, unordered + 99 - Show recursive block list, unordered + 98 - Show main block list, unordered + 97 - Show recursive block list, ordered + 96 - Show main block list, ordered + 95..90 - reserved + 89 - Show all blocks from every character + 88 - Show main black blocks from every character + 87 - Show guess list for every character + 86 - Show best guess for every character +.fi +.TP +.I "\-f", "\-\-force" +Force overwrite of output file. +.TP +.I "\-h", "\-\-help" +Print an informative help message describing the options and then exit. +.TP +.I "\-i", "\-\-invert" +Invert image levels (white on black). +.TP +.I "\-l MODE", "\-\-layout=MODE" +Enable page layout analysis. The meaning of +.I MODE +is: +.nf +`0' no analysis at all, +`1' column separation, +`2' full analysis. +.fi +.TP +.I "\-o FILE" +Place the output into +.I FILE +instead of into the standard output. +.TP +.I "\-v", "\-\-verbose" +Verbose mode. +.TP +.I "\-V", "\-\-version" +Print the version number of Ocrad on the standard output and then exit. +.TP +.I "\-x FILE" +Write (export) OCR Results File to +.I FILE +\. 
+.SH BUGS +If you find a bug in GNU Ocrad, please send electronic mail to +<bug-ocrad@gnu.org>. Include the version number, which you can find by +running `ocrad \-\-version'. +.SH CAVEATS +.IP \(bu 2 +Scan directly in b/w mode. Convert from grayscale only if you know what +you are doing. +.IP \(bu 2 +For better results the characters should be at least 20 pixels high. +.IP \(bu 2 +Merged characters are always a problem. Try to avoid them. +.IP \(bu 2 +Very bold or very light (broken) characters are also a problem. +.IP \(bu 2 +Always see with your own eyes the pbm file before blaming Ocrad for the +results. Remember the saying, "garbage in, garbage out". +.SH TODO +.IP \(bu 2 +Deal with broken characters. +.IP \(bu 2 +Make a better layout detector. Every character on its line. +.IP \(bu 2 +Separate (more) merged characters. +.IP \(bu 2 +Deal better with frames, lines, pictures, etc. +.IP \(bu 2 +Change to ISO_8859\-15 (update for ISO_8859\-1 with euro sign). +.IP \(bu 2 +Add an option for recognizing ISO_8859\-9 chars (Turkish). +.SH GETTING +.I ocrad +is available from http://www.gnu.org/software/ocrad/ocrad.html +.SH AUTHOR +.nf +Antonio Diaz <ant_diaz@teleline.es> +.fi +.SH HISTORY +.I ocrad +0.6 was released in December 2003. 
diff --git a/graphics/ocrad/files/patch-Makefile.in b/graphics/ocrad/files/patch-Makefile.in index 56d780e5a694..dae15cfc0777 100644 --- a/graphics/ocrad/files/patch-Makefile.in +++ b/graphics/ocrad/files/patch-Makefile.in @@ -1,8 +1,8 @@ ---- Makefile.in.orig Sat Oct 18 01:29:16 2003 -+++ Makefile.in Sun Nov 16 18:18:58 2003 +--- Makefile.in.orig Thu Dec 18 11:11:05 2003 ++++ Makefile.in Tue Dec 30 20:20:01 2003 @@ -4,13 +4,14 @@ - DISTNAME = ocrad-0.5 + DISTNAME = ocrad-0.6 -CXX = g++ -INSTALL = install @@ -35,8 +35,8 @@ %.o : %.cc $(CXX) $(CXXFLAGS) -c -o $@ $< -@@ -42,6 +43,7 @@ - textline.o : block.h character.h iso_8859_1.h textline.h +@@ -43,6 +44,7 @@ + recognize2.o : block.h character.h iso_8859_1.h textline.h textblock.o : block.h character.h textline.h textblock.h main.o : block.h blockmap.h bitmap.h character.h textline.h textblock.h + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c main.cc diff --git a/graphics/ocrad/files/patch-bitmap.cc b/graphics/ocrad/files/patch-bitmap.cc deleted file mode 100644 index 4768997c17b1..000000000000 --- a/graphics/ocrad/files/patch-bitmap.cc +++ /dev/null @@ -1,34 +0,0 @@ ---- bitmap.cc.orig Sun Nov 16 17:28:14 2003 -+++ bitmap.cc Sun Nov 16 17:33:30 2003 -@@ -24,12 +24,11 @@ - #include "rectangle.h" - #include "bitmap.h" - -- - namespace { - - char pbm_getrawbyte( FILE * f ) throw( Bitmap::Error ) - { -- int ch = std::getc( f ); -+ int ch = getc( f ); - - if( ch == EOF ) - throw Bitmap::Error( "end-of-file reading pbm file.\n" ); -@@ -58,7 +57,7 @@ - char ch; - int i = 0; - -- do ch = pbm_getc( f ); while( std::isspace( ch ) ); -+ do ch = pbm_getc( f ); while( isspace( ch ) ); - if( !std::isdigit( ch ) ) - throw Bitmap::Error( "junk in pbm file where an integer should be.\n" ); - do { i = (i * 10) + (ch - '0'); ch = pbm_getc( f ); } -@@ -71,7 +70,7 @@ - { - char ch; - -- do ch = pbm_getc( f ); while( std::isspace( ch ) ); -+ do ch = pbm_getc( f ); while( isspace( ch ) ); - - if( ch == '0' ) return false; - if( ch == '1' ) return 
true; diff --git a/graphics/ocrad/files/patch-character.h b/graphics/ocrad/files/patch-character.h deleted file mode 100644 index 5dacbaf8555a..000000000000 --- a/graphics/ocrad/files/patch-character.h +++ /dev/null @@ -1,10 +0,0 @@ ---- character.h.orig Sun Nov 16 17:40:28 2003 -+++ character.h Sun Nov 16 17:40:44 2003 -@@ -64,6 +64,7 @@ - void swap_guesses( int i, int j ) throw(); - const Guess & guess( int i ) const throw(); - int guesses() const throw() { return _guess.size(); } -+#undef isalnum - bool isalnum() const throw(); - - void join( Character & c ) throw(); diff --git a/graphics/ocrad/files/patch-configure b/graphics/ocrad/files/patch-configure deleted file mode 100644 index 802fa316e8ee..000000000000 --- a/graphics/ocrad/files/patch-configure +++ /dev/null @@ -1,29 +0,0 @@ ---- configure.orig Mon Aug 25 00:07:09 2003 -+++ configure Wed Nov 12 22:50:24 2003 -@@ -13,7 +13,7 @@ - while true ; do - - # Break out if there are no more args -- if [ $# == 0 ]; then break ; fi -+ if [ $# = 0 ]; then break ; fi - - # Get the first arg, and shuffle - option=$1 -@@ -48,7 +48,7 @@ - - # Find the source files, if location was not specified. - srcdirtext= --if [ x${srcdir} == x ]; then -+if [ x${srcdir} = x ]; then - srcdirtext="or . or .." ; srcdir=. - if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi - if [ ! -r ${srcdir}/${srctrigger} ] ; then -@@ -65,7 +65,7 @@ - fi - - # Set srcdir to . if that's what it is. --if [ $(pwd) == $(cd ${srcdir} ; pwd) ] ; then srcdir=. ; fi -+if [ $(pwd) = $(cd ${srcdir} ; pwd) ] ; then srcdir=. ; fi - - # write variables to config file. 
- rm -f Makefile diff --git a/graphics/ocrad/files/patch-iso_8859_1.h b/graphics/ocrad/files/patch-iso_8859_1.h index 9d07257db560..9e78ee087ed8 100644 --- a/graphics/ocrad/files/patch-iso_8859_1.h +++ b/graphics/ocrad/files/patch-iso_8859_1.h @@ -1,14 +1,15 @@ ---- iso_8859_1.h.orig Sun Nov 16 17:14:19 2003 -+++ iso_8859_1.h Sun Nov 16 17:33:51 2003 -@@ -100,6 +100,11 @@ +--- iso_8859_1.h.orig Wed Dec 3 12:12:01 2003 ++++ iso_8859_1.h Tue Dec 30 17:12:05 2003 +@@ -100,6 +100,12 @@ static unsigned char base_letter( unsigned char ch ) throw(); static unsigned char compose( unsigned char base_letter, unsigned char accent ) throw(); +#undef isalnum +#undef isalpha ++#undef islower +#undef isupper +#undef isvowel +#undef toupper static bool isalnum( unsigned char ch ) throw(); static bool isalpha( unsigned char ch ) throw(); - static bool isupper( unsigned char ch ) throw(); + static bool islower( unsigned char ch ) throw(); diff --git a/graphics/ocrad/files/patch-main.cc b/graphics/ocrad/files/patch-main.cc deleted file mode 100644 index e0497bf2e2f6..000000000000 --- a/graphics/ocrad/files/patch-main.cc +++ /dev/null @@ -1,21 +0,0 @@ ---- main.cc.orig Sat Oct 18 01:27:29 2003 -+++ main.cc Sun Nov 16 18:16:41 2003 -@@ -28,6 +28,7 @@ - #include <cstring> - #include <vector> - #include <getopt.h> -+#include <libgen.h> - #include "common.h" - #include "rectangle.h" - #include "bitmap.h" -@@ -289,8 +290,8 @@ - { - if( infile == stdin ) - { -- std::ungetc( std::getc( infile ), infile ); -- if( std::feof( infile ) || std::ferror( infile ) ) infile = 0; -+ ungetc( getc( infile ), infile ); -+ if( feof( infile ) || ferror( infile ) ) infile = 0; - } - while( infile != stdin ) - { diff --git a/graphics/ocrad/files/patch-textline.cc b/graphics/ocrad/files/patch-textline.cc deleted file mode 100644 index 804a281ab3fa..000000000000 --- a/graphics/ocrad/files/patch-textline.cc +++ /dev/null @@ -1,146 +0,0 @@ ---- textline.cc.orig Sun Nov 16 17:39:59 2003 -+++ textline.cc Sun Nov 
16 17:44:01 2003 -@@ -33,7 +33,7 @@ - for( ; end < characters(); ++end ) - { - Character & c = character( end ); -- if( c.guesses() && std::isspace( c.guess(0).ch ) ) break; -+ if( c.guesses() && isspace( c.guess(0).ch ) ) break; - } - return end; - } -@@ -264,11 +264,11 @@ - if( c1.guesses() == 1 ) - { - unsigned char ch = c1.guess( 0 ).ch; -- if( std::isspace( ch ) ) { begin = i + 1 ; continue; } -+ if( isspace( ch ) ) { begin = i + 1 ; continue; } - if( ch != 'c' && ch != 'o' && ch != 's' && ch != 'u' && ch != 'v' && - ch != 'w' && ch != 'x' && ch != 'z' ) continue; - if( 4 * c1.height() > 5 * mean_height() ) -- { c1.only_guess( std::toupper( ch ), 0 ); continue; } -+ { c1.only_guess( toupper( ch ), 0 ); continue; } - if( 5 * c1.height() < 4 * mean_height() ) continue; - for( int j = begin; j < characters(); ++j ) if( j != i ) - { -@@ -276,12 +276,12 @@ - if( c2.guesses() >= 1 ) - { - unsigned char ch2 = c2.guess( 0 ).ch; -- if( std::isspace( ch2 ) ) break; -- if( ( std::isalpha( ch2 ) && 4 * c1.height() > 5 * c2.height() ) || -- ( ( std::isupper( ch2 ) || ch2 == 'l' ) && ch2 != 'B' && -+ if( isspace( ch2 ) ) break; -+ if( ( isalpha( ch2 ) && 4 * c1.height() > 5 * c2.height() ) || -+ ( ( isupper( ch2 ) || ch2 == 'l' ) && ch2 != 'B' && - ( c1.height() >= c2.height() || - Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) ) -- { c1.insert_guess( 0, std::toupper( ch ), 1 ); break; } -+ { c1.insert_guess( 0, toupper( ch ), 1 ); break; } - } - } - } -@@ -310,7 +310,7 @@ - if( c1.guesses() >= 1 ) - { - unsigned char ch = c1.guess( 0 ).ch; -- if( std::isspace( ch ) ) { begin = i + 1 ; continue; } -+ if( isspace( ch ) ) { begin = i + 1 ; continue; } - if( ch <= 127 || c1.block_vector().size() < 2 ) continue; - int chb = ISO_8859_1::base_letter( ch ); - if( chb != 'o' && chb != 'u' ) continue; -@@ -323,11 +323,11 @@ - unsigned char ch2 = c2.guess( 0 ).ch; - int ch2b = ISO_8859_1::base_letter( ch2 ); - if( !ch2b && ch2 > 127 ) continue; -- if( std::isspace( ch2 ) ) break; 
-- if( ( std::isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) || -- ( std::isupper( ch2 ) && Ocrad::similar( b1.height(), c2.height(), 10 ) ) || -- ( std::isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) || -- ( std::isupper( ch2b ) && Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) -+ if( isspace( ch2 ) ) break; -+ if( ( isalpha( ch2 ) && 4 * b1.height() > 5 * c2.height() ) || -+ ( isupper( ch2 ) && Ocrad::similar( b1.height(), c2.height(), 10 ) ) || -+ ( isalpha( ch2b ) && 4 * c1.height() > 5 * c2.height() ) || -+ ( isupper( ch2b ) && Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) - { c1.insert_guess( 0, ISO_8859_1::toupper( ch ), 1 ); break; } - } - } -@@ -341,7 +341,7 @@ - if( c1.guesses() >= 1 ) - { - unsigned char ch = c1.guess( 0 ).ch; -- if( std::isspace( ch ) ) { begin = i + 1 ; continue; } -+ if( isspace( ch ) ) { begin = i + 1 ; continue; } - if( ch != 'o' && ch != 'O' && ch != 'l' ) continue; - for( int j = begin; j < characters(); ++j ) if( j != i ) - { -@@ -349,8 +349,8 @@ - if( c2.guesses() >= 1 ) - { - unsigned char ch2 = c2.guess( 0 ).ch; -- if( std::isspace( ch2 ) ) break; -- if( std::isdigit( ch2 ) ) -+ if( isspace( ch2 ) ) break; -+ if( isdigit( ch2 ) ) - { - if( Ocrad::similar( c1.height(), c2.height(), 10 ) ) - c1.insert_guess( 0, (ch == 'l') ? '1' : '0', c1.guess(0).value + 1 ); -@@ -372,7 +372,7 @@ - Character & c2 = character( i ); - if( !c2.guesses() ) continue; - unsigned char ch = c2.guess( 0 ).ch; -- if( !std::isalnum( ch ) && ch != '.' && ch != '|' ) continue; -+ if( !isalnum( ch ) && ch != '.' 
&& ch != '|' ) continue; - switch( ch ) - { - case 'g': case 'j': case 'p': case 'q': case 'y': -@@ -426,14 +426,14 @@ - if( i < characters() - 1 && character( i + 1 ).guesses() ) - rch = character( i + 1 ).guess( 0 ).ch; - if( ISO_8859_1::isupper( rch ) && -- ( !lch || ISO_8859_1::isupper( lch ) || std::isspace( lch ) ) ) -+ ( !lch || ISO_8859_1::isupper( lch ) || isspace( lch ) ) ) - { c.insert_guess( 0, 'I', 1 ); continue; } - if( ch == 'l' ) continue; - if( ISO_8859_1::isalpha( lch ) || ISO_8859_1::isalpha( rch ) ) - { c.insert_guess( 0, 'l', 1 ); continue; } -- if( rch == '|' && ( !lch || std::isspace( lch ) ) && -+ if( rch == '|' && ( !lch || isspace( lch ) ) && - i < characters() - 2 && character( i + 2 ).guesses() && -- std::isalpha( character( i + 2 ).guess( 0 ).ch ) ) -+ isalpha( character( i + 2 ).guess( 0 ).ch ) ) - { c.insert_guess( 0, 'l', 1 ); continue; } - } - } -@@ -475,7 +475,7 @@ - if( c.guesses() ) - { - unsigned char ch = c.guess( 0 ).ch; -- if( std::isspace( ch ) ) { begin = i + 1 ; continue; } -+ if( isspace( ch ) ) { begin = i + 1 ; continue; } - if( i == begin && ch == 'a' && c.guesses() == 2 && - c.guess( 1 ).ch == 'Q' && 4 * c.height() > 5 * mean_height() ) - c.swap_guesses( 0, 1 ); -@@ -501,7 +501,7 @@ - if( c1.guesses() ) - { - unsigned char ch = c1.guess(0).ch; -- if( std::isspace( ch ) ) { begin = i + 1 ; continue; } -+ if( isspace( ch ) ) { begin = i + 1 ; continue; } - if( c1.guesses() != 2 || ch != 'B' || c1.guess(1).ch != 'a' ) continue; - if( 4 * c1.height() > 5 * mean_height() ) continue; - for( int j = begin; j < characters(); ++j ) if( j != i ) -@@ -510,9 +510,9 @@ - if( c2.guesses() >= 1 ) - { - unsigned char ch2 = c2.guess(0).ch; -- if( std::isspace( ch2 ) ) break; -- if( ( std::isalpha( ch2 ) && 5 * c1.height() < 4 * c2.height() ) || -- ( std::islower( ch2 ) && -+ if( isspace( ch2 ) ) break; -+ if( ( isalpha( ch2 ) && 5 * c1.height() < 4 * c2.height() ) || -+ ( islower( ch2 ) && - ( c1.height() <= c2.height() || - 
Ocrad::similar( c1.height(), c2.height(), 10 ) ) ) ) - { c1.swap_guesses( 0, 1 ); break; } |