diff options
author | Felippe de Meirelles Motta <lippe@FreeBSD.org> | 2008-06-21 00:01:57 +0000 |
---|---|---|
committer | Felippe de Meirelles Motta <lippe@FreeBSD.org> | 2008-06-21 00:01:57 +0000 |
commit | 702ce223052d37a75d90d1e9a7c23cce604c68e7 (patch) | |
tree | b6ffa11c790521083f26bb569a7e35357201671e /biology/ssaha | |
parent | 7956f3346255cf091e670acd7693abf2754a4775 (diff) | |
download | ports-702ce223052d37a75d90d1e9a7c23cce604c68e7.tar.gz ports-702ce223052d37a75d90d1e9a7c23cce604c68e7.zip |
Notes
Diffstat (limited to 'biology/ssaha')
35 files changed, 1361 insertions, 0 deletions
diff --git a/biology/ssaha/Makefile b/biology/ssaha/Makefile new file mode 100644 index 000000000000..8855ac6ec0fa --- /dev/null +++ b/biology/ssaha/Makefile @@ -0,0 +1,33 @@ +# New ports collection makefile for: ssaha +# Date created: 12.Jun.2008 +# Whom: Fernan Aguero <fernan@iib.unsam.edu.ar> +# +# $FreeBSD$ +# + +PORTNAME= ssaha +PORTVERSION= 3.1c +CATEGORIES= biology +MASTER_SITES= http://www.sanger.ac.uk/Software/analysis/${PORTNAME:U}/ +DISTNAME= ${PORTNAME}_v${PORTVERSION:S/.//} + +MAINTAINER= fernan@iib.unsam.edu.ar +COMMENT= Very fast matching and alignment of DNA sequences + +NO_WRKSUBDIR= yes + +USE_GMAKE= yes +MAKEFILE= makefile +BUILD_WRKSRC= ${WRKDIR}/Binary +ALL_TARGET= ssaha + +DATA_FILES= test.fasta test_extract.fasta test_filter.fail \ + test_filter.fastq test_protein.fasta README + +do-install: + ${INSTALL_PROGRAM} ${BUILD_WRKSRC}/ssaha ${PREFIX}/bin + @${MKDIR} ${DATADIR} + ${INSTALL_SCRIPT} ${BUILD_WRKSRC}/testSSAHA.csh ${DATADIR}/ + ${INSTALL_DATA} ${DATA_FILES:S,^,${WRKSRC}/,} ${DATADIR}/ + +.include <bsd.port.mk> diff --git a/biology/ssaha/distinfo b/biology/ssaha/distinfo new file mode 100644 index 000000000000..1d978ee099cd --- /dev/null +++ b/biology/ssaha/distinfo @@ -0,0 +1,3 @@ +MD5 (ssaha_v31c.tar.gz) = 0260a0cce67c5c465f5b54a45b3f65ae +SHA256 (ssaha_v31c.tar.gz) = 63fa38ccd2725db6ba10881f8cc94d899afd2eba9c2f6436223d9284c5abfced +SIZE (ssaha_v31c.tar.gz) = 251510 diff --git a/biology/ssaha/files/patch-GlobalDefinitions.cpp b/biology/ssaha/files/patch-GlobalDefinitions.cpp new file mode 100644 index 000000000000..dbf6ed8a038f --- /dev/null +++ b/biology/ssaha/files/patch-GlobalDefinitions.cpp @@ -0,0 +1,11 @@ +--- ./Global/GlobalDefinitions.cpp.orig 2004-03-01 13:51:28.000000000 -0300 ++++ ./Global/GlobalDefinitions.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -119,7 +119,7 @@ + MakeIntoWord::MakeIntoWord( int bitsPerSymbol, const char* tt ): + bitsPerSymbol_( bitsPerSymbol ), tt_( tt ) + { +- for ( unsigned int i(0) ; i < 
(1<<bitsPerSymbol_) ; ++i ) ++ for ( unsigned int i(0) ; i < ( unsigned int )(1<<bitsPerSymbol_) ; ++i ) + { + map_.insert( make_pair( (static_cast<char>(tolower(tt[i]))),i ) ); + map_.insert( make_pair( (static_cast<char>(toupper(tt[i]))),i ) ); diff --git a/biology/ssaha/files/patch-GlobalDefinitions.h b/biology/ssaha/files/patch-GlobalDefinitions.h new file mode 100644 index 000000000000..fb7851574cc7 --- /dev/null +++ b/biology/ssaha/files/patch-GlobalDefinitions.h @@ -0,0 +1,82 @@ +--- ./Global/GlobalDefinitions.h.orig 2004-03-01 13:51:28.000000000 -0300 ++++ ./Global/GlobalDefinitions.h 2008-06-12 15:39:31.000000000 -0300 +@@ -726,8 +726,6 @@ + template <typename T> class Allocator + { + public: +- typedef T MyType; +- + Allocator( T** ptr, const string& name, ostream& monStream=cerr ) : + ptr_(ptr), name_(name), size_(0), isAllocated_(false), + monStream_( monStream ) +@@ -788,32 +786,34 @@ + + virtual void allocate( unsigned long size ) + { +- size_=size; +- (*ptr_)=new T[size_]; +- isAllocated_=true; ++ Allocator<T>::size_=size; ++ (*Allocator<T>::ptr_)=new T[Allocator<T>::size_]; ++ Allocator<T>::isAllocated_=true; + } + + virtual void allocateAndZero( unsigned long size ) + { + const unsigned char zero(0); + allocate(size); +- memset( (void*)(*ptr_), zero, size_*sizeof(MyType) ); ++ memset( (void*)(*Allocator<T>::ptr_), zero, Allocator<T>::size_*sizeof(T) ); + } + virtual void load( unsigned long size ) + { + allocate(size); +- loadFromFile( name_, (char*)(*ptr_), size_*sizeof(MyType), monStream_ ); ++ loadFromFile( Allocator<T>::name_, (char*)(*Allocator<T>::ptr_), ++ Allocator<T>::size_*sizeof(T), Allocator<T>::monStream_ ); + } + virtual void save() + { +- saveToFile( name_, (char*)(*ptr_), size_*sizeof(MyType), monStream_ ); ++ saveToFile( Allocator<T>::name_, (char*)(*Allocator<T>::ptr_), ++ Allocator<T>::size_*sizeof(T), Allocator<T>::monStream_ ); + } + + virtual void deallocate() + { +- if (!isAllocated_) return; +- delete [] (*ptr_); +- 
isAllocated_=false; ++ if (!Allocator<T>::isAllocated_) return; ++ delete [] (*Allocator<T>::ptr_); ++ Allocator<T>::isAllocated_=false; + } + protected: + }; +@@ -892,7 +892,7 @@ + if (isAllocated_) return; + mode_ = MemoryMapper::createMap; + size_ = size; +- (*ptr_) = (T*) linkToMap(mode_,name_,size_*sizeof(MyType)); ++ (*ptr_) = (T*) linkToMap(mode_,name_,size_*sizeof(T)); + isAllocated_ = true; + } + +@@ -908,7 +908,7 @@ + if (isAllocated_) return; + mode_ = MemoryMapper::readMap; + size_ = size; +- (*ptr_) = (T*) linkToMap(mode_,name_,size_*sizeof(MyType)); ++ (*ptr_) = (T*) linkToMap(mode_,name_,size_*sizeof(T)); + isAllocated_ = true; + } + +@@ -921,7 +921,7 @@ + virtual void deallocate() + { + if (!isAllocated_) return; +- if(munmap((caddr_t)(*ptr_), size_*sizeof(MyType)) < 0) ++ if(munmap((caddr_t)(*ptr_), size_*sizeof(T)) < 0) + perror("unmap error"); // don't throw - called from destructor! + close(fileDesc_); + if (mode_.deleteFileOnExit) shm_unlink(name_.c_str()); diff --git a/biology/ssaha/files/patch-HashTable.cpp b/biology/ssaha/files/patch-HashTable.cpp new file mode 100644 index 000000000000..c57e3745c639 --- /dev/null +++ b/biology/ssaha/files/patch-HashTable.cpp @@ -0,0 +1,20 @@ +--- ./HashTable/HashTable.cpp.orig 2004-03-01 13:51:28.000000000 -0300 ++++ ./HashTable/HashTable.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -265,7 +265,7 @@ + void HashTable::countWords( SequenceAdapter& thisSeq ) + { + +- for ( int j(0) ; j < thisSeq.size() ; ++ j ) ++ for ( WordSequence::size_type j(0) ; j < thisSeq.size() ; ++ j ) + { + // only count words that have not been flagged + pWordPositionInHitList_[(thisSeq[j]&(~gCursedWord))] +@@ -284,7 +284,7 @@ + // NB We stop at the last but one element of the + // sequence (as the last isn't a full word) + +- for ( int j(0) ; j < thisSeq.size() ; ++ j ) ++ for ( WordSequence::size_type j(0) ; j < thisSeq.size() ; ++ j ) + { + thisWord = thisSeq[j]; + // only hash words that have not been flagged diff --git 
a/biology/ssaha/files/patch-HashTableGeneric.cpp b/biology/ssaha/files/patch-HashTableGeneric.cpp new file mode 100644 index 000000000000..0d1ffb441303 --- /dev/null +++ b/biology/ssaha/files/patch-HashTableGeneric.cpp @@ -0,0 +1,48 @@ +--- ./HashTable/HashTableGeneric.cpp.orig 2005-06-21 05:48:27.000000000 -0300 ++++ ./HashTable/HashTableGeneric.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -63,18 +63,18 @@ + ( ostream& monitoringStream, + const string& name, + Allocator<PositionInHitList>& arrayAllocator ) : +- isInitialized_( false ), +- monitoringStream_( monitoringStream ), + name_( name ), +- bitsPerSymbol_( gBaseBits ), // default: may be overwritten in subclass ctor ++ isInitialized_( false ), + maxNumHits_( defaultMaxNumHits ), ++ bitsPerSymbol_( gBaseBits ), // default: may be overwritten in subclass ctor + hitListFormat_( gNotSpecified ), ++ monitoringStream_( monitoringStream ), + pArrayAllocator_ + ( arrayAllocator.clone(&pWordPositionInHitList_, + name+(string)".head", + monitoringStream_) ), +- pSequenceSizes_(NULL), +- pNameReader_(NULL) ++ pNameReader_(NULL), ++ pSequenceSizes_(NULL) + { + monitoringStream_ << "constructing HashTableGeneric\n"; + if (name_=="") +@@ -290,7 +290,7 @@ + WordSequence thisSeq; + + // NB sequences are numbered 1...n not 0...n-1 +- for ( unsigned int i(1); i <= numSeqs ; i++ ) ++ for ( int i(1); i <= numSeqs ; i++ ) + { + if( sequenceReader.getNextSequence( thisSeq, wordLength_) == -1 ) + { +@@ -973,10 +973,10 @@ + + SequenceAdapterWithOverlap::SequenceAdapterWithOverlap + ( int bitsPerSymbol, int wordLength, int stepLength ) : ++SequenceAdapter(), + bitsPerSymbol_( bitsPerSymbol ), + wordLength_( wordLength ), +-stepLength_( stepLength ), +-SequenceAdapter() ++stepLength_( stepLength ) + { + + maskLeft_ = new Word[ wordLength_ ]; diff --git a/biology/ssaha/files/patch-HashTablePacked.cpp b/biology/ssaha/files/patch-HashTablePacked.cpp new file mode 100644 index 000000000000..6efc6d24bd77 --- /dev/null +++ 
b/biology/ssaha/files/patch-HashTablePacked.cpp @@ -0,0 +1,29 @@ +--- ./HashTable/HashTablePacked.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./HashTable/HashTablePacked.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -132,7 +132,7 @@ + void HashTablePacked::countWords( SequenceAdapter& thisSeq ) + { + +- for ( int j(0) ; j < thisSeq.size() ; ++ j ) ++ for ( WordSequence::size_type j(0) ; j < thisSeq.size() ; ++ j ) + { + // only count words that have not been flagged + pWordPositionInHitList_[(thisSeq[j]&(~gCursedWord))] +@@ -151,7 +151,7 @@ + // NB We stop at the last but one element of the + // sequence (as the last isn't a full word) + +- for ( int j(0) ; j < thisSeq.size() ; ++ j ) ++ for ( WordSequence::size_type j(0) ; j < thisSeq.size() ; ++ j ) + { + thisWord = thisSeq[j]; + +@@ -200,7 +200,7 @@ + for ( WordSequence::const_iterator thisWord(seq.begin()); + thisWord != last ; ++thisWord ) + { +- int oldSize(packedHits.size()); // %%%%%% ++// int oldSize(packedHits.size()); // %%%%%% + matchWordDeluxe( *thisWord, packedHits, baseOffset ); + // cout << printResidue(*thisWord, wordLength_) << " " + // << packedHits.size()-oldSize; diff --git a/biology/ssaha/files/patch-HashTablePacked.h b/biology/ssaha/files/patch-HashTablePacked.h new file mode 100644 index 000000000000..c00860d1bbfd --- /dev/null +++ b/biology/ssaha/files/patch-HashTablePacked.h @@ -0,0 +1,16 @@ +--- ./HashTable/HashTablePacked.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./HashTable/HashTablePacked.h 2008-06-12 15:39:31.000000000 -0300 +@@ -236,11 +236,11 @@ + HashTableView<PositionPacked,HashTablePacked> + (monitoringStream, name, hitListAllocator, arrayAllocator), + wordNum_(0), ++ numRepeats_(0), ++ substituteThreshold_(0), + pMatchSequence_(&HashTablePacked::matchSequenceStandard), + pMatchWord_(&HashTablePacked::matchWordStandard), + pGenerateSubstitutes_(&generateSubstitutesDNA), +- numRepeats_(0), +- substituteThreshold_(0), + sorter_(4,(sizeof(PositionPacked)*8)/4) + { + 
hitListFormat_ = g32BitPacked; diff --git a/biology/ssaha/files/patch-HashTableTranslated.cpp b/biology/ssaha/files/patch-HashTableTranslated.cpp new file mode 100644 index 000000000000..e4ab9af82423 --- /dev/null +++ b/biology/ssaha/files/patch-HashTableTranslated.cpp @@ -0,0 +1,77 @@ +--- ./HashTable/HashTableTranslated.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./HashTable/HashTableTranslated.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -50,9 +50,9 @@ + Allocator<PositionPacked>& hitListAllocator, + Allocator<PositionInHitList>& arrayAllocator + ): +- queryFrame_(0), + HashTablePacked( monitoringStream, name, +- hitListAllocator, arrayAllocator ) ++ hitListAllocator, arrayAllocator ), ++ queryFrame_(0) + { + hitListFormat_ = g32BitPackedProtein; + bitsPerSymbol_ = gResidueBits; +@@ -135,12 +135,12 @@ + Allocator<PositionPacked>& hitListAllocator, + Allocator<PositionInHitList>& arrayAllocator + ): ++ HashTablePacked( monitoringStream, name, ++ hitListAllocator, arrayAllocator ), ++ codonEncoder_(5), + queryFrame_(0), + queryMult_(1), +- codonEncoder_(5), +- pMatchSequence_( &HashTablePackedProtein::matchSequenceProtein ), +- HashTablePacked( monitoringStream, name, +- hitListAllocator, arrayAllocator ) ++ pMatchSequence_( &HashTablePackedProtein::matchSequenceProtein ) + { + hitListFormat_ = g32BitPackedProtein; + bitsPerSymbol_ = gResidueBits; +@@ -249,14 +249,14 @@ + Allocator<PositionPacked>& hitListAllocator, + Allocator<PositionInHitList>& arrayAllocator + ) : ++ HashTableGeneric( monitoringStream, name, arrayAllocator ), + hashFwd_( monitoringStream, name+(string)"_fwd", + hitListAllocator, arrayAllocator ), + hashRev_( monitoringStream, name+(string)"_rev", + hitListAllocator, arrayAllocator ), + pHash_(&hashFwd_), + codonEncoder_(5), +- pMatchSequence_( &HashTableTranslated::matchSequenceProtein ), +- HashTableGeneric( monitoringStream, name, arrayAllocator ) ++ pMatchSequence_( &HashTableTranslated::matchSequenceProtein ) + { + 
bitsPerSymbol_=gResidueBits; + hitListFormat_ = gTranslated; +@@ -344,8 +344,8 @@ + + // Change encoding mode + SequenceReaderModeFlagReplace mode('X'); +- assert(ttCodon['X']==ttProtein['X']); +- assert(ttCodon['X']!=nv); ++ assert(ttCodon[(int)'X']==ttProtein[(int)'X']); ++ assert(ttCodon[(int)'X']!=nv); + codonEncoder_.changeMode( &mode ); + + +@@ -418,7 +418,7 @@ + seq->link( translatedSeq ); + + // NB sequences are numbered 1...n not 0...n-1 +- for ( unsigned int i(1); i <= numSeqs ; i++ ) ++ for ( int i(1); i <= numSeqs ; i++ ) + { + // cout << "hashing sequence " << i << endl; + if( sequenceReader.getNextSequence( thisSeq, eDNAWordSizeForHashing) == -1 ) +@@ -520,7 +520,7 @@ + int HashTableTranslated::getHitTypeSize( void ) const + { + assert(1==0); +- return NULL; ++ return 0; + } + void HashTableTranslated::allocateHitList( unsigned long size ) + { diff --git a/biology/ssaha/files/patch-MatchAligner.cpp b/biology/ssaha/files/patch-MatchAligner.cpp new file mode 100644 index 000000000000..e7420b1ddec4 --- /dev/null +++ b/biology/ssaha/files/patch-MatchAligner.cpp @@ -0,0 +1,94 @@ +--- ./QueryManager/MatchAligner.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchAligner.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -53,13 +53,13 @@ + bool reverseQueryCoords, + bool doAlignment, + ostream& outputStream ): ++ pAlign_(pAlign), ++ outputStream_(outputStream), + querySource_(querySource), + subjectSource_(subjectSource), +- pAlign_(pAlign), + // numCols_(numCols), + reverseQueryCoords_(reverseQueryCoords), +- doAlignment_(doAlignment), +- outputStream_(outputStream) ++ doAlignment_(doAlignment) + { + } // ~MatchTaskAlign::MatchTaskAlign + +@@ -194,10 +194,10 @@ + int bandExtension, + ScoreTable* pTable, + ostream& outputStream ) : ++ outputStream_(outputStream), + numCols_(numCols), + bandExtension_(bandExtension), +- pTable_(pTable), +- outputStream_(outputStream) ++ pTable_(pTable) + { + pBufSeq1_= new char [numCols+1]; + pBufSeq2_= new char 
[numCols+1]; +@@ -563,17 +563,17 @@ + // cout << int ( ttDNA[ *(pChar++ ] << 4 + // | (ttDNA[ *(pChar++) ] << 2) + // | ttDNA[ *(pChar++) ] ) << endl; +- if ( (ttDNA[ *(pChar) ]==nv) +- || (ttDNA[ *(pChar+1) ]==nv) +- || (ttDNA[ *(pChar+2) ]==nv) ) ++ if ( (ttDNA[ (int) *(pChar) ]==nv) ++ || (ttDNA[ (int) *(pChar+1) ]==nv) ++ || (ttDNA[ (int) *(pChar+2) ]==nv) ) + { + *i='X'; + } // ~if + else + { +- *i= gResidueNames[ ttCodon[ ttDNA[ *(pChar) ] << 4 +- | ttDNA[ *(pChar+1) ] << 2 +- | ttDNA[ *(pChar+2) ] ] ]; ++ *i= gResidueNames[ ttCodon[ ttDNA[ (int) *(pChar) ] << 4 ++ | ttDNA[ (int) *(pChar+1) ] << 2 ++ | ttDNA[ (int) *(pChar+2) ] ] ]; + } // ~else + + } // ~for i +@@ -983,7 +983,7 @@ + + void print( PathMatrix<PathType>& p ) + { +- for (int i(0); i< p.front().size(); i++) ++ for (size_t i(0); i< p.front().size(); i++) + { + for (vector<vector<PathType> >::iterator j(p.begin()); + j!=p.end();++j) +@@ -1249,16 +1249,16 @@ + bandWidth_(p2Size-p1Size+1), + bandLength_(p1Size+1), + colSize_(p2Size-p1Size+1+(2*bandExtension_)), +- fillCell_(), + finalFrame1_(p1FinalFrame), + finalFrame2_(p2FinalFrame), + numFrames1_((p1Trans[1]==NULL)?1:gNumReadingFrames), + numFrames2_((p2Trans[1]==NULL)?1:gNumReadingFrames), ++ fillCell_(), ++ getScore_(scoreTable), + v1_(colSize_, veryBadScore3D ), + v2_(colSize_, veryBadScore3D ), + pLast_(&v1_), +- pCurrent_(&v2_), +- getScore_(scoreTable) ++ pCurrent_(&v2_) + { + + p1_[0] = p1Trans[0]; +@@ -1287,7 +1287,7 @@ + { + + // ScoreType lastScore, prevFrameScore1, prevFrameScore2; +- int i,j,k,l; ++ int i,j; + + matrix.resize(bandLength_, vector<PathType3D>(colSize_) ); + diff --git a/biology/ssaha/files/patch-MatchAligner.h b/biology/ssaha/files/patch-MatchAligner.h new file mode 100644 index 000000000000..1eafad595c91 --- /dev/null +++ b/biology/ssaha/files/patch-MatchAligner.h @@ -0,0 +1,33 @@ +--- ./QueryManager/MatchAligner.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchAligner.h 2008-06-12 15:39:31.000000000 
-0300 +@@ -315,13 +315,13 @@ + + static char getCodon( const char* pChar ) + { +- return ( ( (ttDNA[ *pChar ] ==nv) +- || (ttDNA[ *(pChar+1) ] ==nv) +- || (ttDNA[ *(pChar+2) ] ==nv) ) ++ return ( ( (ttDNA[ (int) *pChar ] ==nv) ++ || (ttDNA[ (int) *(pChar+1) ] ==nv) ++ || (ttDNA[ (int) *(pChar+2) ] ==nv) ) + ? 'X' +- : gResidueNames[ ttCodon[ ttDNA[ *(pChar) ] << 4 +- | ttDNA[ *(pChar+1) ] << 2 +- | ttDNA[ *(pChar+2) ] ] ] ); ++ : gResidueNames[ ttCodon[ ttDNA[ (int) *(pChar) ] << 4 ++ | ttDNA[ (int) *(pChar+1) ] << 2 ++ | ttDNA[ (int) *(pChar+2) ] ] ] ); + } // ~getCodon + + +@@ -519,8 +519,8 @@ + : public vector<vector<PATH_TYPE> > + { + public: +- typedef pair<vector<vector<PATH_TYPE> >::iterator, +- vector<PATH_TYPE>::iterator> CellIterator; ++ typedef pair<typename vector<vector<PATH_TYPE> >::iterator, ++ typename vector<PATH_TYPE>::iterator> CellIterator; + + template<class MATRIX_FILLER> ScoreType fillIn( MATRIX_FILLER& doMatrix ) + { diff --git a/biology/ssaha/files/patch-MatchStore.h b/biology/ssaha/files/patch-MatchStore.h new file mode 100644 index 000000000000..076639489869 --- /dev/null +++ b/biology/ssaha/files/patch-MatchStore.h @@ -0,0 +1,54 @@ +--- ./QueryManager/MatchStore.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchStore.h 2008-06-12 15:39:31.000000000 -0300 +@@ -93,20 +93,20 @@ + virtual SequenceNumber getSubjectNum( void ) const + { return subjectNum_; } + // virtual inline string getSubjectName( void ) const; +- virtual inline const char* getSubjectName( void ) const; ++ virtual const char* getSubjectName( void ) const; + virtual SequenceOffset getSubjectStart( void ) const + { return subjectStart_; } + virtual SequenceOffset getSubjectEnd( void ) const + { return subjectEnd_; } + +- virtual inline SequenceNumber getQueryNum( void ) const; +- virtual inline string getQueryName( void ) const; ++ virtual SequenceNumber getQueryNum( void ) const; ++ virtual string getQueryName( void ) const; + virtual SequenceOffset 
getQueryStart( void ) const + { return queryStart_; } + virtual SequenceOffset getQueryEnd( void ) const + { return queryEnd_; } + +- virtual inline int getQuerySize( void ) const; ++ virtual int getQuerySize( void ) const; + virtual int getNumBases(void ) const + { return numBases_; } + virtual bool isQueryForward( void ) const +@@ -127,7 +127,6 @@ + SequenceOffset subjectEnd, + bool isQueryForward, + bool isSubjectForward ): +- myStore_( myStore ), + subjectNum_( subjectNum ), + numBases_( numBases ), + queryStart_( queryStart ), +@@ -135,7 +134,8 @@ + subjectStart_( subjectStart ), + subjectEnd_( subjectEnd ), + isQueryForward_( isQueryForward ), +- isSubjectForward_( isSubjectForward ){} ++ isSubjectForward_( isSubjectForward ), ++ myStore_( myStore ) {} + + SequenceNumber subjectNum_; + SequenceOffset numBases_; +@@ -444,7 +444,7 @@ + ( + unsigned int maxToSort = 1<<30, + double partialThreshold = 0.0 +- ) : sorter_(), maxToSort_(maxToSort), partialThreshold_(partialThreshold) {} ++ ) : maxToSort_(maxToSort), partialThreshold_(partialThreshold), sorter_() {} + // TBD should be virtual??? 
+ void operator()(MatchStore& store ) + { diff --git a/biology/ssaha/files/patch-MatchStoreGapped.h b/biology/ssaha/files/patch-MatchStoreGapped.h new file mode 100644 index 000000000000..7252f06a6ad8 --- /dev/null +++ b/biology/ssaha/files/patch-MatchStoreGapped.h @@ -0,0 +1,25 @@ +--- ./QueryManager/MatchStoreGapped.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchStoreGapped.h 2008-06-12 15:39:31.000000000 -0300 +@@ -57,7 +57,8 @@ + { + public: + MatchAlgorithm( int numRepeats ) : +- numRepeats_( numRepeats ), sortNeeded_(true) {} ++ sortNeeded_(true), numRepeats_( numRepeats ) {} ++ virtual ~MatchAlgorithm() {}; + void operator() + ( WordSequence& querySeq, + MatchAdder& addMatch, +@@ -82,10 +83,10 @@ + public: + MatchAlgorithmGapped + ( int maxGap, int maxInsert, int minToProcess, int numRepeats ): ++ MatchAlgorithm( numRepeats ), + maxGap_( maxGap ), + maxInsert_( maxInsert ), +- minToProcess_( minToProcess ), +- MatchAlgorithm( numRepeats ) ++ minToProcess_( minToProcess ) + {} + + virtual void generateMatches diff --git a/biology/ssaha/files/patch-MatchStoreUngapped.h b/biology/ssaha/files/patch-MatchStoreUngapped.h new file mode 100644 index 000000000000..f71815fa673c --- /dev/null +++ b/biology/ssaha/files/patch-MatchStoreUngapped.h @@ -0,0 +1,13 @@ +--- ./QueryManager/MatchStoreUngapped.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/MatchStoreUngapped.h 2008-06-12 15:39:31.000000000 -0300 +@@ -60,8 +60,8 @@ + public: + MatchAlgorithmUngapped + ( int minToProcess, int numRepeats ): +- minToProcess_( minToProcess ), +- MatchAlgorithm( numRepeats ) ++ MatchAlgorithm( numRepeats ), ++ minToProcess_( minToProcess ) + {} + + virtual void generateMatches diff --git a/biology/ssaha/files/patch-QueryManager.cpp b/biology/ssaha/files/patch-QueryManager.cpp new file mode 100644 index 000000000000..bb80ea676c1d --- /dev/null +++ b/biology/ssaha/files/patch-QueryManager.cpp @@ -0,0 +1,54 @@ +--- ./QueryManager/QueryManager.cpp.orig 
2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/QueryManager.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -164,8 +164,8 @@ + // MatchPolicy member function definitions + + MatchPolicy::MatchPolicy( HashTableGeneric& subjectTable ) : +- subjectTable_( subjectTable ), +- queryWordLength_( subjectTable.getWordLength() ) ++ queryWordLength_( subjectTable.getWordLength() ), ++ subjectTable_( subjectTable ) + {} + + +@@ -203,8 +203,8 @@ + + MatchPolicyProteinProtein::MatchPolicyProteinProtein + ( HashTablePackedProtein& subjectTable ) : +-subjectTable_( subjectTable ), +-MatchPolicy( subjectTable ) ++MatchPolicy( subjectTable ), ++subjectTable_( subjectTable ) + { + + subjectTable_.setQueryProtein(); +@@ -386,9 +386,9 @@ + QueryManager::QueryManager + ( SequenceReader& querySeqs, + HashTableGeneric& subjectSeqs, ostream& monitoringStream ) : +- queryReader_( querySeqs ), + subjectTable_( subjectSeqs ), +- monitoringStream_( monitoringStream ) ++ monitoringStream_( monitoringStream ), ++ queryReader_( querySeqs ) + { + monitoringStream_ << "constructing QueryManager\n"; + +@@ -518,7 +518,7 @@ + + task( store ); + +- if ( queryReader_.getLastSequenceNumber() == queryEnd ) break; ++ if ( (int)queryReader_.getLastSequenceNumber() == queryEnd ) break; + + // clear the query sequence ready to read in next query + querySeqFwd.clear(); +@@ -530,7 +530,7 @@ + } // ~while + while ( numBasesInLast != -1 ); + +- if ( ( queryReader_.getLastSequenceNumber() < queryEnd ) ++ if ( ( (int)queryReader_.getLastSequenceNumber() < queryEnd ) + && ( queryEnd != - 1 ) ) + { + monitoringStream_ << "Info: requested final sequence (" << queryEnd diff --git a/biology/ssaha/files/patch-QueryManager.h b/biology/ssaha/files/patch-QueryManager.h new file mode 100644 index 000000000000..fef80e7b3865 --- /dev/null +++ b/biology/ssaha/files/patch-QueryManager.h @@ -0,0 +1,19 @@ +--- ./QueryManager/QueryManager.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/QueryManager.h 2008-06-12 
15:39:31.000000000 -0300 +@@ -145,6 +145,7 @@ + // readFrame_(0), + pStore_(NULL), + subjectTable_( subjectTable ) {} ++ virtual ~MatchAdder() {}; + virtual void operator()( SequenceNumber subjectNum, + SequenceOffset numBases, + SequenceOffset queryStart, +@@ -172,7 +173,7 @@ + { + public: + MatchAdderImp( HashTableGeneric& subjectTable ) : +- lastSubjectNum_(0), name_(), MatchAdder( subjectTable ) {} ++ MatchAdder( subjectTable ), name_(), lastSubjectNum_(0) {} + virtual void operator()( SequenceNumber subjectNum, + SequenceOffset numBases, + SequenceOffset queryStart, diff --git a/biology/ssaha/files/patch-README b/biology/ssaha/files/patch-README new file mode 100644 index 000000000000..e1a8461090b7 --- /dev/null +++ b/biology/ssaha/files/patch-README @@ -0,0 +1,13 @@ +--- ./Binary/README.orig 2004-08-25 11:35:58.000000000 -0300 ++++ ./Binary/README 2008-06-12 15:39:31.000000000 -0300 +@@ -34,6 +34,10 @@ + + Patched for linux/g++3.2.2 25/8/4 AWS. + ++This version was patched for Linux/GCC 4.0.2 and Mac OS X/GCC 4.0.1 by ++Conrad Halling on 26 January 2006. See http://www.bifx.org/SSAHA/index.html ++for more information. ++ + 1. 
To compile the main ssaha executable + + make ssaha diff --git a/biology/ssaha/files/patch-SSAHAMain.cpp b/biology/ssaha/files/patch-SSAHAMain.cpp new file mode 100644 index 000000000000..c65c50796f9f --- /dev/null +++ b/biology/ssaha/files/patch-SSAHAMain.cpp @@ -0,0 +1,48 @@ +--- ./Global/SSAHAMain.cpp.orig 2004-03-01 14:12:38.000000000 -0300 ++++ ./Global/SSAHAMain.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -86,7 +86,8 @@ + Copyright (C) 2004 by Genome Research Limited\n\ + This software is released under the terms of version 2 of the GNU General\n\ + Public Licence, as published by the Free Software Foundation.\n\ +-This is SSAHA Version 3.2, released 1st March 2004.\n\n"; ++This is SSAHA Version 3.2, released 1st March 2004,\n\ ++patched 26 January 2006.\n\n"; + + Timer timeStamp; + +@@ -623,7 +624,7 @@ + { + pHashTable = new HashTablePacked( cerr, queryParams.saveName ); + if ( ( queryParams.wordLength <= 0 ) +- || ( queryParams.wordLength*gBaseBits > ( 8*sizeof(Word)) -1 ) ) ++ || ( (size_t) queryParams.wordLength*gBaseBits > ( 8*sizeof(Word)) -1 ) ) + { + cerr << "Warning: word length (" << queryParams.wordLength + << ") outside valid range (0 to " +@@ -656,7 +657,7 @@ + } // ~if + + if ( ( queryParams.wordLength <= 0 ) +- || ( queryParams.wordLength*gResidueBits > ( 8*sizeof(Word)) -1 ) ) ++ || ( (size_t) queryParams.wordLength*gResidueBits > ( 8*sizeof(Word)) -1 ) ) + { + cerr << "Warning: word length (" << queryParams.wordLength + << ") outside valid range (0 to " +@@ -782,7 +783,7 @@ + cerr << "Info: would expect " << expectedNumHits + << " hits per word for a random database of this size." << endl; + +- queryParams.maxStore=1+(int)(expectedNumHits*queryParams.maxStore); ++ queryParams.maxStore=(int)(expectedNumHits*queryParams.maxStore); + + cerr << "Info: will ignore hits on words that occur more than " + << queryParams.maxStore << " times in the database." 
<< endl; +@@ -1151,7 +1152,7 @@ + } // ~if + dirent* dirEntry; + string entryName; +- while( dirEntry = readdir(pDir) ) ++ while( 0 != ( dirEntry = readdir(pDir) ) ) + { + entryName = (string) dirEntry->d_name; + if ((entryName == ".")||(entryName=="..")) continue; diff --git a/biology/ssaha/files/patch-SSAHAMain.h b/biology/ssaha/files/patch-SSAHAMain.h new file mode 100644 index 000000000000..ee646b476fc1 --- /dev/null +++ b/biology/ssaha/files/patch-SSAHAMain.h @@ -0,0 +1,19 @@ +--- ./Global/SSAHAMain.h.orig 2004-03-01 13:51:28.000000000 -0300 ++++ ./Global/SSAHAMain.h 2008-06-12 15:39:31.000000000 -0300 +@@ -131,7 +131,7 @@ + -1, // int queryEnd; + -1, // int wordLength; + -1, // int stepLength; +- 100000, // int maxToStore; ++ 10000, // int maxToStore; + 1, // int minToPrint; + -1, // int maxGap; + 0, // int maxInsert; +@@ -174,6 +174,7 @@ + + CommandLineArg( const string& nameLong, const string& nameShort ) : + nameLong_( nameLong ), nameShort_( nameShort ) {} ++ virtual ~CommandLineArg() {}; // base class with virtual functions requires virtual destructor + // Is the current argument equal to 'my' argument name? 
+ virtual bool isThisMe( const string& argName ) + { diff --git a/biology/ssaha/files/patch-SequenceEncoder.cpp b/biology/ssaha/files/patch-SequenceEncoder.cpp new file mode 100644 index 000000000000..5832a96a40bb --- /dev/null +++ b/biology/ssaha/files/patch-SequenceEncoder.cpp @@ -0,0 +1,75 @@ +--- ./SequenceReader/SequenceEncoder.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./SequenceReader/SequenceEncoder.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -54,10 +54,10 @@ + ostream& monitoringStream): + monitoringStream_( monitoringStream ), + tt_(tt), sourceData_( sourceData ), bitsPerSymbol_(bitsPerSymbol), ++ symbolMask_((1<<bitsPerSymbol)-1), + pState_( new SequenceReaderModeIgnore( monitoringStream ) ), + wordFlag_(0), +- doubleBitShift_(bitsPerSymbol<<1), +- symbolMask_((1<<bitsPerSymbol)-1) ++ doubleBitShift_(bitsPerSymbol<<1) + // numSymbolPairs_(wordLength>>1), + // oddNumSymbols_(wordLength%1==1), + { +@@ -71,13 +71,13 @@ + ett_( rhs.ett_ ), + sourceData_( rhs.sourceData_ ), + bitsPerSymbol_( rhs.bitsPerSymbol_ ), ++ symbolMask_(rhs.symbolMask_), + wordLength_( rhs.wordLength_ ), + // pSeq_( rhs.pSeq_ ),// don't want 2 encoders linking to same seq + pState_( rhs.pState_->clone() ), + wordFlag_(0), + // numSymbolPairs_(rhs.numSymbolPairs_), + // oddNumSymbols_(rhs.oddNumSymbols_), +- symbolMask_(rhs.symbolMask_), + doubleBitShift_(rhs.doubleBitShift_) + { + monitoringStream_ << "copy constructing SequenceEncoder" << endl; +@@ -218,7 +218,7 @@ + { + pTemp = (uchar*)p; + // cout << basesInLast << " doing odd char at end " << *pTemp << endl; +- encodeChar( *p, thisWord, wordFlag_, basesInLast ); ++ encodeChar( *pTemp, thisWord, wordFlag_, basesInLast ); + } + + pSeq_->setNumBasesInLast(basesInLast); +@@ -453,7 +453,7 @@ + + for( ; i!=lastWord ; ++i ) + { +- if (toCarry!=~0) ++ if (toCarry!=(Word)~0) + (*pCodon++) = ( ((*i)&gCursedWord)|lastWordFlag ) + ? 
flaggedChar + : ( toCarry | (((*i) >> (4*gCodonBits + 2*gBaseBits))&maskBase )); +@@ -488,7 +488,7 @@ + + for( ; i!=lastWord ; ++i ) + { +- if (toCarry!=~0) ++ if (toCarry!=(Word)~0) + (*pCodon++) = ( ((*i)&gCursedWord)|lastWordFlag ) + ? flaggedChar + : ( toCarry | (((*i) >> (4*gCodonBits + gBaseBits))&mask2Bases )); +@@ -610,7 +610,7 @@ + do + { + i--; +- if (toCarry!=~0) ++ if (toCarry!=(Word)~0) + (*pCodon++) = ( ((*i)&gCursedWord)|lastWordFlag ) + ? flaggedChar + : ( toCarry | ( ((*i) & mask2Bases ) << gBaseBits ) ); +@@ -650,7 +650,7 @@ + do + { + i--; +- if (toCarry!=~0) ++ if (toCarry!=(Word)~0) + (*pCodon++) = ( ((*i)&gCursedWord)|lastWordFlag ) + ? flaggedChar + : ( toCarry | ( ((*i) & maskBase ) << (2*gBaseBits) ) ); diff --git a/biology/ssaha/files/patch-SequenceReader.cpp b/biology/ssaha/files/patch-SequenceReader.cpp new file mode 100644 index 000000000000..356c6e8363d9 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReader.cpp @@ -0,0 +1,20 @@ +--- ./SequenceReader/SequenceReader.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./SequenceReader/SequenceReader.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -179,7 +179,7 @@ + + for ( ; i != reverseBuffer_.end() ; i++, j-- ) + { +- *i= reverseChar[ *j ]; ++ *i= reverseChar[ (int) *j ]; + // cout << "char: " << (int)*j << *j << " - " << (int)*i << *i << endl; + } + +@@ -356,7 +356,7 @@ + throw SSAHAException + ("Requested seq start exceeds requested seq end in SourceReaderIndex::extractSource"); + } // ~if +- else if (seqEnd>lastSourceSeq_.size() ) ++ else if (seqEnd>(int)lastSourceSeq_.size() ) + { + cout << seqEnd << " " << lastSourceSeq_.size() << endl; + throw SSAHAException diff --git a/biology/ssaha/files/patch-SequenceReader.h b/biology/ssaha/files/patch-SequenceReader.h new file mode 100644 index 000000000000..c230fa08cbd8 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReader.h @@ -0,0 +1,11 @@ +--- ./SequenceReader/SequenceReader.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ 
./SequenceReader/SequenceReader.h 2008-06-12 15:39:31.000000000 -0300 +@@ -264,6 +264,8 @@ + pReader_( rhs.pReader_ ), + seqNum_( rhs.seqNum_ ) {} + ++ // A virtual destructor is required because this is a base class. ++ virtual ~SequenceReaderPrinter() {}; + + SequenceReaderPrinter& operator()( SequenceNumber inSeqNum ) + { diff --git a/biology/ssaha/files/patch-SequenceReaderFasta.cpp b/biology/ssaha/files/patch-SequenceReaderFasta.cpp new file mode 100644 index 000000000000..eaf3a2a5477f --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderFasta.cpp @@ -0,0 +1,32 @@ +--- ./SequenceReader/SequenceReaderFasta.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./SequenceReader/SequenceReaderFasta.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -135,9 +135,9 @@ + seqStopChar_( rhs.seqStopChar_ ), + pInputFileStream_( new ifstream( rhs.fileName_.c_str() ) ), + fileName_( rhs.fileName_.c_str() ), +- seqPositions_( rhs.seqPositions_ ), + // lastSourceSeqNum_(0), +- pEncoder_( rhs.pEncoder_->clone() ) ++ pEncoder_( rhs.pEncoder_->clone() ), ++ seqPositions_( rhs.seqPositions_ ) + { + monitoringStream_ << "copy constructing SequenceReaderFile" << this + << endl; +@@ -603,7 +603,7 @@ + throw SSAHAException + ("Requested seq start exceeds requested seq end in SequenceReaderFile::extractSource"); + } // ~if +- else if (seqEnd>lastSourceSeq_.size() ) ++ else if (seqEnd>(SequenceOffset)lastSourceSeq_.size() ) + { + throw SSAHAException + ("Requested last byte exceeds end of seq in SequenceReaderFile::extractSource"); +@@ -628,7 +628,7 @@ + computeNumSequencesInFile(); // ensure have scanned to end of file + fileFile << fileName_ << endl; + SeqIndexInfo* pIndex = new SeqIndexInfo[seqPositions_.size()]; +- for (int i(0) ; i < seqPositions_.size() ; i++) ++ for (unsigned int i(0) ; i < seqPositions_.size() ; i++) + { + pIndex[i].fileNum=fileNumber; + pIndex[i].seqPos=seqPositions_[i]; diff --git a/biology/ssaha/files/patch-SequenceReaderFasta.h 
b/biology/ssaha/files/patch-SequenceReaderFasta.h new file mode 100644 index 000000000000..a2988fbb91d8 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderFasta.h @@ -0,0 +1,11 @@ +--- ./SequenceReader/SequenceReaderFasta.h.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./SequenceReader/SequenceReaderFasta.h 2008-06-12 15:39:31.000000000 -0300 +@@ -55,7 +55,7 @@ + { + public: + SequenceReaderFileState( SequenceNumber lsn, std::streampos fp ) : +- filePos_(fp), SequenceReaderState(lsn) {} ++ SequenceReaderState(lsn), filePos_(fp) {} + // no point in making this private as it's const + const std::streampos filePos_; + }; diff --git a/biology/ssaha/files/patch-SequenceReaderFilter.h b/biology/ssaha/files/patch-SequenceReaderFilter.h new file mode 100644 index 000000000000..38c65183fb7a --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderFilter.h @@ -0,0 +1,20 @@ +--- ./SequenceReader/SequenceReaderFilter.h.orig 2004-03-01 13:51:30.000000000 -0300 ++++ ./SequenceReader/SequenceReaderFilter.h 2008-06-12 15:39:31.000000000 -0300 +@@ -80,7 +80,7 @@ + + size_t max( void ) const + { +- int max(0); ++ size_t max(0); + for (vector<vector<string*> >::const_iterator i(bins_.begin()); i!= bins_.end(); i++ ) + if (i->size()>max) max=i->size(); + return max; +@@ -109,7 +109,7 @@ + public: + SequenceReaderFilterState + ( SequenceNumber lsn, SequenceReader* ps ) : +- pState_(ps->saveState()), SequenceReaderState(lsn) {} ++ SequenceReaderState(lsn), pState_(ps->saveState()) {} + virtual ~SequenceReaderFilterState() {} //delete pState_; + // no point in making this private as it's const + // this is state info for *ps, whatever it is diff --git a/biology/ssaha/files/patch-SequenceReaderLocal.cpp b/biology/ssaha/files/patch-SequenceReaderLocal.cpp new file mode 100644 index 000000000000..f8629b6ffafe --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderLocal.cpp @@ -0,0 +1,28 @@ +--- ./SequenceReader/SequenceReaderLocal.cpp.orig 2004-03-01 13:51:30.000000000 
-0300 ++++ ./SequenceReader/SequenceReaderLocal.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -46,10 +46,10 @@ + // and seqNames_ + SequenceReaderLocal::SequenceReaderLocal + ( SequenceReader& seqFile, int wordLength, ostream& monitoringStream ) : +-sourceData_( seqFile.getSourceDataType() ), ++SequenceReader( monitoringStream ), + wordLength_( wordLength ), + bitsPerSymbol_( seqFile.getBitsPerSymbol() ), +-SequenceReader( monitoringStream ) ++sourceData_( seqFile.getSourceDataType() ) + { + monitoringStream_ + << "constructing SequenceReaderLocal from SequenceReader" << endl; +@@ -72,10 +72,10 @@ + + SequenceReaderLocal::SequenceReaderLocal + ( int wordLength, int bitsPerSymbol, ostream& monitoringStream ) : +-sourceData_( gUnknownData ), ++SequenceReader( monitoringStream ), + wordLength_( wordLength ), + bitsPerSymbol_( bitsPerSymbol ), +-SequenceReader( monitoringStream ) ++sourceData_( gUnknownData ) + { + monitoringStream_ + << "constructing empty SequenceReaderLocal" << endl; diff --git a/biology/ssaha/files/patch-SequenceReaderMulti.cpp b/biology/ssaha/files/patch-SequenceReaderMulti.cpp new file mode 100644 index 000000000000..1e160249d180 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderMulti.cpp @@ -0,0 +1,30 @@ +--- ./SequenceReader/SequenceReaderMulti.cpp.orig 2004-03-01 13:51:30.000000000 -0300 ++++ ./SequenceReader/SequenceReaderMulti.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -166,7 +166,7 @@ + { + if (thisReader_->allSeqsRead_) + { +- if ( currentSeqNum_ <= thisReader_->size_ ) ++ if ( currentSeqNum_ <= (SequenceNumber)thisReader_->size_ ) + { + // assert( thisReader_->ptr_->findSequence( currentSeqNum_ ) == true ); + lastSequenceNumber_ = --seqNum; // last read = 1 behind current +@@ -240,7 +240,7 @@ + for ( vector<SeqReaderInfo>::iterator i = allReaders_.begin(); + i != allReaders_.end(); i++ ) + { +- if ( seqNum <= i->size_ ) { thisReader_ = i; break; } // %%%%% ++ if ( seqNum <= (SequenceNumber)i->size_ ) { thisReader_ = i; break; } 
// %%%%% + seqNum -= i->size_; + } // ~for i + +@@ -276,7 +276,8 @@ + { + DEBUG_L2( "SequenceReaderMulti::getNextSequence" ); + +- int numInLast; ++ int numInLast = -1; // Initialized to avoid -Wall possibly used before initialized warning. ++ // The algorithm should be checked carefully. + + while + ( ( thisReader_ diff --git a/biology/ssaha/files/patch-SequenceReaderMulti.h b/biology/ssaha/files/patch-SequenceReaderMulti.h new file mode 100644 index 000000000000..72f079f97add --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderMulti.h @@ -0,0 +1,13 @@ +--- ./SequenceReader/SequenceReaderMulti.h.orig 2004-03-01 13:51:30.000000000 -0300 ++++ ./SequenceReader/SequenceReaderMulti.h 2008-06-12 15:39:31.000000000 -0300 +@@ -81,8 +81,9 @@ + ( SequenceNumber lsn, + vector<SeqReaderInfo>::iterator tr, + SequenceReaderState* ps ) : ++ SequenceReaderState(lsn), + thisReader_(tr), +- pState_(ps), SequenceReaderState(lsn) {} ++ pState_(ps) {} + virtual ~SequenceReaderMultiState() {} //delete pState_; + // no point in making this private as it's const + const vector<SeqReaderInfo>::iterator thisReader_; diff --git a/biology/ssaha/files/patch-SequenceReaderString.h b/biology/ssaha/files/patch-SequenceReaderString.h new file mode 100644 index 000000000000..a35c0b7cfc01 --- /dev/null +++ b/biology/ssaha/files/patch-SequenceReaderString.h @@ -0,0 +1,26 @@ +--- ./SequenceReader/SequenceReaderString.h.orig 2004-03-01 13:51:30.000000000 -0300 ++++ ./SequenceReader/SequenceReaderString.h 2008-06-12 15:39:31.000000000 -0300 +@@ -65,9 +65,9 @@ + // NB SequenceReaderStringBase takes ownership of *pEncoder + SequenceEncoder* pEncoder, + ostream& monitoringStream = cerr ) : ++ SequenceReader( monitoringStream ), + sequenceString_( sequenceString ), +- pEncoder_( pEncoder ), +- SequenceReader( monitoringStream ) ++ pEncoder_( pEncoder ) + { + monitoringStream_ << "constructing SequenceReaderStringBase" << endl; + } // constructor +@@ -78,9 +78,9 @@ + // TYPE NAME IN/OUT COMMENT + // 
Returns: TYPE COMMENT + SequenceReaderStringBase( const SequenceReaderStringBase& rhs ) : ++SequenceReader( rhs.monitoringStream_ ), + sequenceString_( rhs.sequenceString_ ), +-pEncoder_( rhs.pEncoder_->clone() ), +-SequenceReader( rhs.monitoringStream_ ) ++pEncoder_( rhs.pEncoder_->clone() ) + { + monitoringStream_ << "copy constructing SequenceReaderStringBase" << endl; + } // copy constructor diff --git a/biology/ssaha/files/patch-makefile b/biology/ssaha/files/patch-makefile new file mode 100644 index 000000000000..0154077a92ce --- /dev/null +++ b/biology/ssaha/files/patch-makefile @@ -0,0 +1,15 @@ +--- ./Binary/makefile.orig 2008-06-12 16:00:19.000000000 -0300 ++++ ./Binary/makefile 2008-06-12 16:00:43.000000000 -0300 +@@ -11,10 +11,10 @@ + # copy of the SSAHA directory structure you should be able to make files + # straight away. + # +-# SSAHA_TOP_DIR=$(SSAHA_DIR?$(SSAHA_DIR):$(PWD)/../) ++SSAHA_TOP_DIR=$(PWD)/../ + # Above is a nice idea, but syntax doesn't work for all versions of make + # so must define CURRENT_SSAHA_VERSION, eg in your .cshrc file - TC 14.3.01 +-SSAHA_TOP_DIR=$(CURRENT_SSAHA_VERSION) ++# SSAHA_TOP_DIR=$(CURRENT_SSAHA_VERSION) + + GLOBAL_DIR=${SSAHA_TOP_DIR}/Global + SEQ_DIR=${SSAHA_TOP_DIR}/SequenceReader diff --git a/biology/ssaha/files/patch-testHashTableNoOverlap.cpp b/biology/ssaha/files/patch-testHashTableNoOverlap.cpp new file mode 100644 index 000000000000..721e03ae9134 --- /dev/null +++ b/biology/ssaha/files/patch-testHashTableNoOverlap.cpp @@ -0,0 +1,178 @@ +--- ./HashTable/testHashTableNoOverlap.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./HashTable/testHashTableNoOverlap.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -70,10 +70,10 @@ + cout << " Test of class HashTable" << endl << endl; + cout << "*******************************************" << endl << endl; + +- int numSeqs = 10; +- int seqSize = 100; +- int wordLength = 10; +- int maxHits = 50; ++ unsigned int numSeqs = 10; ++ unsigned int seqSize = 100; ++ unsigned int 
wordLength = 10; ++ unsigned int maxHits = 50; + + // Generate a random sequence of (numSeqs*seqSize) base pairs ... + // 1128 is the seed value for the random number generator +@@ -116,7 +116,7 @@ + // so we are checking that all sequence data 'finds itself' in the hash + // table in the correct position + +- for ( int i(1) ; i <= numSeqs ; i++ ) ++ for ( unsigned int i(1) ; i <= numSeqs ; i++ ) + { // for each sequence in testReader ... + testReader.getNextSequence(seq,wordLength); + +@@ -131,7 +131,7 @@ + cout << seq.getNumBasesInLast() << "!!\n"; + // ... go through the Words in the sequence one by one and look + // for matches in the hash table +- for ( int j(0) ; j < (seqSize/wordLength) ; j++ ) ++ for ( unsigned int j(0) ; j < (seqSize/wordLength) ; j++ ) + { + cout << j << " " << printBase(seq[j],wordLength) << endl; + +@@ -146,7 +146,7 @@ + // ... and that the sequence number and offset for the hit are OK + + assert(hits[0].subjectNum == i ); +- assert(hits[0].diff == j*wordLength ); ++ assert((unsigned int)hits[0].diff == j*wordLength ); + assert(hits[0].queryPos == 1); + + hits.clear(); +@@ -157,12 +157,12 @@ + testHash.matchWord(seq,hits); + cout << hits.size() << "!!!!!\n"; + assert( hits.size() == (seqSize/wordLength) ); +- for ( int j(0) ; j < hits.size() ; j ++ ) ++ for ( unsigned int j(0) ; j < hits.size() ; j ++ ) + { + + assert(hits[j].subjectNum == i); + assert(hits[j].diff == 0); +- assert(hits[j].queryPos == 1 + ( j * wordLength ) ); ++ assert((unsigned int)hits[j].queryPos == 1 + ( j * wordLength ) ); + + } // ~for j + +@@ -194,7 +194,7 @@ + assert( testLoad.isInitialized() == true ); + + // and that word length matches original +- assert( testLoad.getWordLength() == wordLength ); ++ assert( (unsigned int)testLoad.getWordLength() == wordLength ); + + testLoad.setMaxNumHits( testHash.getMaxNumHits() ); + +@@ -208,7 +208,7 @@ + // and new hash tables, storing the resulting hits in hitsOrig and + // hitsCopy respectively. 
+ +- for ( int i(0) ; i < numSeqs ; i++ ) ++ for ( unsigned int i(0) ; i < numSeqs ; i++ ) + { + + // ... check that the name strings match +@@ -258,7 +258,7 @@ + HashTable shiftHash(cout); + creator.createHashTable(shiftHash,shiftReader,wordLength,maxHits); + +- for ( int i(0) ; i < wordLength ; i++ ) ++ for ( unsigned int i(0) ; i < wordLength ; i++ ) + { + testSeq = testSeq.substr(1); // delete first character + { // braces ensure a new instance is created each time round loop +@@ -460,7 +460,7 @@ + SequenceEncoderCodon encoder; + encoder.setWordLength(wordLength); + +- for ( int i(1) ; i < hashTrans.getNumSequences() ; i++ ) ++ for ( unsigned int i(1) ; i < hashTrans.getNumSequences() ; i++ ) + { + + hashTrans.getSequenceName( s1, i ); +@@ -477,7 +477,7 @@ + assert(hashTrans.getSequenceSize(i)==hashTrans2.getSequenceSize(i)); + + assert( (((w1.size()-1) * gMaxBasesPerWord ) + w1.getNumBasesInLast()) +- == hashTrans.getSequenceSize(i)); ++ == (unsigned int)hashTrans.getSequenceSize(i)); + + + // Sequence should produce same (nonzero) num hits in fwd direction +@@ -573,19 +573,19 @@ + string s, s1; + Word w; + vector<Word> subs; +- int wl=15; ++ unsigned int wl=15; + + // test substitution for DNA + // + +- for (int i(0); i<wl; i++) s+="A"; ++ for ( unsigned int i(0); i<wl; i++) s+="A"; + + w = makeBase(s); + + generateSubstitutesDNA( w, subs, wl ); + assert(subs.size()==wl); + +- for (int i(0); i<wl ; i++) ++ for (unsigned int i(0); i<wl ; i++) + { + cout << printWord(subs[i],wl) << endl; + s1=s; +@@ -596,14 +596,14 @@ + // + s=""; subs.clear(); + +- for (int i(0); i<wl; i++) s+="C"; ++ for (unsigned int i(0); i<wl; i++) s+="C"; + + w = makeBase(s); + + generateSubstitutesDNA( w, subs, wl ); + assert(subs.size()==wl); + +- for (int i(0); i<wl ; i++) ++ for (unsigned int i(0); i<wl ; i++) + { + cout << printWord(subs[i],wl) << endl; + s1=s; +@@ -614,14 +614,14 @@ + // + s=""; subs.clear(); + +- for (int i(0); i<wl; i++) s+="G"; ++ for (unsigned int i(0); i<wl; 
i++) s+="G"; + + w = makeBase(s); + + generateSubstitutesDNA( w, subs, wl ); + assert(subs.size()==wl); + +- for (int i(0); i<wl ; i++) ++ for (unsigned int i(0); i<wl ; i++) + { + cout << printWord(subs[i],wl) << endl; + s1=s; +@@ -632,14 +632,14 @@ + // + s=""; subs.clear(); + +- for (int i(0); i<wl; i++) s+="T"; ++ for (unsigned int i(0); i<wl; i++) s+="T"; + + w = makeBase(s); + + generateSubstitutesDNA( w, subs, wl ); + assert(subs.size()==wl); + +- for (int i(0); i<wl ; i++) ++ for (unsigned int i(0); i<wl ; i++) + { + cout << printWord(subs[i],wl) << endl; + s1=s; diff --git a/biology/ssaha/files/patch-testQueryManager.cpp b/biology/ssaha/files/patch-testQueryManager.cpp new file mode 100644 index 000000000000..19f1ec879fd5 --- /dev/null +++ b/biology/ssaha/files/patch-testQueryManager.cpp @@ -0,0 +1,20 @@ +--- ./QueryManager/testQueryManager.cpp.orig 2004-03-01 13:51:29.000000000 -0300 ++++ ./QueryManager/testQueryManager.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -53,7 +53,7 @@ + void reverseString( string& seq ) + { + string rc; +- for ( int i(0) ; i < seq.size() ; i++ ) ++ for ( unsigned int i(0) ; i < seq.size() ; i++ ) + { + if ( ( seq[i] == 'A' ) || (seq[i] == 'a') ) rc = 'T' + rc; + else if ( ( seq[i] == 'T' ) || (seq[i] == 't') ) rc = 'A' + rc; +@@ -165,7 +165,7 @@ + int numSeqs = 3; + int seqSize = 1000; + int wordLength = 7; +- int maxHits = 50; ++// int maxHits = 50; + + // Generate a random sequence of (numSeqs*seqSize) base pairs ... 
+ // 1128 is the seed value for the random number generator diff --git a/biology/ssaha/files/patch-testSequenceReaderFasta.cpp b/biology/ssaha/files/patch-testSequenceReaderFasta.cpp new file mode 100644 index 000000000000..3bc005bff32b --- /dev/null +++ b/biology/ssaha/files/patch-testSequenceReaderFasta.cpp @@ -0,0 +1,171 @@ +--- ./SequenceReader/testSequenceReaderFasta.cpp.orig 2004-08-25 11:31:17.000000000 -0300 ++++ ./SequenceReader/testSequenceReaderFasta.cpp 2008-06-12 15:39:31.000000000 -0300 +@@ -49,12 +49,12 @@ + // ### Function Definitions ### + void capitalise( string& s ) + { +- for ( int i(0) ; i < s.size() ; ++i ) s[i] = toupper(s[i]); ++ for ( unsigned int i(0) ; i < s.size() ; ++i ) s[i] = toupper(s[i]); + } + void reverseString( string& seq ) + { + string rc; +- for ( int i(0) ; i < seq.size() ; i++ ) ++ for ( unsigned int i(0) ; i < seq.size() ; i++ ) + { + if ( ( seq[i] == 'A' ) || (seq[i] == 'a') ) rc = 'T' + rc; + else if ( ( seq[i] == 'T' ) || (seq[i] == 't') ) rc = 'A' + rc; +@@ -91,7 +91,7 @@ + cout << "Test " << ++numTests <<": test of function getNextSequence" + << endl << endl; + +- int numSeqs = 10; ++ unsigned int numSeqs = 10; + int seqSize = 57; + + // Generate a random sequence of (numSeqs*seqSize) base pairs ... 
+@@ -118,7 +118,7 @@ + + + +- for ( int i(0) ; i < numSeqs ; i++ ) ++ for ( unsigned int i(0) ; i < numSeqs ; i++ ) + { + + testReader.getNextSequence(w,wordLength); +@@ -188,7 +188,7 @@ + cout << "Test " << ++numTests <<": test of getSequence" + << endl << endl; + +- int toRead = 5; ++ unsigned int toRead = 5; + + w.clear(); + testReader.getSequence(w,toRead,wordLength); +@@ -242,7 +242,7 @@ + cout << "Test " << ++numTests <<": test of random access output functions" + << endl << endl; + +- for ( int i(1) ; i <= numSeqs; i++ ) ++ for ( unsigned int i(1) ; i <= numSeqs; i++ ) + { + cout << testReader.getName(i) << endl; + cout << testReader.getSideInfo(i) << endl; +@@ -275,7 +275,7 @@ + // add on the end of the expected sequence string for it to match actual. + // const string spareBases( wordLength - ( seqSize % wordLength ), 'A'); + +- for ( int i(0) ; i < numSeqs ; i++ ) ++ for ( unsigned int i(0) ; i < numSeqs ; i++ ) + { + + testReaderFastq.getNextSequence(w,wordLength); +@@ -403,7 +403,7 @@ + cout << "Test " << ++numTests <<": test of random access output functions" + << endl << endl; + +- for ( int i(1) ; i <= numSeqs; i++ ) ++ for ( unsigned int i(1) ; i <= numSeqs; i++ ) + { + cout << testReaderFastq.getName(i) << endl; + cout << testReaderFastq.getSideInfo(i) << endl; +@@ -466,7 +466,7 @@ + { + + testReader.rewind(); +- for ( int j(0) ; j < testReader.getNumSequencesInFile() ; j ++ ) ++ for ( unsigned int j(0) ; j < testReader.getNumSequencesInFile() ; j ++ ) + { + cout << i << " " << j << endl; + wSingle.clear(); wMulti.clear(); +@@ -772,7 +772,7 @@ + + for (int wordLength(9); wordLength <= 12; wordLength++) + { +- for (int i(0) ; i < fiftyAs.size(); i++) ++ for (unsigned int i(0) ; i < fiftyAs.size(); i++) + { + thisString=fiftyAs; + thisString[i]='X'; +@@ -793,7 +793,7 @@ + stringReader.changeMode(&ignore); + w.clear(); + numInLast = stringReader.getNextSequence(w,wordLength); +- assert(numInLast==(fiftyAs.size()-1)%wordLength); ++ 
assert(numInLast==(int)(fiftyAs.size()-1)%wordLength); + assert((w.size()-1)*wordLength+numInLast==fiftyAs.size()-1); + + for (WordSequence::iterator j(w.begin()); j!=w.end();j++) +@@ -810,7 +810,7 @@ + numInLast = stringReader.getNextSequence(w,wordLength); + cout << "blegh " << wordLength << " " << i << " " << fiftyAs.size() << " " << numInLast << " " << w.size() << endl; + +- assert(numInLast==fiftyAs.size()%wordLength); ++ assert(numInLast==(int)fiftyAs.size()%wordLength); + assert((w.size()-1)*wordLength+numInLast==fiftyAs.size()); + + for (WordSequence::iterator j(w.begin()); j!=w.end();j++) +@@ -828,7 +828,7 @@ + stringReader.changeMode(&tag); + w.clear(); + numInLast = stringReader.getNextSequence(w,wordLength); +- assert(numInLast==fiftyAs.size()%wordLength); ++ assert(numInLast==(int)fiftyAs.size()%wordLength); + assert((w.size()-1)*wordLength+numInLast==fiftyAs.size()); + + for (WordSequence::iterator j(w.begin()); j!=w.end();j++) +@@ -915,7 +915,7 @@ + cout << "actual: " << printResidue(W,wordLength) << endl; + + +- for ( int i(0),j(0); i < protString.size() ; i+=wordLength,j++ ) ++ for ( unsigned int i(0),j(0); i < protString.size() ; i+=wordLength,j++ ) + { + wordString = protString.substr(i, wordLength); + cout << wordString << " " << printResidue(W[j],wordLength) << endl; +@@ -1163,7 +1163,7 @@ + assert ( localReader.getNumSequencesInFile() + == testReader.getNumSequencesInFile() ); + +- for ( int j(0) ; j < testReader.getNumSequencesInFile() ; j ++ ) ++ for ( unsigned int j(0) ; j < testReader.getNumSequencesInFile() ; j ++ ) + { + wSingle.clear(); wMulti.clear(); + +@@ -1244,7 +1244,7 @@ + WordSequence seq; + string name; + +- for ( int i(1) ; i <= filterer.getNumSequencesInFile() ; i++ ) ++ for ( unsigned int i(1) ; i <= filterer.getNumSequencesInFile() ; i++ ) + { + filterer.getSequence(seq, i, 16); + filterer.getLastSequenceName(name); +@@ -1330,12 +1330,12 @@ + // assert(v1==v1i); + // assert(v2==v2i); + +- for ( int (i(j)) ; i <=seqSize ; i++ 
) assert(v1[i-j]==data[i-1]); +- for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v2[i-j]==data[i-1]); +- for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v1m[i-j]==data[i-1]); +- for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v2m[i-j]==data[i-1]); +- for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v1i[i-j]==data[i-1]); +- for ( int (i(j)) ; i <=seqSize ; i++ ) assert(v2i[i-j]==data[i-1]); ++ for ( int i(j) ; i <=seqSize ; i++ ) assert(v1[i-j]==data[i-1]); ++ for ( int i(j) ; i <=seqSize ; i++ ) assert(v2[i-j]==data[i-1]); ++ for ( int i(j) ; i <=seqSize ; i++ ) assert(v1m[i-j]==data[i-1]); ++ for ( int i(j) ; i <=seqSize ; i++ ) assert(v2m[i-j]==data[i-1]); ++ for ( int i(j) ; i <=seqSize ; i++ ) assert(v1i[i-j]==data[i-1]); ++ for ( int i(j) ; i <=seqSize ; i++ ) assert(v2i[i-j]==data[i-1]); + + } + cout << "got through first bit" << endl; +@@ -1402,7 +1402,7 @@ + + string n1,n2; + +- for (int i(0);i<numSeqs; i++) ++ for (unsigned int i(0);i<(unsigned int)numSeqs; i++) + { + cout << "i: " << i << endl; + w1.clear(); w2.clear(); diff --git a/biology/ssaha/pkg-descr b/biology/ssaha/pkg-descr new file mode 100644 index 000000000000..97d3b05a3d8d --- /dev/null +++ b/biology/ssaha/pkg-descr @@ -0,0 +1,11 @@ +SSAHA is a software tool for very fast matching and alignment of DNA +sequences. It stands for Sequence Search and Alignment by Hashing +Algorithm. It achieves its fast search speed by converting sequence +information into a `hash table' data structure, which can then be +searched very rapidly for matches. + +SSAHA: a fast search method for large DNA databases (2001). +Ning Z, Cox AJ, Mullikin JC. Genome Res. 11: 1725-9. 
+PMID: 11591649 + +WWW: http://www.sanger.ac.uk/Software/analysis/SSAHA/ diff --git a/biology/ssaha/pkg-plist b/biology/ssaha/pkg-plist new file mode 100644 index 000000000000..c1b49b17ae24 --- /dev/null +++ b/biology/ssaha/pkg-plist @@ -0,0 +1,9 @@ +bin/ssaha +%%DATADIR%%/testSSAHA.csh +%%DATADIR%%/test.fasta +%%DATADIR%%/test_extract.fasta +%%DATADIR%%/test_filter.fail +%%DATADIR%%/test_filter.fastq +%%DATADIR%%/test_protein.fasta +%%DATADIR%%/README +@dirrmtry %%DATADIR%% |