@@ -52,8 +52,7 @@ class DbInfo {
5252};
5353
5454
55- void IndexBuilder::fillDatabase (IndexTable *indexTable, SequenceLookup **maskedLookup,
56- SequenceLookup **unmaskedLookup,BaseMatrix &subMat,
55+ void IndexBuilder::fillDatabase (IndexTable *indexTable, SequenceLookup ** externalLookup, BaseMatrix &subMat,
5756 ScoreMatrix & three, ScoreMatrix & two, Sequence *seq,
5857 DBReader<unsigned int > *dbr, size_t dbFrom, size_t dbTo, int kmerThr,
5958 bool mask, bool maskLowerCaseMode, float maskProb, int maskNrepeats, int targetSearchMode) {
@@ -65,27 +64,14 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
6564 size_t dbSize = dbTo - dbFrom;
6665 DbInfo* info = new DbInfo (dbFrom, dbTo, seq->getEffectiveKmerSize (), *dbr);
6766
68- SequenceLookup *sequenceLookup;
69- if (unmaskedLookup != NULL && maskedLookup == NULL ) {
70- *unmaskedLookup = new SequenceLookup (dbSize, info->aaDbSize );
71- sequenceLookup = *unmaskedLookup;
72- } else if (unmaskedLookup == NULL && maskedLookup != NULL ) {
73- *maskedLookup = new SequenceLookup (dbSize, info->aaDbSize );
74- sequenceLookup = *maskedLookup;
75- } else if (unmaskedLookup != NULL && maskedLookup != NULL ) {
76- *unmaskedLookup = new SequenceLookup (dbSize, info->aaDbSize );
77- *maskedLookup = new SequenceLookup (dbSize, info->aaDbSize );
78- sequenceLookup = *maskedLookup;
79- } else {
80- Debug (Debug::ERROR) << " This should not happen\n " ;
81- EXIT (EXIT_FAILURE);
82- }
67+ *externalLookup = new SequenceLookup (dbSize, info->aaDbSize );
68+ SequenceLookup *sequenceLookup = *externalLookup;
8369
8470
8571 // identical scores for memory reduction code
8672 char *idScoreLookup = getScoreLookup (subMat);
8773 Debug::Progress progress (dbTo-dbFrom);
88-
74+ bool needMasking = (mask == 1 || maskNrepeats > 0 || maskLowerCaseMode == 1 );
8975 size_t maskedResidues = 0 ;
9076 size_t totalKmerCount = 0 ;
9177 #pragma omp parallel
@@ -96,16 +82,17 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
9682#endif
9783 // need to prune low scoring k-mers through masking
9884 Masker *masker = NULL ;
99- if (maskedLookup != NULL ) {
85+ if (needMasking ) {
10086 masker = new Masker (subMat);
10187 }
10288
103-
104- Indexer idxer (static_cast <unsigned int >(indexTable->getAlphabetSize ()), seq->getKmerSize ());
89+ unsigned int alphabetSize = (indexTable != NULL ) ? static_cast <unsigned int >(indexTable->getAlphabetSize ())
90+ : static_cast <unsigned int >(subMat.alphabetSize );
91+ Indexer idxer (alphabetSize, seq->getKmerSize ());
10592 Sequence s (seq->getMaxLen (), seq->getSeqType (), &subMat, seq->getKmerSize (), seq->isSpaced (), false , true , seq->getUserSpacedKmerPattern ());
10693
10794 KmerGenerator *generator = NULL ;
108- if (isTargetSimiliarKmerSearch) {
95+ if (isTargetSimiliarKmerSearch && indexTable != NULL ) {
10996 generator = new KmerGenerator (seq->getKmerSize (), indexTable->getAlphabetSize (), kmerThr);
11097 if (isProfile){
11198 generator->setDivideStrategy (s.profile_matrix );
@@ -132,26 +119,21 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
132119 // count similar or exact k-mers based on sequence type
133120 if (isTargetSimiliarKmerSearch) {
134121 // Find out if we should also mask profiles
135- totalKmerCount += indexTable->addSimilarKmerCount (&s, generator);
136- unsigned char * seq = (isProfile) ? s.numConsensusSequence : s.numSequence ;
137- if (unmaskedLookup != NULL ) {
138- (*unmaskedLookup)->addSequence (seq, s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
139- } else if (maskedLookup != NULL ) {
140- (*maskedLookup)->addSequence (seq, s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
122+ if (indexTable != NULL ){
123+ totalKmerCount += indexTable->addSimilarKmerCount (&s, generator);
141124 }
125+ unsigned char * seq = (isProfile) ? s.numConsensusSequence : s.numSequence ;
126+
127+ sequenceLookup->addSequence (seq, s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
128+
142129 } else {
143130 // Do not mask if column state sequences are used
144- if (unmaskedLookup != NULL ) {
145- (*unmaskedLookup)->addSequence (s.numSequence , s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
146- }
147-
148131 maskedResidues += masker->maskSequence (s, mask, maskProb, maskLowerCaseMode, maskNrepeats);
132+ sequenceLookup->addSequence (s.numSequence , s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
149133
150- if (maskedLookup != NULL ){
151- (*maskedLookup)-> addSequence (s. numSequence , s. L , id - dbFrom, info-> sequenceOffsets [id - dbFrom] );
134+ if (indexTable != NULL ){
135+ totalKmerCount += indexTable-> addKmerCount (&s, &idxer, buffer, kmerThr, idScoreLookup );
152136 }
153-
154- totalKmerCount += indexTable->addKmerCount (&s, &idxer, buffer, kmerThr, idScoreLookup);
155137 }
156138 }
157139
@@ -168,14 +150,13 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
168150
169151
170152 Debug (Debug::INFO) << " Index table: Masked residues: " << maskedResidues << " \n " ;
171- if (totalKmerCount == 0 ) {
172- Debug (Debug::ERROR ) << " No k-mer could be extracted for the database " << dbr->getDataFileName () << " .\n "
153+ if (indexTable != NULL && totalKmerCount == 0 ) {
154+ Debug (Debug::WARNING ) << " No k-mer could be extracted for the database " << dbr->getDataFileName () << " .\n "
173155 << " Maybe the sequences length is less than 14 residues.\n " ;
174156 if (maskedResidues == true ){
175- Debug (Debug::ERROR ) << " or contains only low complexity regions." ;
176- Debug (Debug::ERROR ) << " Use --mask 0 to deactivate the low complexity filter.\n " ;
157+ Debug (Debug::WARNING ) << " or contains only low complexity regions." ;
158+ Debug (Debug::WARNING ) << " Use --mask 0 to deactivate the low complexity filter.\n " ;
177159 }
178- EXIT (EXIT_FAILURE);
179160 }
180161
181162 dbr->remapData ();
@@ -193,9 +174,10 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
193174// }
194175// Debug(Debug::INFO) << "Index table: Remove "<< lowSelectiveResidues <<" none selective residues\n";
195176// Debug(Debug::INFO) << "Index table: init... from "<< dbFrom << " to "<< dbTo << "\n";
196-
197- indexTable->initMemory (info->tableSize );
198- indexTable->init ();
177+ if (indexTable != NULL ){
178+ indexTable->initMemory (info->tableSize );
179+ indexTable->init ();
180+ }
199181
200182 delete info;
201183 Debug::Progress progress2 (dbTo-dbFrom);
@@ -208,7 +190,9 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
208190 thread_idx = static_cast <unsigned int >(omp_get_thread_num ());
209191#endif
210192 Sequence s (seq->getMaxLen (), seq->getSeqType (), &subMat, seq->getKmerSize (), seq->isSpaced (), false , true , seq->getUserSpacedKmerPattern ());
211- Indexer idxer (static_cast <unsigned int >(indexTable->getAlphabetSize ()), seq->getKmerSize ());
193+ unsigned int alphabetSize = (indexTable != NULL ) ? static_cast <unsigned int >(indexTable->getAlphabetSize ())
194+ : static_cast <unsigned int >(subMat.alphabetSize );
195+ Indexer idxer (alphabetSize, seq->getKmerSize ());
212196 IndexEntryLocalTmp *buffer = static_cast <IndexEntryLocalTmp *>(malloc ( seq->getMaxLen () * sizeof (IndexEntryLocalTmp)));
213197 size_t bufferSize = seq->getMaxLen ();
214198 KmerGenerator *generator = NULL ;
@@ -229,10 +213,14 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
229213 unsigned int qKey = dbr->getDbKey (id);
230214 if (isTargetSimiliarKmerSearch) {
231215 s.mapSequence (id - dbFrom, qKey, dbr->getData (id, thread_idx), dbr->getSeqLen (id));
232- indexTable->addSimilarSequence (&s, generator, &buffer, bufferSize, &idxer);
216+ if (indexTable != NULL ) {
217+ indexTable->addSimilarSequence (&s, generator, &buffer, bufferSize, &idxer);
218+ }
233219 } else {
234220 s.mapSequence (id - dbFrom, qKey, sequenceLookup->getSequence (id - dbFrom));
235- indexTable->addSequence (&s, &idxer, &buffer, bufferSize, kmerThr, idScoreLookup);
221+ if (indexTable != NULL ) {
222+ indexTable->addSequence (&s, &idxer, &buffer, bufferSize, kmerThr, idScoreLookup);
223+ }
236224 }
237225 }
238226
@@ -245,6 +233,8 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
245233 if (idScoreLookup!=NULL ){
246234 delete[] idScoreLookup;
247235 }
248- indexTable->revertPointer ();
249- indexTable->sortDBSeqLists ();
236+ if (indexTable != NULL ){
237+ indexTable->revertPointer ();
238+ indexTable->sortDBSeqLists ();
239+ }
250240}
0 commit comments