11#include " Prefiltering.h"
22#include " NucleotideMatrix.h"
33#include " ReducedMatrix.h"
4- #include " ExtendedSubstitutionMatrix.h"
54#include " SubstitutionMatrixProfileStates.h"
65#include " DBWriter.h"
76#include " QueryMatcherTaxonomyHook.h"
@@ -42,6 +41,7 @@ Prefiltering::Prefiltering(const std::string &queryDB,
4241 scoringMatrixFile(par.scoringMatrixFile),
4342 seedScoringMatrixFile(par.seedScoringMatrixFile),
4443 targetSeqType(targetSeqType),
44+ targetSearchMode(par.targetSearchMode),
4545 maxResListLen(par.maxResListLen),
4646 sensitivity(par.sensitivity),
4747 maxSeqLen(par.maxSeqLen),
@@ -52,7 +52,8 @@ Prefiltering::Prefiltering(const std::string &queryDB,
5252 aaBiasCorrectionScale(par.compBiasCorrectionScale),
5353 covThr(par.covThr), covMode(par.covMode), includeIdentical(par.includeIdentity),
5454 preloadMode(par.preloadMode),
55- threads(static_cast <unsigned int >(par.threads)), compressed(par.compressed) {
55+ threads(static_cast <unsigned int >(par.threads)),
56+ compressed(par.compressed) {
5657 sameQTDB = isSameQTDB ();
5758
5859 // init the substitution matrices
@@ -173,7 +174,8 @@ Prefiltering::Prefiltering(const std::string &queryDB,
173174
174175 takeOnlyBestKmer = (par.exactKmerMatching ==1 ) ||
175176 (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_HMM_PROFILE) && Parameters::isEqualDbtype (querySeqType,Parameters::DBTYPE_AMINO_ACIDS)) ||
176- (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) && Parameters::isEqualDbtype (querySeqType,Parameters::DBTYPE_NUCLEOTIDES));
177+ (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) && Parameters::isEqualDbtype (querySeqType,Parameters::DBTYPE_NUCLEOTIDES)) ||
178+ (targetSearchMode == 1 );
177179
178180 // memoryLimit in bytes
179181 size_t memoryLimit=Util::computeMemory (par.splitMemoryLimit );
@@ -203,6 +205,13 @@ Prefiltering::Prefiltering(const std::string &queryDB,
203205
204206 Debug (Debug::INFO) << " Target database size: " << tdbr->getSize () << " type: " <<Parameters::getDbTypeName (targetSeqType) << " \n " ;
205207
208+ if (Parameters::isEqualDbtype (querySeqType, Parameters::DBTYPE_AMINO_ACIDS)) {
209+ kmerSubMat->alphabetSize = kmerSubMat->alphabetSize - 1 ;
210+ _2merSubMatrix = getScoreMatrix (*kmerSubMat, 2 );
211+ _3merSubMatrix = getScoreMatrix (*kmerSubMat, 3 );
212+ kmerSubMat->alphabetSize = alphabetSize;
213+ }
214+
206215 if (splitMode == Parameters::QUERY_DB_SPLIT) {
207216 // create the whole index table
208217 getIndexTable (0 , 0 , tdbr->getSize ());
@@ -214,12 +223,7 @@ Prefiltering::Prefiltering(const std::string &queryDB,
214223 EXIT (EXIT_FAILURE);
215224 }
216225
217- if (Parameters::isEqualDbtype (querySeqType, Parameters::DBTYPE_AMINO_ACIDS)) {
218- kmerSubMat->alphabetSize = kmerSubMat->alphabetSize - 1 ;
219- _2merSubMatrix = getScoreMatrix (*kmerSubMat, 2 );
220- _3merSubMatrix = getScoreMatrix (*kmerSubMat, 3 );
221- kmerSubMat->alphabetSize = alphabetSize;
222- }
226+
223227
224228 if (par.taxonList .length () > 0 ) {
225229 taxonomyHook = new QueryMatcherTaxonomyHook (targetDB, tdbr, par.taxonList );
@@ -519,7 +523,7 @@ void Prefiltering::getIndexTable(int split, size_t dbFrom, size_t dbSize) {
519523 Sequence tseq (maxSeqLen, targetSeqType, kmerSubMat, kmerSize, spacedKmer, aaBiasCorrection, true , spacedKmerPattern);
520524 int localKmerThr = (Parameters::isEqualDbtype (querySeqType, Parameters::DBTYPE_HMM_PROFILE) ||
521525 Parameters::isEqualDbtype (querySeqType, Parameters::DBTYPE_NUCLEOTIDES) ||
522- (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_HMM_PROFILE) == false && takeOnlyBestKmer == true ) ) ? 0 : kmerThr;
526+ (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_HMM_PROFILE) == false && targetSearchMode == 0 && takeOnlyBestKmer == true ) ) ? 0 : kmerThr;
523527
524528 // remove X or N for seeding
525529 int adjustAlphabetSize = (Parameters::isEqualDbtype (targetSeqType, Parameters::DBTYPE_NUCLEOTIDES) ||
@@ -530,7 +534,10 @@ void Prefiltering::getIndexTable(int split, size_t dbFrom, size_t dbSize) {
530534 SequenceLookup **maskedLookup = maskMode == 1 || maskLowerCaseMode == 1 ? &sequenceLookup : NULL ;
531535
532536 Debug (Debug::INFO) << " Index table k-mer threshold: " << localKmerThr << " at k-mer size " << kmerSize << " \n " ;
533- IndexBuilder::fillDatabase (indexTable, maskedLookup, unmaskedLookup, *kmerSubMat, &tseq, tdbr, dbFrom, dbFrom + dbSize, localKmerThr, maskMode, maskLowerCaseMode, maskProb);
537+ IndexBuilder::fillDatabase (indexTable, maskedLookup, unmaskedLookup, *kmerSubMat,
538+ _3merSubMatrix, _2merSubMatrix,
539+ &tseq, tdbr, dbFrom, dbFrom + dbSize,
540+ localKmerThr, maskMode, maskLowerCaseMode, maskProb, targetSearchMode);
534541
535542 // sequenceLookup has to be temporarily present to speed up masking
536543 // afterwards it is not needed anymore without diagonal scoring
0 commit comments