@@ -236,11 +236,20 @@ struct AdjacentRepeatPair : public Mappable<AdjacentRepeatPair>
236236 region {encompassing_region (this ->lhs , this ->rhs )} {}
237237};
238238
239+ auto generate_tandem_repeats (const ReferenceGenome& reference, const GenomicRegion& region,
240+ const unsigned max_period, const unsigned min_tract_length)
241+ {
242+ auto result = find_exact_tandem_repeats (reference, region, max_period);
243+ const auto is_too_short = [=] (const auto & repeat) { return region_size (repeat) < min_tract_length; };
244+ result.erase (std::remove_if (std::begin (result), std::end (result), is_too_short), std::end (result));
245+ return result;
246+ }
247+
239248std::deque<AdjacentRepeatPair>
240249find_adjacent_tandem_repeats (const ReferenceGenome& reference, const GenomicRegion& region,
241- const unsigned max_period)
250+ const unsigned max_period, const unsigned min_tract_length )
242251{
243- const auto repeats = find_exact_tandem_repeats (reference, region, max_period);
252+ const auto repeats = generate_tandem_repeats (reference, region, max_period, min_tract_length );
244253 std::deque<AdjacentRepeatPair> result {};
245254 if (repeats.size () > 1 ) {
246255 for (auto lhs_itr = std::cbegin (repeats); lhs_itr != std::prev (std::cend (repeats)); ++lhs_itr) {
@@ -300,9 +309,14 @@ void RepeatScanner::generate(const GenomicRegion& region, std::vector<Variant>&
300309 assert (!segment.empty ());
301310 const auto segment_region = encompassing_region (segment);
302311 const auto repeat_search_region = expand (segment_region, 100 );
303- const auto segment_repeat_pairs = find_adjacent_tandem_repeats (reference_, repeat_search_region, options_.max_period );
312+ const auto segment_repeat_pairs = find_adjacent_tandem_repeats (reference_, repeat_search_region, options_.max_period , options_. min_tract_length );
304313 for (const auto & mnv : segment) {
305314 for (const auto & repeat_pair : overlap_range (segment_repeat_pairs, mnv)) {
315+ if (is_snv (mnv) && (repeat_pair.lhs .period () > 1 || repeat_pair.rhs .period () > 1
316+ || !(mnv.alt_allele ().sequence () == repeat_pair.lhs .motif () || mnv.alt_allele ().sequence () == repeat_pair.rhs .motif ()))) {
317+ // Only try to split SNV candidates sandwiched by homopolymers
318+ continue ;
319+ }
306320 if (are_adjacent (repeat_pair.lhs , mnv) && contains (repeat_pair.rhs , mnv)) {
307321 // insertion of lhs repeat, deletion of rhs repeat
308322 const auto num_deleted_periods = count_whole_repeats (region_size (mnv), repeat_pair.rhs .period ());
0 commit comments