Skip to content

Commit cf63609

Browse files
author
Daniel Cooke
committed
Merge branch 'exp/homopolymer-expand' into develop
2 parents d2aad91 + a75e988 commit cf63609

File tree

3 files changed

+20
-3
lines changed

3 files changed

+20
-3
lines changed

src/config/option_collation.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,8 @@ auto make_variant_generator_builder(const OptionMap& options, const boost::optio
12431243
}
12441244
if (repeat_candidate_variant_generator_enabled(options)) {
12451245
RepeatScanner::Options repeat_scanner_options {};
1246+
repeat_scanner_options.min_snvs = 1;
1247+
repeat_scanner_options.min_base_quality = 10;
12461248
repeat_scanner_options.min_vaf = get_repeat_scanner_min_vaf(options);
12471249
result.set_repeat_scanner(repeat_scanner_options);
12481250
}

src/core/tools/vargen/repeat_scanner.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -236,11 +236,20 @@ struct AdjacentRepeatPair : public Mappable<AdjacentRepeatPair>
236236
region {encompassing_region(this->lhs, this->rhs)} {}
237237
};
238238

239+
// Collects the repeats that find_exact_tandem_repeats reports for `region` and
// discards those whose region_size is below `min_tract_length`.
//
// reference, region, max_period: forwarded unchanged to find_exact_tandem_repeats.
// min_tract_length: repeats with region_size(repeat) < min_tract_length are removed;
//                   a repeat whose size equals the threshold is kept.
//
// Returns the filtered container by value; the return type is deduced from
// whatever find_exact_tandem_repeats returns.
// NOTE(review): presumably max_period bounds the repeat motif length and
// region_size is measured in reference bases - confirm against the helpers.
auto generate_tandem_repeats(const ReferenceGenome& reference, const GenomicRegion& region,
240+
const unsigned max_period, const unsigned min_tract_length)
241+
{
242+
// Take the unfiltered repeat set as a mutable local so it can be pruned in place.
auto result = find_exact_tandem_repeats(reference, region, max_period);
243+
// Of the enclosing variables, the predicate body only uses min_tract_length.
const auto is_too_short = [=] (const auto& repeat) { return region_size(repeat) < min_tract_length; };
244+
// Erase-remove: remove_if shifts the kept repeats to the front (relative order
// preserved) and erase trims the leftover tail.
result.erase(std::remove_if(std::begin(result), std::end(result), is_too_short), std::end(result));
245+
return result;
246+
}
247+
239248
std::deque<AdjacentRepeatPair>
240249
find_adjacent_tandem_repeats(const ReferenceGenome& reference, const GenomicRegion& region,
241-
const unsigned max_period)
250+
const unsigned max_period, const unsigned min_tract_length)
242251
{
243-
const auto repeats = find_exact_tandem_repeats(reference, region, max_period);
252+
const auto repeats = generate_tandem_repeats(reference, region, max_period, min_tract_length);
244253
std::deque<AdjacentRepeatPair> result {};
245254
if (repeats.size() > 1) {
246255
for (auto lhs_itr = std::cbegin(repeats); lhs_itr != std::prev(std::cend(repeats)); ++lhs_itr) {
@@ -300,9 +309,14 @@ void RepeatScanner::generate(const GenomicRegion& region, std::vector<Variant>&
300309
assert(!segment.empty());
301310
const auto segment_region = encompassing_region(segment);
302311
const auto repeat_search_region = expand(segment_region, 100);
303-
const auto segment_repeat_pairs = find_adjacent_tandem_repeats(reference_, repeat_search_region, options_.max_period);
312+
const auto segment_repeat_pairs = find_adjacent_tandem_repeats(reference_, repeat_search_region, options_.max_period, options_.min_tract_length);
304313
for (const auto& mnv : segment) {
305314
for (const auto& repeat_pair : overlap_range(segment_repeat_pairs, mnv)) {
315+
if (is_snv(mnv) && (repeat_pair.lhs.period() > 1 || repeat_pair.rhs.period() > 1
316+
|| !(mnv.alt_allele().sequence() == repeat_pair.lhs.motif() || mnv.alt_allele().sequence() == repeat_pair.rhs.motif()))) {
317+
// Only try to split SNV candidates sandwiched by homopolymers
318+
continue;
319+
}
306320
if (are_adjacent(repeat_pair.lhs, mnv) && contains(repeat_pair.rhs, mnv)) {
307321
// insertion of lhs repeat, deletion of rhs repeat
308322
const auto num_deleted_periods = count_whole_repeats(region_size(mnv), repeat_pair.rhs.period());

src/core/tools/vargen/repeat_scanner.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class RepeatScanner : public VariantGenerator
3434
{
3535
unsigned min_snvs = 2;
3636
unsigned max_period = 6;
37+
unsigned min_tract_length = 3;
3738
unsigned min_observations = 2;
3839
unsigned min_sample_observations = 2;
3940
boost::optional<double> min_vaf = boost::none;

0 commit comments

Comments
 (0)