99#include < cassert>
1010
1111#include " utils/maths.hpp"
12+ #include " utils/sequence_utils.hpp"
1213
1314namespace octopus { namespace readpipe {
1415
@@ -164,6 +165,17 @@ MaskLowAverageQualitySoftClippedTails::MaskLowAverageQualitySoftClippedTails(Bas
164165
165166namespace {
166167
168+ auto get_soft_clip_head_size (const AlignedRead& read) noexcept
169+ {
170+ CigarOperation::Size front_size, back_size;
171+ std::tie (front_size, back_size) = get_soft_clipped_sizes (read);
172+ if (read.is_marked_reverse_mapped ()) {
173+ return back_size;
174+ } else {
175+ return front_size;
176+ }
177+ }
178+
167179auto get_soft_clip_tail_size (const AlignedRead& read) noexcept
168180{
169181 CigarOperation::Size front_size, back_size;
@@ -192,6 +204,15 @@ auto mean_tail_quality(const AlignedRead& read, const std::size_t num_bases) noe
192204 }
193205}
194206
207+ void zero_head_base_qualities (AlignedRead& read, const std::size_t num_bases) noexcept
208+ {
209+ if (read.is_marked_reverse_mapped ()) {
210+ zero_back_qualities (read, num_bases);
211+ } else {
212+ zero_front_qualities (read, num_bases);
213+ }
214+ }
215+
195216void zero_tail_base_qualities (AlignedRead& read, const std::size_t num_bases) noexcept
196217{
197218 if (read.is_marked_reverse_mapped ()) {
@@ -214,6 +235,78 @@ void MaskLowAverageQualitySoftClippedTails::operator()(AlignedRead& read) const
214235 }
215236}
216237
238+ MaskInvertedSoftClippedReadEnds::MaskInvertedSoftClippedReadEnds (const ReferenceGenome& reference,
239+ AlignedRead::NucleotideSequence::size_type min_clip_length,
240+ GenomicRegion::Size max_flank_search)
241+ : reference_ {reference}
242+ , min_clip_length_ {min_clip_length}
243+ , max_flank_search_ {max_flank_search}
244+ {}
245+
246+ namespace {
247+
248+ auto copy_head_sequence (const AlignedRead& read, const AlignedRead::NucleotideSequence::size_type length)
249+ {
250+ if (length >= sequence_size (read)) return read.sequence ();
251+ if (read.is_marked_reverse_mapped ()) {
252+ return AlignedRead::NucleotideSequence {std::prev (std::cend (read.sequence ()), length), std::cend (read.sequence ())};
253+ } else {
254+ return AlignedRead::NucleotideSequence {std::cbegin (read.sequence ()), std::next (std::cbegin (read.sequence ()), length)};
255+ }
256+ }
257+
258+ auto copy_tail_sequence (const AlignedRead& read, const AlignedRead::NucleotideSequence::size_type length)
259+ {
260+ if (length >= sequence_size (read)) return read.sequence ();
261+ if (read.is_marked_reverse_mapped ()) {
262+ return AlignedRead::NucleotideSequence {std::cbegin (read.sequence ()), std::next (std::cbegin (read.sequence ()), length)};
263+ } else {
264+ return AlignedRead::NucleotideSequence {std::prev (std::cend (read.sequence ()), length), std::cend (read.sequence ())};
265+ }
266+ }
267+
268+ auto copy_soft_clipped_head_sequence (const AlignedRead& read)
269+ {
270+ return copy_head_sequence (read, get_soft_clip_head_size (read));
271+ }
272+
273+ auto copy_soft_clipped_tail_sequence (const AlignedRead& read)
274+ {
275+ return copy_tail_sequence (read, get_soft_clip_tail_size (read));
276+ }
277+
// Returns true if `query` occurs as a contiguous subsequence of `target`.
//
// An empty query is contained in every target, including an empty one. The
// explicit guard is needed because std::search returns `first` for an empty
// needle, which equals `last` when the target is also empty and would
// otherwise be misreported as "not found".
template <typename Range1, typename Range2>
bool includes(const Range1& target, const Range2& query)
{
    const auto query_first = std::cbegin(query);
    const auto query_last  = std::cend(query);
    if (query_first == query_last) return true;
    const auto target_last = std::cend(target);
    return std::search(std::cbegin(target), target_last, query_first, query_last) != target_last;
}
283+
284+ } // namespace
285+
286+ void MaskInvertedSoftClippedReadEnds::operator ()(AlignedRead& read) const
287+ {
288+ if (is_soft_clipped (read)) {
289+ const auto soft_clipped_head_length = get_soft_clip_head_size (read);
290+ if (soft_clipped_head_length >= min_clip_length_) {
291+ auto query = copy_head_sequence (read, soft_clipped_head_length);
292+ utils::reverse_complement (query);
293+ auto target = reference_.get ().fetch_sequence (expand (mapped_region (read), max_flank_search_));
294+ if (includes (target, query)) {
295+ zero_head_base_qualities (read, soft_clipped_head_length);
296+ }
297+ }
298+ const auto soft_clipped_tail_length = get_soft_clip_tail_size (read);
299+ if (soft_clipped_tail_length >= min_clip_length_) {
300+ auto query = copy_tail_sequence (read, soft_clipped_tail_length);
301+ utils::reverse_complement (query);
302+ auto target = reference_.get ().fetch_sequence (expand (mapped_region (read), max_flank_search_));
303+ if (includes (target, query)) {
304+ zero_tail_base_qualities (read, soft_clipped_tail_length);
305+ }
306+ }
307+ }
308+ }
309+
217310// template transforms
218311
219312void mask_adapter_contamination (AlignedRead& forward, AlignedRead& reverse) noexcept
0 commit comments