Skip to content

Commit 2a6a106

Browse files
author
Daniel Cooke
committed
Consider ambiguous support for AlleleDepth and AlleleFrequency
1 parent 6400eae commit 2a6a106

File tree

2 files changed

+31
-89
lines changed

2 files changed

+31
-89
lines changed

src/core/csr/measures/allele_depth.cpp

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -46,40 +46,34 @@ bool is_evaluable(const VcfRecord& call, const VcfRecord::SampleName& sample)
4646
return has_called_alt_allele(call, sample);
4747
}
4848

49+
template <typename T>
50+
void pop_front(std::vector<T>& v)
51+
{
52+
v.erase(std::cbegin(v));
53+
}
54+
55+
auto min_support_count(const AlleleSupportMap& support)
56+
{
57+
const static auto support_less = [] (const auto& lhs, const auto& rhs) { return lhs.second.size() < rhs.second.size(); };
58+
return std::min_element(std::cbegin(support), std::cend(support), support_less)->second.size();
59+
}
60+
4961
} // namespace
5062

5163
Measure::ResultType AlleleDepth::do_evaluate(const VcfRecord& call, const FacetMap& facets) const
5264
{
5365
const auto& samples = get_value<Samples>(facets.at("Samples"));
54-
const auto& assignments = get_value<ReadAssignments>(facets.at("ReadAssignments")).support;
66+
const auto& assignments = get_value<ReadAssignments>(facets.at("ReadAssignments"));
5567
std::vector<boost::optional<int>> result {};
5668
result.reserve(samples.size());
5769
for (const auto& sample : samples) {
5870
boost::optional<int> sample_result {};
5971
if (is_evaluable(call, sample)) {
60-
const auto& sample_assignments = assignments.at(sample);
6172
std::vector<Allele> alleles; bool has_ref;
62-
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
63-
assert(!alleles.empty());
64-
std::vector<int> allele_counts(alleles.size());
65-
for (const auto& p : sample_assignments) {
66-
const auto& haplotype = p.first;
67-
const auto& reads = p.second;
68-
const auto haplotype_support_depth = count_overlapped(reads, call);
69-
if (haplotype_support_depth > 0) {
70-
std::transform(std::cbegin(alleles), std::cend(alleles), std::cbegin(allele_counts), std::begin(allele_counts),
71-
[&] (const auto& allele, auto count) {
72-
if (haplotype.includes(allele)) {
73-
count += haplotype_support_depth;
74-
}
75-
return count;
76-
});
77-
}
78-
}
79-
auto first_called_count_itr = std::cbegin(allele_counts);
80-
if (has_ref) ++first_called_count_itr;
81-
assert(first_called_count_itr != std::cend(allele_counts));
82-
sample_result = *std::min_element(first_called_count_itr, std::cend(allele_counts));
73+
std::tie(alleles, has_ref) = get_called_alleles(call, sample);
74+
if (has_ref) pop_front(alleles); // ref always first
75+
const auto allele_support = compute_allele_support(alleles, assignments, sample);
76+
sample_result = min_support_count(allele_support);
8377
}
8478
result.push_back(sample_result);
8579
}

src/core/csr/measures/allele_frequency.cpp

Lines changed: 14 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,11 @@
88

99
#include <boost/variant.hpp>
1010

11-
#include "core/tools/read_assigner.hpp"
1211
#include "core/types/allele.hpp"
1312
#include "io/variant/vcf_record.hpp"
14-
#include "io/variant/vcf_spec.hpp"
15-
#include "utils/genotype_reader.hpp"
16-
#include "../facets/samples.hpp"
17-
#include "../facets/read_assignments.hpp"
13+
#include "utils/concat.hpp"
14+
#include "allele_depth.hpp"
15+
#include "depth.hpp"
1816

1917
namespace octopus { namespace csr {
2018

@@ -25,69 +23,19 @@ std::unique_ptr<Measure> AlleleFrequency::do_clone() const
2523
return std::make_unique<AlleleFrequency>(*this);
2624
}
2725

28-
namespace {
29-
30-
bool is_canonical(const VcfRecord::NucleotideSequence& allele) noexcept
31-
{
32-
const static VcfRecord::NucleotideSequence deleted_allele {vcfspec::deletedBase};
33-
return !(allele == vcfspec::missingValue || allele == deleted_allele);
34-
}
35-
36-
bool has_called_alt_allele(const VcfRecord& call, const VcfRecord::SampleName& sample)
37-
{
38-
if (!call.has_genotypes()) return true;
39-
const auto& genotype = get_genotype(call, sample);
40-
return std::any_of(std::cbegin(genotype), std::cend(genotype),
41-
[&] (const auto& allele) { return allele != call.ref() && is_canonical(allele); });
42-
}
43-
44-
bool is_evaluable(const VcfRecord& call, const VcfRecord::SampleName& sample)
45-
{
46-
return has_called_alt_allele(call, sample);
47-
}
48-
49-
} // namespace
50-
5126
Measure::ResultType AlleleFrequency::do_evaluate(const VcfRecord& call, const FacetMap& facets) const
5227
{
53-
const auto& samples = get_value<Samples>(facets.at("Samples"));
54-
const auto& assignments = get_value<ReadAssignments>(facets.at("ReadAssignments")).support;
55-
std::vector<boost::optional<double>> result {};
56-
result.reserve(samples.size());
57-
for (const auto& sample : samples) {
58-
boost::optional<double> sample_result {};
59-
if (is_evaluable(call, sample)) {
60-
const auto& sample_assignments = assignments.at(sample);
61-
std::vector<Allele> alleles; bool has_ref;
62-
std::tie(alleles, has_ref) = get_called_alleles(call, sample, true);
63-
assert(!alleles.empty());
64-
std::size_t read_count {0};
65-
std::vector<unsigned> allele_counts(alleles.size());
66-
for (const auto& p : sample_assignments) {
67-
const auto& haplotype = p.first;
68-
const auto& reads = p.second;
69-
const auto haplotype_support_depth = count_overlapped(reads, call);
70-
if (haplotype_support_depth > 0) {
71-
std::transform(std::cbegin(alleles), std::cend(alleles), std::cbegin(allele_counts), std::begin(allele_counts),
72-
[&] (const auto& allele, auto count) {
73-
if (haplotype.includes(allele)) {
74-
count += haplotype_support_depth;
75-
}
76-
return count;
77-
});
78-
read_count += haplotype_support_depth;
79-
}
80-
}
81-
if (read_count > 0) {
82-
auto first_called_count_itr = std::cbegin(allele_counts);
83-
if (has_ref) ++first_called_count_itr;
84-
assert(first_called_count_itr != std::cend(allele_counts));
85-
const auto min_count_itr = std::min_element(first_called_count_itr, std::cend(allele_counts));
86-
sample_result = static_cast<double>(*min_count_itr) / read_count;
87-
}
28+
const auto allele_depths = boost::get<std::vector<boost::optional<int>>>(AlleleDepth{}.evaluate(call, facets));
29+
const auto depths = boost::get<std::vector<std::size_t>>(Depth{true, false}.evaluate(call, facets));
30+
std::vector<boost::optional<double>> result(allele_depths.size());
31+
std::transform(std::cbegin(allele_depths), std::cend(allele_depths), std::cbegin(depths), std::begin(result),
32+
[] (const auto allele_depth, const auto depth) -> boost::optional<double> {
33+
if (allele_depth && depth > 0) {
34+
return static_cast<double>(*allele_depth) / depth;
35+
} else {
36+
return boost::none;
8837
}
89-
result.push_back(sample_result);
90-
}
38+
});
9139
return result;
9240
}
9341

@@ -108,7 +56,7 @@ std::string AlleleFrequency::do_describe() const
10856

10957
std::vector<std::string> AlleleFrequency::do_requirements() const
11058
{
111-
return {"Samples", "ReadAssignments"};
59+
return concat(AlleleDepth{}.requirements(), Depth{true, false}.requirements());
11260
}
11361

11462
} // namespace csr

0 commit comments

Comments
 (0)