Skip to content

Commit bc062d2

Browse files
author
Daniel Cooke
committed
Optimise cell component responsibility with expanded likelihood matrix
1 parent d445697 commit bc062d2

File tree

2 files changed

+46
-10
lines changed

2 files changed

+46
-10
lines changed

src/core/models/genotype/variational_bayes_mixture_mixture_model.cpp

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ VariationalBayesMixtureMixtureModel::evaluate(const LogProbabilityVector& genoty
3030
std::vector<LogProbabilityVector> seeds) const
3131
{
3232
const auto group_log_priors = to_logs(group_priors);
33+
const auto expanded_log_likelihoods = expand(log_likelihoods);
3334
const auto evaluate_seed = [&] (auto&& seed) {
34-
return this->evaluate(genotype_log_priors, log_likelihoods, group_log_priors, group_concentrations, mixture_concentrations, std::move(seed)); };
35+
return this->evaluate(genotype_log_priors, log_likelihoods, expanded_log_likelihoods, group_log_priors, group_concentrations, mixture_concentrations, std::move(seed)); };
3536
std::vector<PointInferences> seed_inferences(seeds.size());
3637
if (options_.parallel_execution) {
3738
parallel_transform(std::make_move_iterator(std::begin(seeds)), std::make_move_iterator(std::end(seeds)), std::begin(seed_inferences), evaluate_seed);
@@ -163,6 +164,30 @@ VariationalBayesMixtureMixtureModel::to_logs(const GroupOptionalPriorArray& prio
163164
return result;
164165
}
165166

167+
VariationalBayesMixtureMixtureModel::ExpandedHaplotypeLikelihoodMatrix
168+
VariationalBayesMixtureMixtureModel::expand(const HaplotypeLikelihoodMatrix& likelihoods) const
169+
{
170+
const auto S = likelihoods.size();
171+
const auto G = likelihoods[0].size();
172+
const auto T = likelihoods[0][0].size();
173+
ExpandedHaplotypeLikelihoodMatrix result(S, ExpandedGroupLikelihoodVector(T));
174+
for (std::size_t s {0}; s < S; ++s) {
175+
const auto N = likelihoods[s][0][0][0].size();
176+
for (std::size_t t {0}; t < T; ++t) {
177+
const auto K = likelihoods[0][0][t].size();
178+
result[s][t].assign(K, ExpandedGenotype(N, ExpandedLikelihood(G)));
179+
for (std::size_t k {0}; k < K; ++k) {
180+
for (std::size_t g {0}; g < G; ++g) {
181+
for (std::size_t n {0}; n < N; ++n) {
182+
result[s][t][k][n][g] = likelihoods[s][g][t][k][n];
183+
}
184+
}
185+
}
186+
}
187+
}
188+
return result;
189+
}
190+
166191
namespace {
167192

168193
VariationalBayesMixtureMixtureModel::ProbabilityVector&
@@ -229,6 +254,7 @@ bool all_equal_sizes(const VariationalBayesMixtureMixtureModel::MixtureConcentra
229254
VariationalBayesMixtureMixtureModel::PointInferences
230255
VariationalBayesMixtureMixtureModel::evaluate(const LogProbabilityVector& genotype_log_priors,
231256
const HaplotypeLikelihoodMatrix& log_likelihoods,
257+
const ExpandedHaplotypeLikelihoodMatrix& expanded_log_likelihoods,
232258
const GroupOptionalLogPriorArray& group_log_priors,
233259
const GroupConcentrationVector& prior_group_concentrations,
234260
const MixtureConcentrationArray& prior_mixture_concentrations,
@@ -242,7 +268,7 @@ VariationalBayesMixtureMixtureModel::evaluate(const LogProbabilityVector& genoty
242268
latents.group_responsibilities = init_responsibilities(group_log_priors, prior_group_concentrations, prior_mixture_concentrations,
243269
latents.genotype_posteriors, log_likelihoods);
244270
latents.component_responsibilities = init_responsibilities(prior_group_concentrations, prior_mixture_concentrations,
245-
latents.genotype_posteriors, latents.group_responsibilities, log_likelihoods);
271+
latents.genotype_posteriors, latents.group_responsibilities, expanded_log_likelihoods);
246272
auto prev_evidence = std::numeric_limits<double>::lowest();
247273
for (unsigned i {0}; i < options_.max_iterations; ++i) {
248274
update_genotype_log_posteriors(latents.genotype_log_posteriors, genotype_log_priors,
@@ -266,7 +292,7 @@ VariationalBayesMixtureMixtureModel::evaluate(const LogProbabilityVector& genoty
266292
latents.mixture_concentrations, latents.genotype_posteriors,
267293
latents.component_responsibilities, log_likelihoods);
268294
update_responsibilities(latents.component_responsibilities, latents.group_concentrations, latents.mixture_concentrations,
269-
latents.genotype_posteriors, latents.group_responsibilities, log_likelihoods);
295+
latents.genotype_posteriors, latents.group_responsibilities, expanded_log_likelihoods);
270296
}
271297
return {std::move(latents), prev_evidence};
272298
}
@@ -377,7 +403,7 @@ VariationalBayesMixtureMixtureModel::init_responsibilities(const GroupConcentrat
377403
const MixtureConcentrationArray& mixture_concentrations,
378404
const ProbabilityVector& genotype_priors,
379405
const GroupResponsibilityVector& group_responsibilities,
380-
const HaplotypeLikelihoodMatrix& log_likelihoods) const
406+
const ExpandedHaplotypeLikelihoodMatrix& log_likelihoods) const
381407
{
382408
const auto S = log_likelihoods.size();
383409
const auto T = group_concentrations.size();
@@ -389,7 +415,7 @@ VariationalBayesMixtureMixtureModel::init_responsibilities(const GroupConcentrat
389415
}
390416
ComponentResponsibilityMatrix result(S, ComponentResponsibilityVectorArray(T, ComponentResponsibilityVector(K)));
391417
for (std::size_t s {0}; s < S; ++s) {
392-
const auto N = log_likelihoods[s][0][0][0].size();
418+
const auto N = log_likelihoods[s][0][0].size();
393419
for (std::size_t t {0}; t < T; ++t) {
394420
for (std::size_t k {0}; k < K; ++k) {
395421
result[s][t][k].resize(N);
@@ -423,21 +449,23 @@ VariationalBayesMixtureMixtureModel::update_responsibilities(ComponentResponsibi
423449
const MixtureConcentrationArray& mixture_concentrations,
424450
const ProbabilityVector& genotype_posteriors,
425451
const GroupResponsibilityVector& group_responsibilities,
426-
const HaplotypeLikelihoodMatrix& log_likelihoods) const
452+
const ExpandedHaplotypeLikelihoodMatrix& log_likelihoods) const
427453
{
428454
const auto T = group_concentrations.size();
429455
const auto S = log_likelihoods.size();
430456
const auto max_K = result[0][0].size();
457+
std::vector<float> float_genotype_posteriors {std::cbegin(genotype_posteriors), std::cend(genotype_posteriors)};
431458
for (std::size_t s {0}; s < S; ++s) {
432-
const auto N = log_likelihoods[s][0][0][0].size();
459+
const auto N = log_likelihoods[s][0][0].size();
433460
for (std::size_t t {0}; t < T; ++t) {
434461
const auto ln_exp_pi = dirichlet_expectation_log(mixture_concentrations[s][t]);
435462
const auto K = ln_exp_pi.size();
436463
for (std::size_t k {0}; k < max_K; ++k) {
437464
for (std::size_t n {0}; n < N; ++n) {
438465
if (t == 0) result[s][t][k][n] = 0;
439466
if (k < K) {
440-
result[s][t][k][n] += ln_exp_pi[k] + inner_product(genotype_posteriors, log_likelihoods[s], t, k, n);
467+
//result[s][t][k][n] += ln_exp_pi[k] + inner_product(genotype_posteriors, log_likelihoods[s], t, k, n);
468+
result[s][t][k][n] += ln_exp_pi[k] + inner_product(float_genotype_posteriors, log_likelihoods[s][t][k][n]);
441469
} else {
442470
result[s][t][k][n] = options_.null_log_probability;
443471
}

src/core/models/genotype/variational_bayes_mixture_mixture_model.hpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,13 +135,21 @@ class VariationalBayesMixtureMixtureModel
135135
using GroupOptionalLogPriorVector = boost::optional<LogProbabilityVector>; // One element per group
136136
using GroupOptionalLogPriorArray = std::vector<GroupOptionalLogPriorVector>; // One element per sample
137137

138+
using ExpandedLikelihood = std::vector<float>; // One element per genotype combination
139+
using ExpandedGenotype = std::vector<ExpandedLikelihood>; // One element per read
140+
using ExpandedHaplotypeLikelihoodVector = std::vector<ExpandedGenotype>; // One element per haplotype in genotype
141+
using ExpandedGroupLikelihoodVector = std::vector<ExpandedHaplotypeLikelihoodVector>; // One element per group
142+
using ExpandedHaplotypeLikelihoodMatrix = std::vector<ExpandedGroupLikelihoodVector>; // One element per sample
143+
138144
Options options_;
139145

140146
GroupOptionalLogPriorVector to_logs(const GroupOptionalPriorVector& prior) const;
141147
GroupOptionalLogPriorArray to_logs(const GroupOptionalPriorArray& priors) const;
148+
ExpandedHaplotypeLikelihoodMatrix expand(const HaplotypeLikelihoodMatrix& likelihoods) const;
142149
PointInferences
143150
evaluate(const LogProbabilityVector& genotype_log_priors,
144151
const HaplotypeLikelihoodMatrix& log_likelihoods,
152+
const ExpandedHaplotypeLikelihoodMatrix& expanded_log_likelihoods,
145153
const GroupOptionalLogPriorArray& group_log_priors,
146154
const GroupConcentrationVector& group_concentrations,
147155
const MixtureConcentrationArray& mixture_concentrations,
@@ -165,14 +173,14 @@ class VariationalBayesMixtureMixtureModel
165173
const MixtureConcentrationArray& mixture_concentrations,
166174
const ProbabilityVector& genotype_priors,
167175
const GroupResponsibilityVector& group_responsibilities,
168-
const HaplotypeLikelihoodMatrix& log_likelihoods) const;
176+
const ExpandedHaplotypeLikelihoodMatrix& log_likelihoods) const;
169177
void
170178
update_responsibilities(ComponentResponsibilityMatrix& result,
171179
const GroupConcentrationVector& group_concentrations,
172180
const MixtureConcentrationArray& mixture_concentrations,
173181
const ProbabilityVector& genotype_posteriors,
174182
const GroupResponsibilityVector& group_responsibilities,
175-
const HaplotypeLikelihoodMatrix& log_likelihoods) const;
183+
const ExpandedHaplotypeLikelihoodMatrix& log_likelihoods) const;
176184
void
177185
update_genotype_log_posteriors(LogProbabilityVector& result,
178186
const LogProbabilityVector& genotype_log_priors,

0 commit comments

Comments
 (0)