-
Notifications
You must be signed in to change notification settings - Fork 79
Remove guard against internal samples when decoding genotypes #3301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -90,12 +90,10 @@ tsk_variant_copy_alleles(tsk_variant_t *self, const char **alleles) | |
| static int | ||
| variant_init_samples_and_index_map(tsk_variant_t *self, | ||
| const tsk_treeseq_t *tree_sequence, const tsk_id_t *samples, tsk_size_t num_samples, | ||
| size_t num_samples_alloc, tsk_flags_t options) | ||
| size_t num_samples_alloc, tsk_flags_t TSK_UNUSED(options)) | ||
| { | ||
| int ret = 0; | ||
| const tsk_flags_t *flags = tree_sequence->tables->nodes.flags; | ||
| tsk_size_t j, num_nodes; | ||
| bool impute_missing = !!(options & TSK_ISOLATED_NOT_MISSING); | ||
| tsk_id_t u; | ||
|
|
||
| num_nodes = tsk_treeseq_get_num_nodes(tree_sequence); | ||
|
|
@@ -120,11 +118,6 @@ variant_init_samples_and_index_map(tsk_variant_t *self, | |
| ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE); | ||
| goto out; | ||
| } | ||
| /* We can only detect missing data for samples */ | ||
| if (!impute_missing && !(flags[u] & TSK_NODE_IS_SAMPLE)) { | ||
| ret = tsk_trace_error(TSK_ERR_MUST_IMPUTE_NON_SAMPLES); | ||
| goto out; | ||
| } | ||
| self->alt_sample_index_map[samples[j]] = (tsk_id_t) j; | ||
| } | ||
| out: | ||
|
|
@@ -218,15 +211,21 @@ tsk_variant_init(tsk_variant_t *self, const tsk_treeseq_t *tree_sequence, | |
| } | ||
| self->sample_index_map = self->alt_sample_index_map; | ||
| } | ||
| /* When a list of samples is given, we use the traversal based algorithm | ||
| * which doesn't use sample list tracking in the tree */ | ||
| /* When a list of samples is given, we use the traversal based algorithm, | ||
| * so we need traversal scratch space and presence tracking. */ | ||
| if (self->alt_samples != NULL) { | ||
| self->traversal_stack = tsk_malloc( | ||
| tsk_treeseq_get_num_nodes(tree_sequence) * sizeof(*self->traversal_stack)); | ||
| if (self->traversal_stack == NULL) { | ||
| ret = tsk_trace_error(TSK_ERR_NO_MEMORY); | ||
| goto out; | ||
| } | ||
| self->sample_is_present | ||
| = tsk_malloc(num_samples_alloc * sizeof(*self->sample_is_present)); | ||
| if (self->sample_is_present == NULL) { | ||
| ret = tsk_trace_error(TSK_ERR_NO_MEMORY); | ||
| goto out; | ||
| } | ||
| } | ||
|
|
||
| self->genotypes = tsk_malloc(num_samples_alloc * sizeof(*self->genotypes)); | ||
|
|
@@ -274,6 +273,7 @@ tsk_variant_free(tsk_variant_t *self) | |
| tsk_safe_free(self->samples); | ||
| tsk_safe_free(self->alt_samples); | ||
| tsk_safe_free(self->alt_sample_index_map); | ||
| tsk_safe_free(self->sample_is_present); | ||
| tsk_safe_free(self->traversal_stack); | ||
| return 0; | ||
| } | ||
|
|
@@ -425,13 +425,48 @@ tsk_variant_mark_missing(tsk_variant_t *self) | |
| const tsk_id_t *restrict sample_index_map = self->sample_index_map; | ||
| const tsk_id_t N = self->tree.virtual_root; | ||
| int32_t *restrict genotypes = self->genotypes; | ||
| tsk_id_t root, sample_index; | ||
| tsk_id_t root, sample_index, u, v; | ||
| const tsk_flags_t *restrict flags = self->tree_sequence->tables->nodes.flags; | ||
| bool *restrict present = self->sample_is_present; | ||
| tsk_id_t *restrict stack = self->traversal_stack; | ||
| int stack_top = -1; | ||
| tsk_size_t j = 0; | ||
|
|
||
| if (self->alt_samples == NULL) { | ||
| for (root = left_child[N]; root != TSK_NULL; root = right_sib[root]) { | ||
| if (left_child[root] == TSK_NULL) { | ||
| sample_index = sample_index_map[root]; | ||
| if (sample_index != TSK_NULL) { | ||
| genotypes[sample_index] = TSK_MISSING_DATA; | ||
| num_missing++; | ||
| } | ||
| } | ||
| } | ||
| } else { | ||
| tsk_memset(present, 0, self->num_samples * sizeof(*present)); | ||
|
|
||
| for (root = left_child[N]; root != TSK_NULL; root = right_sib[root]) { | ||
| stack[++stack_top] = root; | ||
|
||
| } | ||
|
|
||
| for (root = left_child[N]; root != TSK_NULL; root = right_sib[root]) { | ||
| if (left_child[root] == TSK_NULL) { | ||
| sample_index = sample_index_map[root]; | ||
| while (stack_top >= 0) { | ||
| u = stack[stack_top--]; | ||
| sample_index = sample_index_map[u]; | ||
| if (sample_index != TSK_NULL) { | ||
| genotypes[sample_index] = TSK_MISSING_DATA; | ||
| present[sample_index] = true; | ||
| } | ||
| for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) { | ||
| stack[++stack_top] = v; | ||
| } | ||
| } | ||
|
|
||
| for (j = 0; j < self->num_samples; j++) { | ||
| u = self->samples[j]; | ||
| if (!present[j] | ||
| || ((flags[u] & TSK_NODE_IS_SAMPLE) != 0 | ||
| && self->tree.parent[u] == TSK_NULL | ||
| && left_child[u] == TSK_NULL)) { | ||
| genotypes[j] = TSK_MISSING_DATA; | ||
| num_missing++; | ||
| } | ||
| } | ||
|
|
@@ -591,6 +626,7 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other) | |
| other->sample_index_map = NULL; | ||
| other->alt_samples = NULL; | ||
| other->alt_sample_index_map = NULL; | ||
| other->sample_is_present = NULL; | ||
| other->user_alleles_mem = NULL; | ||
|
|
||
| total_len = 0; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
usual practise is to bundle multiple mem error checks together
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed