@@ -3,18 +3,21 @@ import {
3
3
Outbreak ,
4
4
} from "../../../app/apis/catalog/brc-analytics-catalog/common/entities" ;
5
5
import { getGenomeId } from "../../../app/apis/catalog/brc-analytics-catalog/common/utils" ;
6
- import { Organisms as SourceOrganisms } from "../../schema/generated/schema" ;
7
6
import { SOURCE_GENOME_KEYS } from "./constants" ;
8
7
import { SourceGenome } from "./entities" ;
9
8
import {
10
9
defaultStringToNone ,
10
+ getOutbreakMatchingLineage ,
11
+ getPloidyForAssembly ,
12
+ getSourceOrganismsByTaxonomyId ,
13
+ getSpeciesStrainName ,
11
14
parseBoolean ,
12
15
parseList ,
16
+ parseListOrNull ,
13
17
parseNumber ,
14
18
parseNumberOrNull ,
15
19
parseStringOrNull ,
16
20
readValuesFile ,
17
- readYamlFile ,
18
21
verifyUniqueIds ,
19
22
} from "./utils" ;
20
23
@@ -29,31 +32,18 @@ export async function buildAssemblies(
29
32
undefined ,
30
33
SOURCE_GENOME_KEYS
31
34
) ;
32
- const sourceOrganisms = await readYamlFile < SourceOrganisms > (
35
+ const sourceOrganismsByTaxonomyId = await getSourceOrganismsByTaxonomyId (
33
36
SOURCE_PATH_ORGANISMS
34
37
) ;
35
- const sourceOrganismsByTaxonomyId = new Map (
36
- sourceOrganisms . organisms . map ( ( sourceOrganism ) => [
37
- String ( sourceOrganism . taxonomy_id ) ,
38
- sourceOrganism ,
39
- ] )
40
- ) ;
41
38
const mappedRows : BRCDataCatalogGenome [ ] = [ ] ;
42
39
for ( const row of sourceRows ) {
43
- const ploidy = sourceOrganismsByTaxonomyId . get (
44
- row . speciesTaxonomyId
45
- ) ?. ploidy ;
46
- if ( ploidy === undefined ) {
47
- console . log (
48
- `Skipping assembly ${ row . accession } [tax_id: ${ row . speciesTaxonomyId } ] - ploidy not found`
49
- ) ;
50
- continue ;
51
- }
52
- const taxonomicLevelStrain =
53
- row . taxonomicLevelStrain ||
54
- ( row . strain
55
- ? `${ row . taxonomicLevelSpecies } strain ${ row . strain } `
56
- : "None" ) ;
40
+ const ploidy = getPloidyForAssembly (
41
+ sourceOrganismsByTaxonomyId ,
42
+ row . speciesTaxonomyId ,
43
+ true ,
44
+ row . accession
45
+ ) ;
46
+ if ( ploidy === null ) continue ;
57
47
const lineageTaxonomyIds = parseList ( row . lineageTaxonomyIds ) ;
58
48
const outbreak = getOutbreakMatchingLineage (
59
49
outbreaksByTaxonomyId ,
@@ -72,7 +62,7 @@ export async function buildAssemblies(
72
62
level : row . level ,
73
63
lineageTaxonomyIds,
74
64
ncbiTaxonomyId : row . taxonomyId ,
75
- otherTaxa : row . otherTaxa ? row . otherTaxa . split ( "," ) : null ,
65
+ otherTaxa : parseListOrNull ( row . otherTaxa ) ,
76
66
ploidy,
77
67
priority : outbreak ?. priority ?? null ,
78
68
priorityPathogenName : outbreak ?. name ?? null ,
@@ -81,7 +71,7 @@ export async function buildAssemblies(
81
71
scaffoldN50 : parseNumberOrNull ( row . scaffoldN50 ) ,
82
72
speciesTaxonomyId : row . speciesTaxonomyId ,
83
73
strainName : parseStringOrNull ( row . strain ) ,
84
- taxonomicGroup : row . taxonomicGroup ? row . taxonomicGroup . split ( "," ) : [ ] ,
74
+ taxonomicGroup : parseList ( row . taxonomicGroup ) ,
85
75
taxonomicLevelClass : defaultStringToNone ( row . taxonomicLevelClass ) ,
86
76
taxonomicLevelDomain : defaultStringToNone ( row . taxonomicLevelDomain ) ,
87
77
taxonomicLevelFamily : defaultStringToNone ( row . taxonomicLevelFamily ) ,
@@ -93,7 +83,11 @@ export async function buildAssemblies(
93
83
taxonomicLevelRealm : defaultStringToNone ( row . taxonomicLevelRealm ) ,
94
84
taxonomicLevelSerotype : defaultStringToNone ( row . taxonomicLevelSerotype ) ,
95
85
taxonomicLevelSpecies : defaultStringToNone ( row . taxonomicLevelSpecies ) ,
96
- taxonomicLevelStrain,
86
+ taxonomicLevelStrain : getSpeciesStrainName (
87
+ row . taxonomicLevelSpecies ,
88
+ row . taxonomicLevelStrain ,
89
+ row . strain
90
+ ) ,
97
91
ucscBrowserUrl : parseStringOrNull ( row . ucscBrowser ) ,
98
92
} ) ;
99
93
}
@@ -103,20 +97,3 @@ export async function buildAssemblies(
103
97
verifyUniqueIds ( "assembly" , sortedRows , getGenomeId ) ;
104
98
return sortedRows ;
105
99
}
106
-
107
- /**
108
- * Get the outbreak associated with the first of the given lineage taxa that has an associated outbreak, or null if none is found.
109
- * @param outbreaksByTaxonomyId - Map from taxonomy ID (number) to outbreak.
110
- * @param lineageTaxonomyIds - Taxonomic lineage (array of taxonomy ID strings), checked in array order; each ID is converted with Number() before the map lookup.
111
- * @returns matching outbreak, or null.
112
- */
113
- function getOutbreakMatchingLineage (
114
- outbreaksByTaxonomyId : Map < number , Outbreak > ,
115
- lineageTaxonomyIds : string [ ]
116
- ) : Outbreak | null {
117
- for ( const stringId of lineageTaxonomyIds ) {
118
- const outbreak = outbreaksByTaxonomyId . get ( Number ( stringId ) ) ;
119
- if ( outbreak !== undefined ) return outbreak ;
120
- }
121
- return null ;
122
- }
0 commit comments