Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions app/apis/catalog/brc-analytics-catalog/common/entities.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { MDXRemoteSerializeResult } from "next-mdx-remote";
import { WorkflowUrlParameter } from "../../../../utils/galaxy-api/entities";
import {
ORGANISM_PLOIDY,
OUTBREAK_PRIORITY,
OUTBREAK_RESOURCE_TYPE,
WORKFLOW_PARAMETER_VARIABLE,
WORKFLOW_PLOIDY,
} from "./schema-entities";
Expand Down Expand Up @@ -72,6 +75,21 @@ export interface EntitiesResponsePagination {
total: number;
}

/**
 * Shape of an outbreak entity in the catalog API types.
 * NOTE(review): field meanings below are inferred from names and types only —
 * confirm against the generated catalog schema.
 */
export interface Outbreak {
// Flag on the outbreak; presumably marks whether it is currently ongoing — TODO confirm.
active: boolean;
// Description stored as a serialized MDX result (next-mdx-remote) rather than a raw string.
description: MDXRemoteSerializeResult;
// List of taxonomy IDs, or null when none are given; presumably descendants of
// `taxonomy_id` to highlight — TODO confirm.
highlight_descendant_taxonomy_ids: number[] | null;
// Priority value taken from the schema-generated OUTBREAK_PRIORITY type.
priority: OUTBREAK_PRIORITY;
// Resources attached to this outbreak (see OutbreakResource).
resources: OutbreakResource[];
// Numeric taxonomy ID associated with the outbreak; presumably an NCBI taxonomy ID — verify.
taxonomy_id: number;
}

/**
 * A single resource entry listed in `Outbreak.resources`.
 * NOTE(review): presumably an external link shown alongside the outbreak — confirm with the UI code.
 */
export interface OutbreakResource {
// Title text of the resource.
title: string;
// Resource type taken from the schema-generated OUTBREAK_RESOURCE_TYPE type.
type: OUTBREAK_RESOURCE_TYPE;
// URL of the resource, kept as a plain string.
url: string;
}

export interface WorkflowCategory {
category: string;
description: string;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// Re-exports selected names from the generated catalog schema under
// CONSTANT_CASE aliases, so app code imports them from this module instead of
// reaching into the generated file directly.
export {
OrganismPloidy as ORGANISM_PLOIDY,
OutbreakPriority as OUTBREAK_PRIORITY,
OutbreakResourceType as OUTBREAK_RESOURCE_TYPE,
WorkflowParameterVariable as WORKFLOW_PARAMETER_VARIABLE,
WorkflowPloidy as WORKFLOW_PLOIDY,
} from "../../../../../catalog/schema/generated/schema";
90 changes: 90 additions & 0 deletions catalog/build/ts/build-assemblies.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { BRCDataCatalogGenome } from "../../../app/apis/catalog/brc-analytics-catalog/common/entities";
import { getGenomeId } from "../../../app/apis/catalog/brc-analytics-catalog/common/utils";
import { Organisms as SourceOrganisms } from "../../schema/generated/schema";
import { SOURCE_GENOME_KEYS } from "./constants";
import { SourceGenome } from "./entities";
import {
defaultStringToNone,
parseBoolean,
parseList,
parseNumber,
parseNumberOrNull,
parseStringOrNull,
readValuesFile,
readYamlFile,
verifyUniqueIds,
} from "./utils";

// Path of the genome values file that buildAssemblies reads via readValuesFile.
// NOTE(review): the path is relative; presumably resolved against the repository root — confirm.
const SOURCE_PATH_GENOMES = "catalog/build/intermediate/genomes-from-ncbi.tsv";
// Path of the organisms YAML file that buildAssemblies reads via readYamlFile
// (used there to look up ploidy by taxonomy ID).
const SOURCE_PATH_ORGANISMS = "catalog/source/organisms.yml";

/**
 * Builds the catalog's assembly (genome) records.
 *
 * Reads the genome rows from `SOURCE_PATH_GENOMES` and the organisms from
 * `SOURCE_PATH_ORGANISMS`, attaches each row's ploidy by looking up its
 * `speciesTaxonomyId` among the organisms, and maps the row's raw values into
 * a `BRCDataCatalogGenome`.
 *
 * Rows whose species taxonomy ID has no matching organism, or whose organism
 * has no ploidy, are skipped and a message is logged for each of them.
 *
 * @returns The mapped assemblies, sorted by accession using `localeCompare`.
 * The sorted list is passed to `verifyUniqueIds` before being returned
 * (presumably that helper fails on duplicate IDs — confirm in `./utils`).
 */
export async function buildAssemblies(): Promise<BRCDataCatalogGenome[]> {
  // The two source files are independent of each other, so start both reads
  // at once instead of waiting for the first to finish.
  const [sourceRows, sourceOrganisms] = await Promise.all([
    readValuesFile<SourceGenome>(
      SOURCE_PATH_GENOMES,
      undefined,
      SOURCE_GENOME_KEYS
    ),
    readYamlFile<SourceOrganisms>(SOURCE_PATH_ORGANISMS),
  ]);

  // Keys are stringified because the genome rows carry the species taxonomy
  // ID in the same form that is used for the lookup below.
  const sourceOrganismsByTaxonomyId = new Map(
    sourceOrganisms.organisms.map((sourceOrganism) => [
      String(sourceOrganism.taxonomy_id),
      sourceOrganism,
    ])
  );

  const mappedRows: BRCDataCatalogGenome[] = [];
  for (const row of sourceRows) {
    // `undefined` covers both "no organism for this taxonomy ID" and
    // "organism present but without a ploidy value".
    const ploidy = sourceOrganismsByTaxonomyId.get(
      row.speciesTaxonomyId
    )?.ploidy;
    if (ploidy === undefined) {
      console.log(
        `Skipping assembly ${row.accession} [tax_id: ${row.speciesTaxonomyId}] - ploidy not found`
      );
      continue;
    }

    // Prefer the strain-level name from the source; otherwise derive one from
    // the species name and strain, falling back to the "None" placeholder.
    const taxonomicLevelStrain =
      row.taxonomicLevelStrain ||
      (row.strain
        ? `${row.taxonomicLevelSpecies} strain ${row.strain}`
        : "None");

    mappedRows.push({
      accession: row.accession,
      annotationStatus: parseStringOrNull(row.annotationStatus),
      chromosomes: parseNumberOrNull(row.chromosomeCount),
      commonName: parseStringOrNull(row.commonName),
      coverage: parseStringOrNull(row.coverage),
      gcPercent: parseNumberOrNull(row.gcPercent),
      geneModelUrl: parseStringOrNull(row.geneModelUrl),
      isRef: parseBoolean(row.isRef),
      length: parseNumber(row.length),
      level: row.level,
      lineageTaxonomyIds: parseList(row.lineageTaxonomyIds),
      ncbiTaxonomyId: row.taxonomyId,
      ploidy,
      scaffoldCount: parseNumberOrNull(row.scaffoldCount),
      scaffoldL50: parseNumberOrNull(row.scaffoldL50),
      scaffoldN50: parseNumberOrNull(row.scaffoldN50),
      speciesTaxonomyId: row.speciesTaxonomyId,
      strainName: parseStringOrNull(row.strain),
      taxonomicGroup: row.taxonomicGroup ? row.taxonomicGroup.split(",") : [],
      taxonomicLevelClass: defaultStringToNone(row.taxonomicLevelClass),
      taxonomicLevelDomain: defaultStringToNone(row.taxonomicLevelDomain),
      taxonomicLevelFamily: defaultStringToNone(row.taxonomicLevelFamily),
      taxonomicLevelGenus: defaultStringToNone(row.taxonomicLevelGenus),
      taxonomicLevelKingdom: defaultStringToNone(row.taxonomicLevelKingdom),
      taxonomicLevelOrder: defaultStringToNone(row.taxonomicLevelOrder),
      taxonomicLevelPhylum: defaultStringToNone(row.taxonomicLevelPhylum),
      taxonomicLevelRealm: defaultStringToNone(row.taxonomicLevelRealm),
      taxonomicLevelSpecies: defaultStringToNone(row.taxonomicLevelSpecies),
      taxonomicLevelStrain,
      ucscBrowserUrl: parseStringOrNull(row.ucscBrowser),
    });
  }

  // `mappedRows` is local to this call, so sorting it in place is safe.
  const sortedRows = mappedRows.sort((a, b) =>
    a.accession.localeCompare(b.accession)
  );
  verifyUniqueIds("assembly", sortedRows, getGenomeId);
  return sortedRows;
}
Loading
Loading