Skip to content

Commit d52ab60

Browse files
committed
feat: exclude rows without browser urls from catalog input (#17)
1 parent 06b3181 commit d52ab60

File tree

5 files changed

+36
-2776
lines changed

5 files changed

+36
-2776
lines changed

data-catalog/app/apis/catalog/brc-analytics-catalog/common/entities.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ export interface BRCDataCatalogGenome {
66
species: string;
77
strain: string;
88
supercontigs: number;
9-
ucscBrowserUrl: string | null;
9+
ucscBrowserUrl: string;
1010
vEuPathDbProject: string;
1111
}

data-catalog/files/build-catalog.ts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ async function buildGenomes(): Promise<BRCDataCatalogGenome[]> {
2727
species: row.Species,
2828
strain: row.Strain,
2929
supercontigs: parseNumber(row.Supercontigs),
30-
ucscBrowserUrl: parseStringOrNull(row.ucscBrowser),
30+
ucscBrowserUrl: row.ucscBrowser,
3131
vEuPathDbProject: row["VEuPathDB Project"],
3232
})
3333
);
@@ -52,10 +52,6 @@ async function saveJson(filePath: string, data: unknown): Promise<void> {
5252
await fsp.writeFile(filePath, JSON.stringify(data, undefined, 2) + "\n");
5353
}
5454

55-
function parseStringOrNull(value: string): string | null {
56-
return value || null;
57-
}
58-
5955
function parseNumber(value: string): number {
6056
value = value.trim();
6157
const n = Number(value);

data-catalog/files/build-genomes-files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def build_genomes_files():
1616
gen_bank_merge_df = genomes_source_df.merge(assemblies_df, how="left", left_on="Genome Version/Assembly ID", right_on="genBank")
1717
ref_seq_merge_df = genomes_source_df.merge(assemblies_df, how="left", left_on="Genome Version/Assembly ID", right_on="refSeq")
1818

19-
result_df = gen_bank_merge_df.combine_first(ref_seq_merge_df)
19+
result_df = gen_bank_merge_df.combine_first(ref_seq_merge_df).dropna(subset=["ucscBrowser"])
2020

2121
result_df.to_csv(OUTPUT_PATH, index=False, sep="\t")
2222

0 commit comments

Comments
 (0)