
Commit b208c53

feat: add linkml schemas for data source files (#269, #270) (#283)
1 parent b9e3a38 commit b208c53

25 files changed: +568 −47 lines

.github/workflows/run-checks.yml

Lines changed: 34 additions & 8 deletions
```diff
@@ -2,15 +2,41 @@ name: Run checks
 on: [pull_request]
 
 jobs:
-  build:
+  run-checks:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
-      - uses: actions/setup-node@v2
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
         with:
           node-version: "20.10.0"
-      - run: |
-          npm ci
-          npm run check-format
-          npm run lint
-          npx tsc --noEmit
+      - name: Cache npm cache
+        uses: actions/cache@v4
+        with:
+          path: ~/.npm
+          key: ${{ runner.os }}-node-${{ hashFiles('package-lock.json') }}
+      - name: Install dependencies
+        run: npm ci
+      - name: Run Prettier
+        run: npm run check-format
+      - name: Run Linter (ESLint)
+        run: npm run lint
+      - name: Type Check
+        run: npx tsc --noEmit
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12.4"
+          cache: "pip"
+          cache-dependency-path: "./catalog/build/py/requirements.txt"
+      - name: Install Python dependencies
+        run: pip install -r ./catalog/build/py/requirements.txt
+      - name: Run linkml-lint
+        # Run linting on the LinkML schemas, to enforce conventions such as in naming, and to catch simple errors.
+        run: linkml-lint ./catalog/schema --validate --verbose
+      - name: Test LinkML Python generation
+        # Generate Python code from the main LinkML schemas, discarding the output; this will catch more subtle errors such as references to nonexistent elements.
+        run: npm run test-gen-python
+      - name: Validate catalog files
+        # Validate the catalog source files against their corresponding LinkML schemas.
+        run: npm run validate-catalog
```

.linkmllint.yaml

Lines changed: 4 additions & 0 deletions
```diff
@@ -0,0 +1,4 @@
+extends: recommended
+rules:
+  standard_naming:
+    permissible_values_upper_case: true
```

README.md

Lines changed: 10 additions & 0 deletions
````diff
@@ -16,6 +16,8 @@ Run `npm run build:local` to build. The built app can be run using `npm start`,
 
 ## Building the data source files
 
+Using Python version 3.12.4 is recommended.
+
 Create a Python virtual environment and install requirements:
 
 ```shell
@@ -55,3 +57,11 @@ the `# Anopheles gambiae` comment:
 ```yaml
 - accession: XXX_000000000.0
 ```
+
+## Overview of automated checks on catalog content
+
+The `run-checks` GitHub workflow performs checks to ensure that the catalog data and schemas are well-formed; this is done by:
+
+- Linting the schemas via `linkml-lint`.
+- Converting the schemas to Python, to catch any errors that occur.
+- Validating the catalog source files against their corresponding schemas.
````
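To make the third check concrete, here is a minimal sketch of what a `validate-catalog` script could look like; the actual npm script's implementation is not shown in this commit, and the schema/source filenames below are assumptions for illustration. It assumes the `linkml-validate` CLI that ships with the pinned `linkml` package:

```typescript
// validate-catalog.ts - hypothetical sketch, not the repository's actual script.
import { execFileSync } from "child_process";

// Assumed schema-to-source pairings; the real filenames may differ.
const PAIRS: [schema: string, source: string][] = [
  ["./catalog/schema/workflows.yaml", "./catalog/source/workflows.yml"],
];

for (const [schema, source] of PAIRS) {
  // linkml-validate exits nonzero on a validation failure; execFileSync then
  // throws, which in turn fails the CI step.
  execFileSync("linkml-validate", ["--schema", schema, source], {
    stdio: "inherit",
  });
}
```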

app/apis/catalog/brc-analytics-catalog/common/entities.ts

Lines changed: 5 additions & 3 deletions
```diff
@@ -44,9 +44,9 @@ export interface EntitiesResponsePagination {
 }
 
 export interface WorkflowCategory {
+  category: string;
   description: string;
   name: string;
-  type: string;
   workflows: Workflow[];
 }
 
@@ -58,6 +58,8 @@ export interface Workflow {
 }
 
 export enum WORKFLOW_PLOIDY {
-  ANY = "any",
-  HAPLOID = "haploid",
+  ANY = "ANY",
+  DIPLOID = "DIPLOID",
+  HAPLOID = "HAPLOID",
+  POLYPLOID = "POLYPLOID",
 }
```
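The renamed `category` field and the upper-cased ploidy values (consistent with the `permissible_values_upper_case` rule in `.linkmllint.yaml`) change the shape of catalog objects. A small sketch of a conforming value, assuming the `WorkflowCategory`, `Workflow`, and `WORKFLOW_PLOIDY` types shown above, with all field values invented:

```typescript
// Sketch only; the types are those defined in entities.ts above.
const exampleCategory: WorkflowCategory = {
  category: "VARIANT_CALLING", // stable identifier, formerly `type`
  description: "Example description.",
  name: "Variant calling",
  workflows: [
    {
      ploidy: WORKFLOW_PLOIDY.HAPLOID, // serializes as "HAPLOID"
      trsId: "https://dockstore.org/api/ga4gh/trs/v2/tools/...",
      workflowDescription: "Example workflow description.",
      workflowName: "Example workflow",
    },
  ],
};
```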

app/components/Entity/components/AnalysisMethodsCatalog/analysisMethodsCatalog.tsx

Lines changed: 1 addition & 1 deletion
```diff
@@ -21,7 +21,7 @@ export const AnalysisMethodsCatalog = ({
   );
   return (
     <AnalysisMethod
-      key={workflowCategory.type}
+      key={workflowCategory.category}
       geneModelUrl={geneModelUrl}
       genomeVersionAssemblyId={genomeVersionAssemblyId}
       content={
```

catalog/README.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -7,4 +7,5 @@ This directory provides the catalog data (information on genome assemblies, orga
 - `py` - Python scripts.
 - `ts` - Typescript scripts.
 - `output` - JSON files output by the catalog build process, to be consumed by the app.
+- `schema` - LinkML schemas for source files.
 - `source` - YAML files providing data used as input for building the catalog.
```

catalog/build/py/requirements.txt

Lines changed: 65 additions & 0 deletions
```diff
@@ -1,12 +1,77 @@
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+arrow==1.3.0
+attrs==25.1.0
 certifi==2024.8.30
+CFGraph==0.2.1
+chardet==5.2.0
 charset-normalizer==3.3.2
+click==8.1.8
+curies==0.10.4
+Deprecated==1.2.18
+et_xmlfile==2.0.0
+fqdn==1.5.1
+graphviz==0.20.3
+hbreader==0.9.1
 idna==3.8
+iniconfig==2.0.0
+isodate==0.7.2
+isoduration==20.11.0
+Jinja2==3.1.5
+json-flattener==0.1.9
+jsonasobj==1.3.1
+jsonasobj2==1.0.4
+jsonpatch==1.33
+jsonpath-ng==1.7.0
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+linkml==1.8.6
+linkml-dataops==0.1.0
+linkml-runtime==1.8.3
+MarkupSafe==3.0.2
 numpy==2.1.0
+openpyxl==3.1.5
+packaging==24.2
 pandas==2.2.2
+parse==1.20.2
+pluggy==1.5.0
+ply==3.11
+prefixcommons==0.1.12
+prefixmaps==0.2.6
+pydantic==2.10.6
+pydantic_core==2.27.2
+PyJSG==0.11.10
+pyparsing==3.2.1
+PyShEx==0.8.1
+PyShExC==0.9.1
+pytest==8.3.4
+pytest-logging==2015.11.4
 python-dateutil==2.9.0.post0
+PyTrie==0.4.0
 pytz==2024.1
 PyYAML==6.0.2
+rdflib==7.1.3
+rdflib-jsonld==0.6.1
+rdflib-shim==1.0.3
+referencing==0.36.2
 requests==2.32.3
+rfc3339-validator==0.1.4
+rfc3987==1.3.8
+rpds-py==0.22.3
+ruamel.yaml==0.18.10
+ruamel.yaml.clib==0.2.12
+ShExJSG==0.8.2
 six==1.16.0
+sortedcontainers==2.4.0
+sparqlslurper==0.5.1
+SPARQLWrapper==2.0.0
+SQLAlchemy==2.0.38
+types-python-dateutil==2.9.0.20241206
+typing_extensions==4.12.2
 tzdata==2024.1
+uri-template==1.3.0
 urllib3==2.2.2
+watchdog==6.0.0
+webcolors==24.11.1
+wrapt==1.17.2
```

catalog/build/ts/build-catalog.ts

Lines changed: 13 additions & 11 deletions
```diff
@@ -105,10 +105,10 @@ async function buildWorkflows(): Promise<WorkflowCategory[]> {
 
   const workflowCategories: WorkflowCategory[] =
     sourceWorkflowCategories.workflow_categories.map(
-      ({ description, name, type }) => ({
+      ({ category, description, name }) => ({
+        category,
         description,
         name,
-        type,
         workflows: [],
       })
     );
@@ -123,21 +123,23 @@ async function buildWorkflows(): Promise<WorkflowCategory[]> {
 function buildWorkflow(
   workflowCategories: WorkflowCategory[],
   {
+    categories,
     ploidy,
     trs_id: trsId,
-    type,
     workflow_description: workflowDescription,
     workflow_name: workflowName,
   }: SourceWorkflow
 ): void {
-  const category = workflowCategories.find((c) => c.type === type);
-  if (!category) throw new Error(`Unknown workflow category: ${type}`);
-  category.workflows.push({
-    ploidy,
-    trsId,
-    workflowDescription,
-    workflowName,
-  });
+  for (const category of categories) {
+    const workflowCategory = workflowCategories.find((c) => c.category === category);
+    if (!workflowCategory) throw new Error(`Unknown workflow category: ${category}`);
+    workflowCategory.workflows.push({
+      ploidy,
+      trsId,
+      workflowDescription,
+      workflowName,
+    });
+  }
 }
 
 async function readValuesFile<T>(
```
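The key behavioral change here is that a single source workflow can now appear under several categories: `buildWorkflow` loops over `categories` instead of looking up a single `type`. A self-contained sketch of that fan-out, with the types inlined and the data invented for illustration:

```typescript
// Illustrative only - the real types live in entities.ts.
interface Workflow {
  ploidy: string;
  trsId: string;
}
interface WorkflowCategory {
  category: string;
  workflows: Workflow[];
}

const workflowCategories: WorkflowCategory[] = [
  { category: "VARIANT_CALLING", workflows: [] },
  { category: "ASSEMBLY", workflows: [] },
];

// One source workflow referencing two categories.
const categories = ["VARIANT_CALLING", "ASSEMBLY"];
const workflow: Workflow = { ploidy: "ANY", trsId: "example-trs-id" };

for (const category of categories) {
  const target = workflowCategories.find((c) => c.category === category);
  // An unknown category name is a hard error, as in buildWorkflow above.
  if (!target) throw new Error(`Unknown workflow category: ${category}`);
  target.workflows.push(workflow);
}
// Both the "VARIANT_CALLING" and "ASSEMBLY" buckets now contain the workflow.
```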

catalog/build/ts/entities.ts

Lines changed: 2 additions & 2 deletions
```diff
@@ -23,9 +23,9 @@ export interface SourceGenome {
 
 export interface SourceWorkflowCategories {
   workflow_categories: {
+    category: string;
     description: string;
     name: string;
-    type: string;
   }[];
 }
 
@@ -34,9 +34,9 @@
 }
 
 export interface SourceWorkflow {
+  categories: string[];
   ploidy: WORKFLOW_PLOIDY;
   trs_id: string;
-  type: string;
   workflow_description: string;
   workflow_name: string;
 }
```
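For reference, a source entry matching the updated `SourceWorkflow` shape would look like the following sketch: an invented TypeScript literal mirroring the YAML source format, which uses snake_case keys in contrast to the camelCase output shown earlier.

```typescript
// Invented example values; SourceWorkflow is the interface defined above and
// WORKFLOW_PLOIDY is the enum from the app entities.
const exampleSourceWorkflow: SourceWorkflow = {
  categories: ["TRANSCRIPTOMICS"], // now a list, replacing the single `type`
  ploidy: WORKFLOW_PLOIDY.ANY,
  trs_id: "https://dockstore.org/api/ga4gh/trs/v2/tools/...",
  workflow_description: "Example description.",
  workflow_name: "Example workflow",
};
```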

catalog/output/workflows.json

Lines changed: 9 additions & 9 deletions
```diff
@@ -1,59 +1,59 @@
 [
   {
+    "category": "VARIANT_CALLING",
     "description": "Identify nucleotide polymorphisms and short indels from Illumina and Element data.",
     "name": "Variant calling",
-    "type": "VARIANT_CALLING",
     "workflows": [
       {
-        "ploidy": "haploid",
+        "ploidy": "HAPLOID",
         "trsId": "https://dockstore.org/api/ga4gh/trs/v2/tools/#workflow/github.com/iwc-workflows/haploid-variant-calling-wgs-pe/main/versions/v0.1",
         "workflowDescription": "Workflow for variant analysis against a reference genome in GenBank format",
         "workflowName": "Paired end variant calling in haploid system"
       }
     ]
   },
   {
+    "category": "TRANSCRIPTOMICS",
     "description": "Analyze bulk or single-cell RNA seq data using a variety of approaches.",
     "name": "Transcriptomics",
-    "type": "TRANSCRIPTOMICS",
     "workflows": [
       {
-        "ploidy": "any",
+        "ploidy": "ANY",
         "trsId": "https://dockstore.org/api/ga4gh/trs/v2/tools/#workflow/github.com/iwc-workflows/rnaseq-pe/main/versions/v0.9",
         "workflowDescription": "This workflow takes as input a list of paired-end fastqs. Adapters and bad quality bases are removed with cutadapt. Reads are mapped with STAR with ENCODE parameters and genes are counted simultaneously as well as normalized coverage (per million mapped reads) on uniquely mapped reads. The counts are reprocessed to be similar to HTSeq-count output. FPKM are computed with cufflinks and/or with StringTie. The unstranded normalized coverage is computed with bedtools.",
         "workflowName": "RNAseq_PE"
       }
     ]
   },
   {
+    "category": "REGULATION",
     "description": "Workflows for the analysis of ChIP-seq, ATAC-Seq, and beyond.",
     "name": "Regulation",
-    "type": "REGULATION",
     "workflows": [
       {
-        "ploidy": "any",
+        "ploidy": "ANY",
         "trsId": "https://dockstore.org/api/ga4gh/trs/v2/tools/#workflow/github.com/iwc-workflows/chipseq-pe/main/versions/v0.12",
         "workflowDescription": "This workflow takes as input a collection of paired fastqs. Remove adapters with cutadapt, map pairs with bowtie2. Keep MAPQ30 and concordant pairs. MACS2 for paired bam.",
         "workflowName": "ChIPseq_PE"
       }
     ]
   },
   {
+    "category": "ASSEMBLY",
     "description": "Assemble prokaryotic and eukaryotic genomes sequenced with a variety of technologies.",
     "name": "Assembly",
-    "type": "ASSEMBLY",
     "workflows": []
   },
   {
+    "category": "GENOME_COMPARISONS",
     "description": "Workflows for creation of pairwise and multiple genome alignments.",
     "name": "Genome comparisons",
-    "type": "GENOME_COMPARISONS",
     "workflows": []
   },
   {
+    "category": "PROTEIN_FOLDING",
     "description": "Analysis of protein folding using the ColabFold framework.",
     "name": "Protein folding",
-    "type": "PROTEIN_FOLDING",
     "workflows": []
   }
 ]
```
