Merge branch 'main' into 944-longitudinal-normalization

Zethson · web-flow · commit e8808a74b08e · 2025-10-22T11:57:29.000+02:00
diff --git a/.github/labels.yml b/.github/labels.yml
@@ -1,9 +1,4 @@
 ---
-# Labels names are important as they are used by Release Drafter to decide
-# regarding where to record them in changelog or if to skip them.
-#
-# The repository labels will be automatically configured using this file and
-# the GitHub Action https://github.com/marketplace/actions/github-labeler.
 - name: breaking
   description: Breaking Changes
   color: bfd4f2
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,11 +7,11 @@ default_stages:
 minimum_pre_commit_version: 2.16.0
 repos:
     - repo: https://github.com/biomejs/pre-commit
-      rev: v2.2.5
+      rev: v2.2.6
       hooks:
           - id: biome-format
     - repo: https://github.com/astral-sh/ruff-pre-commit
-      rev: v0.14.0
+      rev: v0.14.1
       hooks:
           - id: ruff-check
             args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
diff --git a/ehrapy/preprocessing/_scanpy_pp_api.py b/ehrapy/preprocessing/_scanpy_pp_api.py
@@ -70,25 +70,22 @@ def pca(
         chunk_size: Number of observations to include in each chunk. Required if `chunked=True` was passed.
 
     Returns:
-        :X_pca: :class:`~scipy.sparse.spmatrix`, :class:`~numpy.ndarray`
-
-        If `data` is array-like and `return_info=False` was passed, this function only returns `X_pca`...
-
-        edata : :class:`~ehrdata.EHRData` or :class:`~anndata.AnnData`
-
-        …otherwise if `copy=True` it returns or else adds fields to `edata`:
-
-        `.obsm['X_pca']`
-        PCA representation of data.
-
-        `.varm['PCs']`
-        The principal components containing the loadings.
-
-        `.uns['pca']['variance_ratio']`
-        Ratio of explained variance.
-
-        `.uns['pca']['variance']`
-        Explained variance, equivalent to the eigenvalues of the covariance matrix.
+        If `data` is array-like and `return_info=False` was passed,
+        this function returns the PCA representation of `data` as an
+        array of the same type as the input array.
+
+        Otherwise, it returns `None` if `copy=False`, else an updated `EHRData` or `AnnData` object.
+        Sets the following fields:
+
+        `.obsm['X_pca' | key_added]` : :class:`~scipy.sparse.csr_matrix` | :class:`~scipy.sparse.csc_matrix` | :class:`~numpy.ndarray` (shape `(data.n_obs, n_comps)`)
+            PCA representation of data.
+        `.varm['PCs' | key_added]` : :class:`~numpy.ndarray` (shape `(data.n_vars, n_comps)`)
+            The principal components containing the loadings.
+        `.uns['pca' | key_added]['variance_ratio']` : :class:`~numpy.ndarray` (shape `(n_comps,)`)
+            Ratio of explained variance.
+        `.uns['pca' | key_added]['variance']` : :class:`~numpy.ndarray` (shape `(n_comps,)`)
+            Explained variance, equivalent to the eigenvalues of the
+            covariance matrix.
     """
     return sc.pp.pca(
         data=data,
diff --git a/ehrapy/tools/_scanpy_tl_api.py b/ehrapy/tools/_scanpy_tl_api.py
@@ -604,17 +604,15 @@ def paga(
 ) -> EHRData | AnnData | None:  # pragma: no cover
     """Mapping out the coarse-grained connectivity structures of complex manifolds :cite:p:`Wolf2019`.
 
-    By quantifying the connectivity of partitions (groups, clusters),
-    partition-based graph abstraction (PAGA) generates a much
+    By quantifying the connectivity of partitions (groups, clusters), partition-based graph abstraction (PAGA) generates a much
     simpler abstracted graph (*PAGA graph*) of partitions, in which edge weights
     represent confidence in the presence of connections. By thresholding this
     confidence in :func:`~ehrapy.plot.paga`, a much simpler representation of the
     manifold data is obtained, which is nonetheless faithful to the topology of the manifold.
     The confidence should be interpreted as the ratio of the actual versus the
-    expected value of connections under the null model of randomly connecting
-    partitions. We do not provide a p-value as this null model does not
-    precisely capture what one would consider "connected" in real data, hence it
-    strongly overestimates the expected value. See an extensive discussion of this in :cite:p:`Wolf2019`.
+    expected value of connections under the null model of randomly connecting partitions.
+    We do not provide a p-value as this null model does not precisely capture what one would consider "connected" in real data, hence it strongly overestimates the expected value.
+    See an extensive discussion of this in :cite:p:`Wolf2019`.
 
     .. note::
         Note that you can use the result of :func:`~ehrapy.plot.paga` in
@@ -675,11 +673,9 @@ def ingest(
     The function uses a knn classifier for mapping labels and the UMAP package :cite:p:`McInnes2018` for mapping the embeddings.
 
     .. note::
-        We refer to this *asymmetric* dataset integration as *ingesting*
-        annotations from reference data to new data. This is different from
-        learning a joint representation that integrates both datasets in an
-        unbiased way, as CCA (e.g. in Seurat) or a conditional VAE (e.g. in
-        scVI) would do.
+        We refer to this *asymmetric* dataset integration as *ingesting* annotations from reference data to new data.
+        This is different from learning a joint representation that integrates both datasets in an
+        unbiased way, as CCA (e.g. in Seurat) or a conditional VAE (e.g. in scVI) would do.
 
     You need to run :func:`~ehrapy.preprocessing.neighbors` on `edata_ref` before passing it.