@@ -275,6 +275,38 @@ def scanpy_cellanno_from_dict(adata:anndata.AnnData,
275
275
adata .obs [anno_name + '_celltype' ] = adata .obs [clustertype ].map (anno_dict ).astype ('category' )
276
276
print ('...cell type added to {}_celltype on obs of anndata' .format (anno_name ))
277
277
278
def _marker_score_threshold(scores):
    """Return the score cutoff above which genes are kept as markers.

    The scores are binned with ``np.histogram`` (default 10 bins, i.e. 11
    edges).  The cutoff is the midpoint between the 6th-from-last and the
    5th-from-last *positive* bin edges.  When fewer than six edges are
    positive that midpoint does not exist, so the mean score is used instead
    of raising ``IndexError``.
    """
    edges = np.histogram(scores)[1]
    positive_edges = edges[edges > 0]
    if len(positive_edges) >= 6:
        return (positive_edges[-5] + positive_edges[-6]) / 2
    # Too few positive edges (scores mostly <= 0): keep the above-average genes.
    return scores.mean()


def get_celltype_marker(adata: anndata.AnnData,
                        clustertype: str = 'leiden',
                        log2fc_min: int = 2, scores_type: str = 'scores',
                        pval_cutoff: float = 0.05, rank: bool = False) -> dict:
    r"""Get marker genes for each clusters.

    Arguments:
        adata: anndata object
        clustertype: Clustering name used in scanpy. (leiden)
        log2fc_min: Minimum log2 fold change of marker genes. (2)
        scores_type: The type of scores. can be selected from `scores` and `logfoldchanges`
        pval_cutoff: Maximum p value of marker genes. (0.05)
        rank: If False, run `sc.tl.rank_genes_groups` (wilcoxon) on `clustertype`
            before extracting markers. If True, reuse the results already stored
            under `adata.uns['rank_genes_groups']`; they are computed anyway when
            that entry is missing. (False)

    Returns:
        cellmarker: A dictionary of marker genes for each clusters. Keys are the
            sorted unique values of `adata.obs[clustertype]`, values are arrays
            of gene names.
    """
    print('...get cell type marker')
    celltypes = sorted(adata.obs[clustertype].unique())

    # Rank when asked to, and also when there is nothing to reuse: the lookup
    # below reads the 'rank_genes_groups' results and would fail without them.
    if not rank or 'rank_genes_groups' not in adata.uns:
        sc.tl.rank_genes_groups(adata, clustertype, method='wilcoxon')

    cell_marker_dict = {}
    for celltype in celltypes:
        # Differentially expressed genes of this cluster, pre-filtered by
        # fold change and p value.
        degs = sc.get.rank_genes_groups_df(adata, group=celltype,
                                           key='rank_genes_groups',
                                           log2fc_min=log2fc_min,
                                           pval_cutoff=pval_cutoff)
        threshold = _marker_score_threshold(degs[scores_type])
        # Keep only the genes whose score exceeds the cluster-specific cutoff.
        cell_marker_dict[celltype] = degs.loc[degs[scores_type] > threshold, 'names'].values

    return cell_marker_dict
278
310
279
311
class pySCSA (object ):
280
312
@@ -433,7 +465,7 @@ def cell_auto_anno(self,adata:anndata.AnnData,clustertype:str='leiden')->None:
433
465
434
466
def get_celltype_marker(self, adata: anndata.AnnData,
                        clustertype: str = 'leiden',
                        log2fc_min: int = 2, scores_type: str = 'scores',
                        pval_cutoff: float = 0.05, rank: bool = True) -> dict:
    r"""Get marker genes for each clusters.

    Thin wrapper that forwards every argument to the module-level
    `get_celltype_marker` function.

    Arguments:
        adata: anndata object
        clustertype: Clustering name used in scanpy. (leiden)
        log2fc_min: Minimum log2 fold change of marker genes. (2)
        scores_type: The type of scores. can be selected from `scores` and `logfoldchanges`
        pval_cutoff: Maximum p value of marker genes. (0.05)
        rank: Forwarded unchanged. When False the module-level function runs
            the wilcoxon ranking itself; when True it reuses the ranking
            results already stored on `adata`. (True)

    Returns:
        cellmarker: A dictionary of marker genes for each clusters.
    """
    # No status print here: the module-level function already prints
    # '...get cell type marker', so printing again would duplicate the line.
    return get_celltype_marker(adata=adata,
                               clustertype=clustertype,
                               log2fc_min=log2fc_min, scores_type=scores_type,
                               pval_cutoff=pval_cutoff, rank=rank)
0 commit comments