Loading...
Loading...
Guide Claude through SCSA, MetaTiME, CellVote, CellMatch, GPTAnno, and weighted KNN transfer workflows for annotating single-cell modalities.
npx skill4agent add starlitnightly/omicverse single-cell-annotation-skills-with-omicverse
t_cellanno.ipynbt_metatime.ipynbt_cellvote.mdt_cellvote_pbmc3k.ipynbt_cellmatch.ipynbt_gptanno.ipynbt_anno_trans.ipynbpbmc3k_filtered_gene_bc_matrices.tar.gzsample/rna.h5addata/filtered_gene_bc_matrices/hg19/pySCSA_2024_v1_plus.dbmodel_pathsc.read_10x_mtxov.pp.qcov.pp.preprocessov.pp.scaleov.pp.pcasc.tl.rank_genes_groupsscsa = ov.single.pySCSA(...)target='cellmarker''panglaodb'foldchangepvaluescsa.cell_anno(clustertype='leiden', result_key='scsa_celltype_cellmarker')scsa.cell_auto_annoadata.obsov.utils.embeddingsc.pl.dotplotov.single.get_celltype_markerscsa.get_model_tissue()ov.utils.roeov.utils.plot_cellproportionTiME_adata_scvi.h5adhttps://figshare.com/ndownloader/files/41440050scvi.model.SCVIadata.obsm['X_scVI']adata.obs['isTME']sc.pp.neighbors(adata, use_rep="X_scVI")adata.obsm['X_mde'] = ov.utils.mde(...)TiME_object = ov.single.MetaTiME(adata, mode='table')TiME_object.overcluster(resolution=8, clustercol='overcluster')TiME_object.predictTiME(save_obs_name='MetaTiME')Major_MetaTiMETiME_object.plotsc.pl.embeddingCELLVOTE_PBMC3Kdata/pbmc3k.h5adscsa_annotationgpt_celltypegbi_celltypesc.tl.rank_genes_groupsmarker_dict = top_markers_from_rgg(adata, 'leiden', topn=10)ov.single.get_celltype_markercv = ov.single.CellVote(adata)cv.vote(clusters_key='leiden', cluster_markers=marker_dict, celltype_keys=[...], species='human', organization='PBMC', provider='openai', model='gpt-4o-mini')adata.obs['CellVote_celltype']adata.obs[['leiden', 'scsa_annotation', ...]]pertpy.dt.haber_2017_regions()adata.obs['cell_label']cl.jsonov.single.download_cl(...)Cell_Taxonomy_resource.txtsentence-transformers/all-MiniLM-L6-v2BAAI/bge-base-en-v1.5local_model_dirov.single.CellOntologyMapper(cl_obo_file='new_ontology/cl.json', model_name='sentence-transformers/all-MiniLM-L6-v2', 
local_model_dir='./my_models')mapper.map_adata(...)use_taxonomy=Trueload_cell_taxonomy_resourcemapper.print_mapping_summary_taxonomycell_ontologycell_ontology_cl_idenhanced_cell_ontologymapper.find_similar_cells('T helper cell')mapper.get_cell_info(...).h5adAGI_API_KEYprovider='openai''qwen''kimi'ov.single.gptcelltype_localov.single.get_celltype_marker(adata, clustertype='leiden', rank=True, key='rank_genes_groups', foldchange=2, topgenenumber=5)ov.single.gptcelltype(...)result[key].split(': ')[-1]...adata.obs['gpt_celltype']ov.pl.embedding(..., color=['leiden','gpt_celltype'])ov.single.gptcelltype_localdata/analysis_lymph/rna-emb.h5addata/analysis_lymph/atac-emb.h5adobsm['X_glue']ov.utils.mdeov.utils.weighted_knn_trainer(train_adata=rna, train_adata_emb='X_glue', n_neighbors=15)labels, uncert = ov.utils.weighted_knn_transfer(query_adata=atac, query_adata_emb='X_glue', label_keys='major_celltype', knn_model=knn_transformer, ref_adata_obs=rna.obs)atac.obs['transf_celltype']atac.obs['transf_celltype_unc']major_celltypeov.utils.embeddingclustertypecluster
# Step 1: Initialize pySCSA
scsa = ov.single.pySCSA(
adata,
foldchange=1.5,
pvalue=0.01,
species='Human',
tissue='All',
target='cellmarker' # or 'panglaodb'
)
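# Step 2: Run annotation - NOTE: pass the clustering column as clustertype='leiden', NOT cluster='leiden'!
# (`cluster` selects which clusters to annotate; 'all' annotates every cluster.)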
anno_result = scsa.cell_anno(clustertype='leiden', cluster='all')
# Step 3: Add cell type labels to adata.obs
scsa.cell_auto_anno(adata, clustertype='leiden', key='scsa_celltype')
# Results are stored in adata.obs['scsa_celltype']

# WRONG! 'cluster' is NOT a valid parameter for cell_auto_anno!
# scsa.cell_auto_anno(adata, cluster='leiden') # ERROR!

# Step 1: Run COSG marker gene identification
ov.single.cosg(adata, groupby='leiden', n_genes_user=50)
# Step 2: Access results from adata.uns (NOT adata.obs!)
marker_names = adata.uns['rank_genes_groups']['names'] # DataFrame with cluster columns
marker_scores = adata.uns['rank_genes_groups']['scores']
# Step 3: Get top markers for specific cluster
cluster_0_markers = adata.uns['rank_genes_groups']['names']['0'][:10].tolist()
# Step 4: To create celltype column, manually map clusters to cell types
cluster_to_celltype = {
'0': 'T cells',
'1': 'B cells',
'2': 'Monocytes',
}
adata.obs['cosg_celltype'] = adata.obs['leiden'].map(cluster_to_celltype)

# WRONG! COSG does NOT create adata.obs columns directly!
# adata.obs['cosg_celltype'] # This key does NOT exist after running COSG!
# adata.uns['cosg_celltype'] # This key also does NOT exist!
clustertypeclusteradata.uns['rank_genes_groups']adata.obs['<key>']adata.uns['<key>']adata.uns['rank_genes_groups']haber_2017_regionsmajor_celltypet_cellanno.ipynbt_metatime.ipynbt_cellvote.mdt_cellvote_pbmc3k.ipynbt_cellmatch.ipynbt_gptanno.ipynbt_anno_trans.ipynbTiME_adata_scvi.h5addata/analysis_lymph/cl.jsonreference.md