Python implementation of simspec (for RSS/CSS). The paper describing the method in detail was published in Genome Biology in 2020. The original R implementation is available on GitHub.
First, clone the codebase to your local environment
git clone https://github.com/quadbio/pysimspec.git

Next, install the package with pip
cd pysimspec
pip install .

Note that this project uses uv for fast Python package management.
uv venv
uv pip install -e '.[dev]'

import scanpy as sc
import anndata
from pysimspec import Simspec, set_log_level, load
# Configure the pysimspec log level.
set_log_level("INFO")

# Read each raw dataset, tag its cells with a batch label, and merge the
# datasets on their shared variables (inner join).
batch_files = {'DS1': 'DS1_raw.h5ad', 'DS2': 'DS2_raw.h5ad'}
per_batch = []
for batch_name, h5ad_path in batch_files.items():
    batch_adata = sc.read_h5ad(h5ad_path)
    batch_adata.obs['batch'] = batch_name
    per_batch.append(batch_adata)
adata = anndata.concat(
    per_batch,
    join='inner',
    keys=list(batch_files),
    index_unique="_",
)

# Preprocessing: keep a copy of the raw matrix in the 'counts' layer, then
# normalize and log-transform X.
adata.layers['counts'] = adata.X.copy()
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
# Highly variable genes are selected from the 'counts' layer with 'batch'
# as the batch key; PCA is then run with the 'highly_variable' mask.
sc.pp.highly_variable_genes(
    adata,
    flavor='seurat_v3',
    layer='counts',
    n_top_genes=3000,
    batch_key='batch',
)
sc.pp.pca(adata, n_comps=20, mask_var='highly_variable')

# Run CSS: build the references from the PCA representation, compute the
# similarity spectrum, and reduce it with PCA.
simspec = Simspec()
simspec.compute_references(adata, batch='batch', use_rep='X_pca')
simspec.compute_simspec(adata)
simspec.compute_PCA(n_pcs=10)
css = simspec.get_result()
css_pca = simspec.get_transformed_result()
adata.obsm['X_css'] = css
adata.obsm['X_csspca'] = css_pca

# Downstream analysis on the CSS representation: neighbor graph, UMAP, and
# a UMAP plot colored by batch.
sc.pp.neighbors(adata, use_rep='X_css')
sc.tl.umap(adata)
sc.pl.umap(adata, color='batch')

# Persist the Simspec object so it can be reused on new data later.
simspec.save('simspec.pkl')

# Projection of new data: apply the same normalization and log transform to
# the new dataset before computing its CSS representation.
adata_DS3 = sc.read_h5ad('DS3_raw.h5ad')
adata_DS3.layers['counts'] = adata_DS3.X.copy()
sc.pp.normalize_total(adata_DS3, target_sum=1e4)
sc.pp.log1p(adata_DS3)

# Restore the saved Simspec object and compute the projected representation.
simspec = load('simspec.pkl')
simspec.compute_simspec(adata_DS3)
css_projected = simspec.get_result()
adata_DS3.obsm['X_css_proj'] = css_projected
adata_DS3.obsm['X_csspca_proj'] = simspec.get_transformed_result()

MIT