Skip to content

Commit f34ec2f

Browse files
authored
Refine comments and add optimization notes in notebook
1 parent c33bad6 commit f34ec2f

1 file changed

Lines changed: 6 additions & 1 deletion

File tree

notebooks/scRNAseq_in_Python.ipynb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1902,9 +1902,10 @@
19021902
"\n",
19031903
"if _gseapy_ok and _integration_ok:\n",
19041904
" # ── Run one-vs-rest Wilcoxon marker analysis on Harmony clusters ──────────\n",
1905-
" _cluster_key = 'leiden_harmony_r0.5' # adjust to your chosen resolution from §16\n",
1905+
" _cluster_key = 'leiden_harmony_r0.5' # adjust to your chosen resolution\n",
19061906
" _mk_key = f'rank_genes_{_cluster_key}'\n",
19071907
"\n",
1908+
" # Optimization: Only run the computationally heavy Wilcoxon test if the results don't already exist in the AnnData object\n",
19081909
" if _mk_key not in adata_int.uns:\n",
19091910
" print(f\"Running Wilcoxon one-vs-rest for {_cluster_key}...\")\n",
19101911
" sc.tl.rank_genes_groups(\n",
@@ -1942,6 +1943,7 @@
19421943
" _ora_padj_cutoff = 0.05\n",
19431944
" _ora_logfc_cutoff = 0.0 # keep genes with logFC > 0 (upregulated)\n",
19441945
"\n",
1946+
" # Extract marker gene results structure from the AnnData object\n",
19451947
" _mk = adata_int.uns[_mk_key]\n",
19461948
" _groups = _mk['names'].dtype.names\n",
19471949
" _n_genes = _mk['names'].shape[0]\n",
@@ -1957,6 +1959,7 @@
19571959
"\n",
19581960
" _all_ora = {lib: [] for lib in _gene_set_libraries}\n",
19591961
"\n",
1962+
" # Loop through each cluster and filter genes that are significant and upregulated\n",
19601963
" for _cl in _groups:\n",
19611964
" _sig_genes = [\n",
19621965
" _mk['names'][_cl][r]\n",
@@ -1968,6 +1971,7 @@
19681971
" if not _sig_genes:\n",
19691972
" continue\n",
19701973
"\n",
1974+
" # Query databases for this cell cluster\n",
19711975
" for _lib_name, _lib_id in _gene_set_libraries.items():\n",
19721976
" try:\n",
19731977
" _enr = gp.enrich(\n",
@@ -1980,6 +1984,7 @@
19801984
" _df = _enr.results.copy()\n",
19811985
" _df.insert(0, 'cluster', _cl)\n",
19821986
" _df['query_size'] = len(_sig_genes)\n",
1987+
" # Parse the 'Overlap' string (e.g., \"5/45\") to compute ratios\n",
19831988
" if 'Overlap' in _df.columns:\n",
19841989
" _ov = _df['Overlap'].str.split('/')\n",
19851990
" _df['count'] = _ov.str[0].astype(int)\n",

0 commit comments

Comments
 (0)