|
1902 | 1902 | "\n", |
1903 | 1903 | "if _gseapy_ok and _integration_ok:\n", |
1904 | 1904 | " # ── Run one-vs-rest Wilcoxon marker analysis on Harmony clusters ──────────\n", |
1905 | | - " _cluster_key = 'leiden_harmony_r0.5' # adjust to your chosen resolution from §16\n", |
| 1905 | + " _cluster_key = 'leiden_harmony_r0.5' # adjust to your chosen resolution\n", |
1906 | 1906 | " _mk_key = f'rank_genes_{_cluster_key}'\n", |
1907 | 1907 | "\n", |
| 1908 | + " # Optimization: Only run the computationally heavy Wilcoxon test if the results don't already exist in the AnnData object\n", |
1908 | 1909 | " if _mk_key not in adata_int.uns:\n", |
1909 | 1910 | " print(f\"Running Wilcoxon one-vs-rest for {_cluster_key}...\")\n", |
1910 | 1911 | " sc.tl.rank_genes_groups(\n", |
|
1942 | 1943 | " _ora_padj_cutoff = 0.05\n", |
1943 | 1944 | " _ora_logfc_cutoff = 0.0 # keep genes with logFC > 0 (upregulated)\n", |
1944 | 1945 | "\n", |
| 1946 | + " # Extract marker gene results structure from the AnnData object\n", |
1945 | 1947 | " _mk = adata_int.uns[_mk_key]\n", |
1946 | 1948 | " _groups = _mk['names'].dtype.names\n", |
1947 | 1949 | " _n_genes = _mk['names'].shape[0]\n", |
|
1957 | 1959 | "\n", |
1958 | 1960 | " _all_ora = {lib: [] for lib in _gene_set_libraries}\n", |
1959 | 1961 | "\n", |
| 1962 | + " # Loop through each cluster and filter genes that are significant and upregulated\n", |
1960 | 1963 | " for _cl in _groups:\n", |
1961 | 1964 | " _sig_genes = [\n", |
1962 | 1965 | " _mk['names'][_cl][r]\n", |
|
1968 | 1971 | " if not _sig_genes:\n", |
1969 | 1972 | " continue\n", |
1970 | 1973 | "\n", |
| 1974 | + " # Query databases for this cell cluster\n", |
1971 | 1975 | " for _lib_name, _lib_id in _gene_set_libraries.items():\n", |
1972 | 1976 | " try:\n", |
1973 | 1977 | " _enr = gp.enrich(\n", |
|
1980 | 1984 | " _df = _enr.results.copy()\n", |
1981 | 1985 | " _df.insert(0, 'cluster', _cl)\n", |
1982 | 1986 | " _df['query_size'] = len(_sig_genes)\n", |
| 1987 | + " # Parse the 'Overlap' string (e.g., \"5/45\") to compute ratios\n", |
1983 | 1988 | " if 'Overlap' in _df.columns:\n", |
1984 | 1989 | " _ov = _df['Overlap'].str.split('/')\n", |
1985 | 1990 | " _df['count'] = _ov.str[0].astype(int)\n", |
|
0 commit comments