diff --git a/README.md b/README.md
index d7ac6c4..c8110e8 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@
---
Example Notebook |
-Example output |
+Example output |
Atlas scale results
Switch to R/Seurat package: CyteTypeR
@@ -47,19 +47,25 @@ sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=1000)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
-sc.tl.leiden(adata, key_added="clusters")
-sc.tl.rank_genes_groups(adata, groupby="clusters", method="t-test")
+
+group_key = 'clusters'  # Key in adata.obs where cluster labels are stored (created by leiden below if not already present)
+
+sc.tl.leiden(adata, key_added=group_key)
+sc.tl.umap(adata)
+sc.tl.rank_genes_groups(adata, groupby=group_key, method="t-test")
# ------ Example Scanpy Pipeline ------
# ------ CyteType ------
-annotator = CyteType(adata, group_key="clusters")
+annotator = CyteType(adata, group_key=group_key)
adata = annotator.run(
study_context="Brief study description (e.g., Human brain tissue ...)",
)
-# View results
-print(adata.obs.cytetype_annotation_clusters)
-print(adata.obs.cytetype_cellOntologyTerm_clusters)
+# Visualize results
+sc.pl.embedding(adata, basis='umap', color=f'cytetype_annotation_{group_key}')
+sc.pl.embedding(adata, basis='umap', color=f'cytetype_cellOntologyTerm_{group_key}')
+sc.pl.embedding(adata, basis='umap', color=f'cytetype_cellOntologyTermID_{group_key}')
+sc.pl.embedding(adata, basis='umap', color=f'cytetype_cellState_{group_key}')
```
## Documentation
diff --git a/cytetype/__init__.py b/cytetype/__init__.py
index 826b03c..2296966 100644
--- a/cytetype/__init__.py
+++ b/cytetype/__init__.py
@@ -1,4 +1,4 @@
from .main import CyteType
__all__ = ["CyteType"]
-__version__ = "0.10.0"
+__version__ = "0.11.0"
diff --git a/cytetype/api.py b/cytetype/api.py
index ca498eb..4676db6 100644
--- a/cytetype/api.py
+++ b/cytetype/api.py
@@ -157,6 +157,9 @@ def _transform_results(results_data: Dict[str, Any]) -> Dict[str, Any]:
"clusterId": annotation_data.get("clusterId", cluster_id),
"annotation": annotation_data.get("annotation", "Unknown"),
"ontologyTerm": annotation_data.get(
+ "cellOntologyTermName", "Unknown"
+ ),
+ "ontologyTermID": annotation_data.get(
"cellOntologyTerm", "Unknown"
),
# Include additional fields from new format
diff --git a/cytetype/config.py b/cytetype/config.py
index 521e279..da8df69 100644
--- a/cytetype/config.py
+++ b/cytetype/config.py
@@ -11,6 +11,6 @@
)
-DEFAULT_API_URL = "https://nygen-labs-prod--cytetype-api.modal.run"
+DEFAULT_API_URL = "https://prod.cytetype.nygen.io"
DEFAULT_POLL_INTERVAL = 10
DEFAULT_TIMEOUT = 7200
diff --git a/cytetype/main.py b/cytetype/main.py
index 2fe9ccf..d8bc494 100644
--- a/cytetype/main.py
+++ b/cytetype/main.py
@@ -218,6 +218,31 @@ def _store_results_and_annotations(
).astype("category")
)
+ # Update ontology term IDs
+ ontology_id_map = {
+ item["clusterId"]: item["ontologyTermID"]
+ for item in result_data.get("annotations", [])
+ }
+ self.adata.obs[f"{results_prefix}_cellOntologyTermID_{self.group_key}"] = (
+ pd.Series(
+ [
+ ontology_id_map.get(cluster_id, "Unknown")
+ for cluster_id in self.clusters
+ ],
+ index=self.adata.obs.index,
+ ).astype("category")
+ )
+
+ # Update cell states
+ cell_state_map = {
+ item["clusterId"]: item.get("cellState", "")
+ for item in result_data.get("annotations", [])
+ }
+ self.adata.obs[f"{results_prefix}_cellState_{self.group_key}"] = pd.Series(
+ [cell_state_map.get(cluster_id, "") for cluster_id in self.clusters],
+ index=self.adata.obs.index,
+ ).astype("category")
+
# Check for unannotated clusters if requested
if check_unannotated:
unannotated_clusters = set(
@@ -238,6 +263,8 @@ def _store_results_and_annotations(
logger.success(
f"Annotations successfully added to `adata.obs['{results_prefix}_annotation_{self.group_key}']`\n"
f"Ontology terms added to `adata.obs['{results_prefix}_cellOntologyTerm_{self.group_key}']`\n"
+ f"Ontology term IDs added to `adata.obs['{results_prefix}_cellOntologyTermID_{self.group_key}']`\n"
+ f"Cell states added to `adata.obs['{results_prefix}_cellState_{self.group_key}']`\n"
f"Full results added to `adata.uns['{results_prefix}_results']`."
)
diff --git a/docs/examples.md b/docs/examples.md
index 1a6e6f5..53e7bff 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -5,13 +5,13 @@ The following are notebooks used to run CyteType on all the single-cell datasets
| Dataset | Links |
| --- | --- |
-| **Tabula Sapiens** | [Colab](https://colab.research.google.com/drive/1EyQXaruDJBPICUvlUY1E19zxOm_L4_VU?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/15332f10-2048-4099-ab1e-baf2ab9e39c3) - [H5ad](https://drive.google.com/file/d/1URo7niPqAo-9HGVH8f3QJfqll9lc8JN_/view?usp=drive_link) |
-| **GTEX v9** | [Colab](https://colab.research.google.com/drive/1uvqG2eVaUuNe66e0_7bp682uCdKx6-KL?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/5242f3b8-0078-417d-954e-00d1bb19bdf6) - [H5ad](https://drive.google.com/file/d/1EIpudRyasLUHR6J2v8fdpmBTbCE2__UF/view?usp=drive_link) |
-| **Hypomap** | [Colab](https://colab.research.google.com/drive/1OuTnh8xHoXaINCGcgu_1q-jANwXL8ggF?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/3840b662-bacf-4067-b93d-4e57c1f21187) - [H5ad](https://drive.google.com/file/d/1QMvZNdoDlKpOmyguAXSk45-YVz97v4tM/view?usp=drive_link) |
-| **Human Lung Cell Atlas (Core)** | [Colab](https://colab.research.google.com/drive/1FoTD-XzLNDPgYSlgVsxnLwPnWF5YiKny?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/6da1458a-392f-4bce-b6c9-4ccb308c8797) - [H5ad](https://drive.google.com/file/d/13O0dyUnwJKLPm8fncRt597S5hs2COsxx/view?usp=drive_link) |
-| **Immune Cell Atlas** | [Colab](https://colab.research.google.com/drive/1Kum9S_kU76QvS__42ABd-Xp1GpH4c9jU?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/05ff7629-8f0c-4b95-ac65-30bba9b384c5) - [H5ad](https://drive.google.com/file/d/1iqkC7dG1ovgKsU_8HdZ2eyELIxB0sM3t/view?usp=drive_link) |
-| **Mouse Pancreatic Cell Atlas** | [Colab](https://colab.research.google.com/drive/1fg9W3Lz-E_yAVoqs_6XrQsYkfsfnzFey?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/6d248cd2-6b61-4beb-bc58-1d63c7a2fc34) - [H5ad](https://drive.google.com/file/d/19qpRfz4WGuUsRNl0YKuy3YENfHKI6pz-/view?usp=drive_link) |
-| **Diabetic Kidney Disease** | [Colab](https://colab.research.google.com/drive/1kb3urFbl0PEPW4T_ti0DBTAmi5YK_-t1?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/0da4eaef-f165-4800-a4e3-c5cf8ec165ad) - [H5ad](https://drive.google.com/file/d/1yZXYlfZHLYcPL18Jy25J4v8kWQYhSsd7/view?usp=drive_link) |
+| **Tabula Sapiens** | [Colab](https://colab.research.google.com/drive/1EyQXaruDJBPICUvlUY1E19zxOm_L4_VU?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/15332f10-2048-4099-ab1e-baf2ab9e39c3) - [H5ad](https://drive.google.com/file/d/1URo7niPqAo-9HGVH8f3QJfqll9lc8JN_/view?usp=drive_link) |
+| **GTEX v9** | [Colab](https://colab.research.google.com/drive/1uvqG2eVaUuNe66e0_7bp682uCdKx6-KL?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/5242f3b8-0078-417d-954e-00d1bb19bdf6) - [H5ad](https://drive.google.com/file/d/1EIpudRyasLUHR6J2v8fdpmBTbCE2__UF/view?usp=drive_link) |
+| **Hypomap** | [Colab](https://colab.research.google.com/drive/1OuTnh8xHoXaINCGcgu_1q-jANwXL8ggF?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/3840b662-bacf-4067-b93d-4e57c1f21187) - [H5ad](https://drive.google.com/file/d/1QMvZNdoDlKpOmyguAXSk45-YVz97v4tM/view?usp=drive_link) |
+| **Human Lung Cell Atlas (Core)** | [Colab](https://colab.research.google.com/drive/1FoTD-XzLNDPgYSlgVsxnLwPnWF5YiKny?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/6da1458a-392f-4bce-b6c9-4ccb308c8797) - [H5ad](https://drive.google.com/file/d/13O0dyUnwJKLPm8fncRt597S5hs2COsxx/view?usp=drive_link) |
+| **Immune Cell Atlas** | [Colab](https://colab.research.google.com/drive/1Kum9S_kU76QvS__42ABd-Xp1GpH4c9jU?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/05ff7629-8f0c-4b95-ac65-30bba9b384c5) - [H5ad](https://drive.google.com/file/d/1iqkC7dG1ovgKsU_8HdZ2eyELIxB0sM3t/view?usp=drive_link) |
+| **Mouse Pancreatic Cell Atlas** | [Colab](https://colab.research.google.com/drive/1fg9W3Lz-E_yAVoqs_6XrQsYkfsfnzFey?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/6d248cd2-6b61-4beb-bc58-1d63c7a2fc34) - [H5ad](https://drive.google.com/file/d/19qpRfz4WGuUsRNl0YKuy3YENfHKI6pz-/view?usp=drive_link) |
+| **Diabetic Kidney Disease** | [Colab](https://colab.research.google.com/drive/1kb3urFbl0PEPW4T_ti0DBTAmi5YK_-t1?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/0da4eaef-f165-4800-a4e3-c5cf8ec165ad) - [H5ad](https://drive.google.com/file/d/1yZXYlfZHLYcPL18Jy25J4v8kWQYhSsd7/view?usp=drive_link) |
## CellHint Organ Atlases
@@ -20,22 +20,22 @@ Data was annotated in across three notebooks: [Colab 1/3](https://colab.research
| Tissue | Links |
| --- | --- |
-| **Blood** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/d0c219b4-2b4a-4b27-bac9-aea280a972f1) |
-| **Bone Marrow** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/bc5099b5-42c2-4ba7-8fdd-bc7b5dd3e84d) |
-| **Heart** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/2ffd6cf1-ec98-43d1-82d7-1fc3e9a11b8c) |
-| **Hippocampus** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/60b98429-2338-4408-a07c-bb60e82ac793) |
-| **Intestine** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/e0a2ca37-872f-489c-8de1-d84434d409fe) |
-| **Kidney** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/7dd1f0ea-7eec-4968-b353-8b52707de5ac) |
-| **Liver** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/a429348c-530a-486c-8980-3349a583b8c4) |
-| **Lung** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/75e41a21-f771-4ebc-829a-82f93529a147) |
-| **Lymph Node** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/b911e212-fe37-4bdc-a7f3-51e9146bf8cc) |
-| **Pancreas** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/b620245a-1ae0-4025-aab7-52ada6dcc6cb) |
-| **Skeletal Muscle** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/3f35a45d-aa1b-42cb-92d2-e739623a402b) |
-| **Spleen** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/4b64ec02-ac01-45b5-84b9-0f16708cbd85) |
+| **Blood** | [CyteType report](https://prod.cytetype.nygen.io/report/d0c219b4-2b4a-4b27-bac9-aea280a972f1) |
+| **Bone Marrow** | [CyteType report](https://prod.cytetype.nygen.io/report/bc5099b5-42c2-4ba7-8fdd-bc7b5dd3e84d) |
+| **Heart** | [CyteType report](https://prod.cytetype.nygen.io/report/2ffd6cf1-ec98-43d1-82d7-1fc3e9a11b8c) |
+| **Hippocampus** | [CyteType report](https://prod.cytetype.nygen.io/report/60b98429-2338-4408-a07c-bb60e82ac793) |
+| **Intestine** | [CyteType report](https://prod.cytetype.nygen.io/report/e0a2ca37-872f-489c-8de1-d84434d409fe) |
+| **Kidney** | [CyteType report](https://prod.cytetype.nygen.io/report/7dd1f0ea-7eec-4968-b353-8b52707de5ac) |
+| **Liver** | [CyteType report](https://prod.cytetype.nygen.io/report/a429348c-530a-486c-8980-3349a583b8c4) |
+| **Lung** | [CyteType report](https://prod.cytetype.nygen.io/report/75e41a21-f771-4ebc-829a-82f93529a147) |
+| **Lymph Node** | [CyteType report](https://prod.cytetype.nygen.io/report/b911e212-fe37-4bdc-a7f3-51e9146bf8cc) |
+| **Pancreas** | [CyteType report](https://prod.cytetype.nygen.io/report/b620245a-1ae0-4025-aab7-52ada6dcc6cb) |
+| **Skeletal Muscle** | [CyteType report](https://prod.cytetype.nygen.io/report/3f35a45d-aa1b-42cb-92d2-e739623a402b) |
+| **Spleen** | [CyteType report](https://prod.cytetype.nygen.io/report/4b64ec02-ac01-45b5-84b9-0f16708cbd85) |
## Cell Landscapes from BIS
Cell atlases hosted by [BIS](https://bis.zju.edu.cn/) from various organims and specific tissues
| Tissue | Links |
| --- | --- |
-| Human Cell Landscape | [Colab](https://colab.research.google.com/drive/1czLW33FYbnPOmPvnfddsvehXM491UDGq?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/581616bf-3c96-4e58-a290-881b40378309) - [Homepage](https://bis.zju.edu.cn/HCL/) |
+| Human Cell Landscape | [Colab](https://colab.research.google.com/drive/1czLW33FYbnPOmPvnfddsvehXM491UDGq?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/581616bf-3c96-4e58-a290-881b40378309) - [Homepage](https://bis.zju.edu.cn/HCL/) |
diff --git a/tests/test_main.py b/tests/test_main.py
index 7068483..d244716 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -93,16 +93,19 @@ def test_cytetype_success(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
+ "ontologyTermID": "CL:0000001",
}, # Corresponds to '0'
{
"clusterId": "2",
"annotation": "Cell Type B",
"ontologyTerm": "CL:0000002",
+ "ontologyTermID": "CL:0000002",
}, # Corresponds to '1'
{
"clusterId": "3",
"annotation": "Cell Type C",
"ontologyTerm": "CL:0000003",
+ "ontologyTermID": "CL:0000003",
}, # Corresponds to '2'
]
}
@@ -265,16 +268,19 @@ def test_cytetype_with_auth_token(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
+ "ontologyTermID": "CL:0000001",
},
{
"clusterId": "2",
"annotation": "Cell Type B",
"ontologyTerm": "CL:0000002",
+ "ontologyTermID": "CL:0000002",
},
{
"clusterId": "3",
"annotation": "Cell Type C",
"ontologyTerm": "CL:0000003",
+ "ontologyTermID": "CL:0000003",
},
]
}
@@ -312,6 +318,7 @@ def test_cytetype_get_results_helper(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
+ "ontologyTermID": "CL:0000001",
},
]
}
@@ -362,6 +369,7 @@ def test_cytetype_with_metadata(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
+ "ontologyTermID": "CL:0000001",
},
]
}
@@ -407,6 +415,7 @@ def test_cytetype_without_metadata(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
+ "ontologyTermID": "CL:0000001",
},
]
}
@@ -421,6 +430,113 @@ def test_cytetype_without_metadata(
assert "metadata" not in query_arg
+@patch("cytetype.main.submit_job")
+@patch("cytetype.main.poll_for_results")
+def test_cytetype_obs_columns(
+ mock_poll: MagicMock, mock_submit: MagicMock, mock_adata: anndata.AnnData
+) -> None:
+ """Test that all expected obs columns are created with correct names and values."""
+ job_id = "mock_job_obs_columns"
+ mock_submit.return_value = job_id
+ mock_result: dict[str, list[dict[str, str]]] = {
+ "annotations": [
+ {
+ "clusterId": "1",
+ "annotation": "T cell",
+ "ontologyTerm": "T cell",
+ "ontologyTermID": "CL:0000084",
+ "cellState": "activated",
+ },
+ {
+ "clusterId": "2",
+ "annotation": "B cell",
+ "ontologyTerm": "B cell",
+ "ontologyTermID": "CL:0000236",
+ "cellState": "naive",
+ },
+ {
+ "clusterId": "3",
+ "annotation": "Monocyte",
+ "ontologyTerm": "monocyte",
+ "ontologyTermID": "CL:0000576",
+ "cellState": "", # Empty string: a blank cell state must be stored as-is
+ },
+ ]
+ }
+ mock_poll.return_value = mock_result
+
+ group_key = "leiden"
+ results_prefix = "cytetype"
+
+ cytetype = CyteType(mock_adata, group_key=group_key)
+ adata_result = cytetype.run(study_context="Test study context")
+
+ # Check all expected obs columns exist and are categorical
+ expected_columns = [
+ f"{results_prefix}_annotation_{group_key}",
+ f"{results_prefix}_cellOntologyTerm_{group_key}",
+ f"{results_prefix}_cellOntologyTermID_{group_key}",
+ f"{results_prefix}_cellState_{group_key}",
+ ]
+
+ for col in expected_columns:
+ assert col in adata_result.obs, f"Column {col} not found in obs"
+ assert isinstance(adata_result.obs[col].dtype, pd.CategoricalDtype), (
+ f"Column {col} is not categorical"
+ )
+
+ # Check annotation values are correctly mapped
+ anno_col = f"{results_prefix}_annotation_{group_key}"
+ ct_map = {"0": "1", "1": "2", "2": "3"} # obs cluster label -> clusterId used in the mocked result
+ anno_map = {"1": "T cell", "2": "B cell", "3": "Monocyte"}
+ expected_annotations = [
+ anno_map[ct_map[str(label)]] for label in mock_adata.obs[group_key]
+ ]
+ pd.testing.assert_series_equal(
+ adata_result.obs[anno_col],
+ pd.Series(expected_annotations, index=mock_adata.obs.index, dtype="category"),
+ check_names=False,
+ )
+
+ # Check ontologyTerm values are correctly mapped
+ ontology_term_col = f"{results_prefix}_cellOntologyTerm_{group_key}"
+ ontology_term_map = {"1": "T cell", "2": "B cell", "3": "monocyte"}
+ expected_ontology_terms = [
+ ontology_term_map[ct_map[str(label)]] for label in mock_adata.obs[group_key]
+ ]
+ pd.testing.assert_series_equal(
+ adata_result.obs[ontology_term_col],
+ pd.Series(
+ expected_ontology_terms, index=mock_adata.obs.index, dtype="category"
+ ),
+ check_names=False,
+ )
+
+ # Check ontologyTermID values are correctly mapped
+ ontology_id_col = f"{results_prefix}_cellOntologyTermID_{group_key}"
+ ontology_id_map = {"1": "CL:0000084", "2": "CL:0000236", "3": "CL:0000576"}
+ expected_ontology_ids = [
+ ontology_id_map[ct_map[str(label)]] for label in mock_adata.obs[group_key]
+ ]
+ pd.testing.assert_series_equal(
+ adata_result.obs[ontology_id_col],
+ pd.Series(expected_ontology_ids, index=mock_adata.obs.index, dtype="category"),
+ check_names=False,
+ )
+
+ # Check cellState values are correctly mapped (including empty string)
+ cell_state_col = f"{results_prefix}_cellState_{group_key}"
+ cell_state_map = {"1": "activated", "2": "naive", "3": ""}
+ expected_cell_states = [
+ cell_state_map[ct_map[str(label)]] for label in mock_adata.obs[group_key]
+ ]
+ pd.testing.assert_series_equal(
+ adata_result.obs[cell_state_col],
+ pd.Series(expected_cell_states, index=mock_adata.obs.index, dtype="category"),
+ check_names=False,
+ )
+
+
# --- TODO ---
# - Add tests specifically for cytetype/anndata_helpers.py
# - Add tests specifically for cytetype/client.py (e.g., more nuanced API responses)