diff --git a/README.md b/README.md index d7ac6c4..c8110e8 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ --- Example Notebook | -Example output | +Example output | Atlas scale results Switch to R/Seurat package: CyteTypeR @@ -47,19 +47,25 @@ sc.pp.log1p(adata) sc.pp.highly_variable_genes(adata, n_top_genes=1000) sc.pp.pca(adata) sc.pp.neighbors(adata) -sc.tl.leiden(adata, key_added="clusters") -sc.tl.rank_genes_groups(adata, groupby="clusters", method="t-test") + +group_key = 'clusters' # Wherever you want to store or already have clusters in adata.obs + +sc.tl.leiden(adata, key_added=group_key) +sc.tl.umap(adata) +sc.tl.rank_genes_groups(adata, groupby=group_key, method="t-test") # ------ Example Scanpy Pipeline ------ # ------ CyteType ------ -annotator = CyteType(adata, group_key="clusters") +annotator = CyteType(adata, group_key=group_key) adata = annotator.run( study_context="Brief study description (e.g., Human brain tissue ...)", ) -# View results -print(adata.obs.cytetype_annotation_clusters) -print(adata.obs.cytetype_cellOntologyTerm_clusters) +# Visualize results +sc.pl.embedding(adata, basis='umap', color=f'cytetype_annotation_{group_key}') +sc.pl.embedding(adata, basis='umap', color=f'cytetype_cellOntologyTerm_{group_key}') +sc.pl.embedding(adata, basis='umap', color=f'cytetype_cellOntologyTermID_{group_key}') +sc.pl.embedding(adata, basis='umap', color=f'cytetype_cellState_{group_key}') ``` ## Documentation diff --git a/cytetype/__init__.py b/cytetype/__init__.py index 826b03c..2296966 100644 --- a/cytetype/__init__.py +++ b/cytetype/__init__.py @@ -1,4 +1,4 @@ from .main import CyteType __all__ = ["CyteType"] -__version__ = "0.10.0" +__version__ = "0.11.0" diff --git a/cytetype/api.py b/cytetype/api.py index ca498eb..4676db6 100644 --- a/cytetype/api.py +++ b/cytetype/api.py @@ -157,6 +157,9 @@ def _transform_results(results_data: Dict[str, Any]) -> Dict[str, Any]: "clusterId": annotation_data.get("clusterId", cluster_id), "annotation": 
annotation_data.get("annotation", "Unknown"), "ontologyTerm": annotation_data.get( + "cellOntologyTermName", "Unknown" + ), + "ontologyTermID": annotation_data.get( "cellOntologyTerm", "Unknown" ), # Include additional fields from new format diff --git a/cytetype/config.py b/cytetype/config.py index 521e279..da8df69 100644 --- a/cytetype/config.py +++ b/cytetype/config.py @@ -11,6 +11,6 @@ ) -DEFAULT_API_URL = "https://nygen-labs-prod--cytetype-api.modal.run" +DEFAULT_API_URL = "https://prod.cytetype.nygen.io" DEFAULT_POLL_INTERVAL = 10 DEFAULT_TIMEOUT = 7200 diff --git a/cytetype/main.py b/cytetype/main.py index 2fe9ccf..d8bc494 100644 --- a/cytetype/main.py +++ b/cytetype/main.py @@ -218,6 +218,31 @@ def _store_results_and_annotations( ).astype("category") ) + # Update ontology term IDs + ontology_id_map = { + item["clusterId"]: item["ontologyTermID"] + for item in result_data.get("annotations", []) + } + self.adata.obs[f"{results_prefix}_cellOntologyTermID_{self.group_key}"] = ( + pd.Series( + [ + ontology_id_map.get(cluster_id, "Unknown") + for cluster_id in self.clusters + ], + index=self.adata.obs.index, + ).astype("category") + ) + + # Update cell states + cell_state_map = { + item["clusterId"]: item.get("cellState", "") + for item in result_data.get("annotations", []) + } + self.adata.obs[f"{results_prefix}_cellState_{self.group_key}"] = pd.Series( + [cell_state_map.get(cluster_id, "") for cluster_id in self.clusters], + index=self.adata.obs.index, + ).astype("category") + # Check for unannotated clusters if requested if check_unannotated: unannotated_clusters = set( @@ -238,6 +263,8 @@ def _store_results_and_annotations( logger.success( f"Annotations successfully added to `adata.obs['{results_prefix}_annotation_{self.group_key}']`\n" f"Ontology terms added to `adata.obs['{results_prefix}_cellOntologyTerm_{self.group_key}']`\n" + f"Ontology term IDs added to `adata.obs['{results_prefix}_cellOntologyTermID_{self.group_key}']`\n" + f"Cell states added to 
`adata.obs['{results_prefix}_cellState_{self.group_key}']`\n" f"Full results added to `adata.uns['{results_prefix}_results']`." ) diff --git a/docs/examples.md b/docs/examples.md index 1a6e6f5..53e7bff 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -5,13 +5,13 @@ The following are notebooks used to run CyteType on all the single-cell datasets | Dataset | Links | | --- | --- | -| **Tabula Sapiens** | [Colab](https://colab.research.google.com/drive/1EyQXaruDJBPICUvlUY1E19zxOm_L4_VU?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/15332f10-2048-4099-ab1e-baf2ab9e39c3) - [H5ad](https://drive.google.com/file/d/1URo7niPqAo-9HGVH8f3QJfqll9lc8JN_/view?usp=drive_link) | -| **GTEX v9** | [Colab](https://colab.research.google.com/drive/1uvqG2eVaUuNe66e0_7bp682uCdKx6-KL?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/5242f3b8-0078-417d-954e-00d1bb19bdf6) - [H5ad](https://drive.google.com/file/d/1EIpudRyasLUHR6J2v8fdpmBTbCE2__UF/view?usp=drive_link) | -| **Hypomap** | [Colab](https://colab.research.google.com/drive/1OuTnh8xHoXaINCGcgu_1q-jANwXL8ggF?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/3840b662-bacf-4067-b93d-4e57c1f21187) - [H5ad](https://drive.google.com/file/d/1QMvZNdoDlKpOmyguAXSk45-YVz97v4tM/view?usp=drive_link) | -| **Human Lung Cell Atlas (Core)** | [Colab](https://colab.research.google.com/drive/1FoTD-XzLNDPgYSlgVsxnLwPnWF5YiKny?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/6da1458a-392f-4bce-b6c9-4ccb308c8797) - [H5ad](https://drive.google.com/file/d/13O0dyUnwJKLPm8fncRt597S5hs2COsxx/view?usp=drive_link) | -| **Immune Cell Atlas** | [Colab](https://colab.research.google.com/drive/1Kum9S_kU76QvS__42ABd-Xp1GpH4c9jU?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/05ff7629-8f0c-4b95-ac65-30bba9b384c5) - 
[H5ad](https://drive.google.com/file/d/1iqkC7dG1ovgKsU_8HdZ2eyELIxB0sM3t/view?usp=drive_link) | -| **Mouse Pancreatic Cell Atlas** | [Colab](https://colab.research.google.com/drive/1fg9W3Lz-E_yAVoqs_6XrQsYkfsfnzFey?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/6d248cd2-6b61-4beb-bc58-1d63c7a2fc34) - [H5ad](https://drive.google.com/file/d/19qpRfz4WGuUsRNl0YKuy3YENfHKI6pz-/view?usp=drive_link) | -| **Diabetic Kidney Disease** | [Colab](https://colab.research.google.com/drive/1kb3urFbl0PEPW4T_ti0DBTAmi5YK_-t1?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/0da4eaef-f165-4800-a4e3-c5cf8ec165ad) - [H5ad](https://drive.google.com/file/d/1yZXYlfZHLYcPL18Jy25J4v8kWQYhSsd7/view?usp=drive_link) | +| **Tabula Sapiens** | [Colab](https://colab.research.google.com/drive/1EyQXaruDJBPICUvlUY1E19zxOm_L4_VU?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/15332f10-2048-4099-ab1e-baf2ab9e39c3) - [H5ad](https://drive.google.com/file/d/1URo7niPqAo-9HGVH8f3QJfqll9lc8JN_/view?usp=drive_link) | +| **GTEX v9** | [Colab](https://colab.research.google.com/drive/1uvqG2eVaUuNe66e0_7bp682uCdKx6-KL?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/5242f3b8-0078-417d-954e-00d1bb19bdf6) - [H5ad](https://drive.google.com/file/d/1EIpudRyasLUHR6J2v8fdpmBTbCE2__UF/view?usp=drive_link) | +| **Hypomap** | [Colab](https://colab.research.google.com/drive/1OuTnh8xHoXaINCGcgu_1q-jANwXL8ggF?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/3840b662-bacf-4067-b93d-4e57c1f21187) - [H5ad](https://drive.google.com/file/d/1QMvZNdoDlKpOmyguAXSk45-YVz97v4tM/view?usp=drive_link) | +| **Human Lung Cell Atlas (Core)** | [Colab](https://colab.research.google.com/drive/1FoTD-XzLNDPgYSlgVsxnLwPnWF5YiKny?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/6da1458a-392f-4bce-b6c9-4ccb308c8797) - 
[H5ad](https://drive.google.com/file/d/13O0dyUnwJKLPm8fncRt597S5hs2COsxx/view?usp=drive_link) | +| **Immune Cell Atlas** | [Colab](https://colab.research.google.com/drive/1Kum9S_kU76QvS__42ABd-Xp1GpH4c9jU?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/05ff7629-8f0c-4b95-ac65-30bba9b384c5) - [H5ad](https://drive.google.com/file/d/1iqkC7dG1ovgKsU_8HdZ2eyELIxB0sM3t/view?usp=drive_link) | +| **Mouse Pancreatic Cell Atlas** | [Colab](https://colab.research.google.com/drive/1fg9W3Lz-E_yAVoqs_6XrQsYkfsfnzFey?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/6d248cd2-6b61-4beb-bc58-1d63c7a2fc34) - [H5ad](https://drive.google.com/file/d/19qpRfz4WGuUsRNl0YKuy3YENfHKI6pz-/view?usp=drive_link) | +| **Diabetic Kidney Disease** | [Colab](https://colab.research.google.com/drive/1kb3urFbl0PEPW4T_ti0DBTAmi5YK_-t1?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/0da4eaef-f165-4800-a4e3-c5cf8ec165ad) - [H5ad](https://drive.google.com/file/d/1yZXYlfZHLYcPL18Jy25J4v8kWQYhSsd7/view?usp=drive_link) | ## CellHint Organ Atlases @@ -20,22 +20,22 @@ Data was annotated in across three notebooks: [Colab 1/3](https://colab.research | Tissue | Links | | --- | --- | -| **Blood** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/d0c219b4-2b4a-4b27-bac9-aea280a972f1) | -| **Bone Marrow** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/bc5099b5-42c2-4ba7-8fdd-bc7b5dd3e84d) | -| **Heart** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/2ffd6cf1-ec98-43d1-82d7-1fc3e9a11b8c) | -| **Hippocampus** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/60b98429-2338-4408-a07c-bb60e82ac793) | -| **Intestine** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/e0a2ca37-872f-489c-8de1-d84434d409fe) | -| **Kidney** | [CyteType 
report](https://nygen-labs-prod--cytetype-api.modal.run/report/7dd1f0ea-7eec-4968-b353-8b52707de5ac) | -| **Liver** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/a429348c-530a-486c-8980-3349a583b8c4) | -| **Lung** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/75e41a21-f771-4ebc-829a-82f93529a147) | -| **Lymph Node** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/b911e212-fe37-4bdc-a7f3-51e9146bf8cc) | -| **Pancreas** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/b620245a-1ae0-4025-aab7-52ada6dcc6cb) | -| **Skeletal Muscle** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/3f35a45d-aa1b-42cb-92d2-e739623a402b) | -| **Spleen** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/4b64ec02-ac01-45b5-84b9-0f16708cbd85) | +| **Blood** | [CyteType report](https://prod.cytetype.nygen.io/report/d0c219b4-2b4a-4b27-bac9-aea280a972f1) | +| **Bone Marrow** | [CyteType report](https://prod.cytetype.nygen.io/report/bc5099b5-42c2-4ba7-8fdd-bc7b5dd3e84d) | +| **Heart** | [CyteType report](https://prod.cytetype.nygen.io/report/2ffd6cf1-ec98-43d1-82d7-1fc3e9a11b8c) | +| **Hippocampus** | [CyteType report](https://prod.cytetype.nygen.io/report/60b98429-2338-4408-a07c-bb60e82ac793) | +| **Intestine** | [CyteType report](https://prod.cytetype.nygen.io/report/e0a2ca37-872f-489c-8de1-d84434d409fe) | +| **Kidney** | [CyteType report](https://prod.cytetype.nygen.io/report/7dd1f0ea-7eec-4968-b353-8b52707de5ac) | +| **Liver** | [CyteType report](https://prod.cytetype.nygen.io/report/a429348c-530a-486c-8980-3349a583b8c4) | +| **Lung** | [CyteType report](https://prod.cytetype.nygen.io/report/75e41a21-f771-4ebc-829a-82f93529a147) | +| **Lymph Node** | [CyteType report](https://prod.cytetype.nygen.io/report/b911e212-fe37-4bdc-a7f3-51e9146bf8cc) | +| **Pancreas** | [CyteType 
report](https://prod.cytetype.nygen.io/report/b620245a-1ae0-4025-aab7-52ada6dcc6cb) | +| **Skeletal Muscle** | [CyteType report](https://prod.cytetype.nygen.io/report/3f35a45d-aa1b-42cb-92d2-e739623a402b) | +| **Spleen** | [CyteType report](https://prod.cytetype.nygen.io/report/4b64ec02-ac01-45b5-84b9-0f16708cbd85) | ## Cell Landscapes from BIS Cell atlases hosted by [BIS](https://bis.zju.edu.cn/) from various organims and specific tissues | Tissue | Links | | --- | --- | -| Human Cell Landscape | [Colab](https://colab.research.google.com/drive/1czLW33FYbnPOmPvnfddsvehXM491UDGq?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/581616bf-3c96-4e58-a290-881b40378309) - [Homepage](https://bis.zju.edu.cn/HCL/) | +| Human Cell Landscape | [Colab](https://colab.research.google.com/drive/1czLW33FYbnPOmPvnfddsvehXM491UDGq?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/581616bf-3c96-4e58-a290-881b40378309) - [Homepage](https://bis.zju.edu.cn/HCL/) | diff --git a/tests/test_main.py b/tests/test_main.py index 7068483..d244716 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -93,16 +93,19 @@ def test_cytetype_success( "clusterId": "1", "annotation": "Cell Type A", "ontologyTerm": "CL:0000001", + "ontologyTermID": "CL:0000001", }, # Corresponds to '0' { "clusterId": "2", "annotation": "Cell Type B", "ontologyTerm": "CL:0000002", + "ontologyTermID": "CL:0000002", }, # Corresponds to '1' { "clusterId": "3", "annotation": "Cell Type C", "ontologyTerm": "CL:0000003", + "ontologyTermID": "CL:0000003", }, # Corresponds to '2' ] } @@ -265,16 +268,19 @@ def test_cytetype_with_auth_token( "clusterId": "1", "annotation": "Cell Type A", "ontologyTerm": "CL:0000001", + "ontologyTermID": "CL:0000001", }, { "clusterId": "2", "annotation": "Cell Type B", "ontologyTerm": "CL:0000002", + "ontologyTermID": "CL:0000002", }, { "clusterId": "3", "annotation": "Cell Type C", "ontologyTerm": "CL:0000003", + "ontologyTermID": 
"CL:0000003", }, ] } @@ -312,6 +318,7 @@ def test_cytetype_get_results_helper( "clusterId": "1", "annotation": "Cell Type A", "ontologyTerm": "CL:0000001", + "ontologyTermID": "CL:0000001", }, ] } @@ -362,6 +369,7 @@ def test_cytetype_with_metadata( "clusterId": "1", "annotation": "Cell Type A", "ontologyTerm": "CL:0000001", + "ontologyTermID": "CL:0000001", }, ] } @@ -407,6 +415,7 @@ def test_cytetype_without_metadata( "clusterId": "1", "annotation": "Cell Type A", "ontologyTerm": "CL:0000001", + "ontologyTermID": "CL:0000001", }, ] } @@ -421,6 +430,113 @@ def test_cytetype_without_metadata( assert "metadata" not in query_arg +@patch("cytetype.main.submit_job") +@patch("cytetype.main.poll_for_results") +def test_cytetype_obs_columns( + mock_poll: MagicMock, mock_submit: MagicMock, mock_adata: anndata.AnnData +) -> None: + """Test that all expected obs columns are created with correct names and values.""" + job_id = "mock_job_obs_columns" + mock_submit.return_value = job_id + mock_result: dict[str, list[dict[str, str]]] = { + "annotations": [ + { + "clusterId": "1", + "annotation": "T cell", + "ontologyTerm": "T cell", + "ontologyTermID": "CL:0000084", + "cellState": "activated", + }, + { + "clusterId": "2", + "annotation": "B cell", + "ontologyTerm": "B cell", + "ontologyTermID": "CL:0000236", + "cellState": "naive", + }, + { + "clusterId": "3", + "annotation": "Monocyte", + "ontologyTerm": "monocyte", + "ontologyTermID": "CL:0000576", + "cellState": "", # Empty cell state + }, + ] + } + mock_poll.return_value = mock_result + + group_key = "leiden" + results_prefix = "cytetype" + + cytetype = CyteType(mock_adata, group_key=group_key) + adata_result = cytetype.run(study_context="Test study context") + + # Check all expected obs columns exist + expected_columns = [ + f"{results_prefix}_annotation_{group_key}", + f"{results_prefix}_cellOntologyTerm_{group_key}", + f"{results_prefix}_cellOntologyTermID_{group_key}", + f"{results_prefix}_cellState_{group_key}", + ] + + 
for col in expected_columns: + assert col in adata_result.obs, f"Column {col} not found in obs" + assert isinstance(adata_result.obs[col].dtype, pd.CategoricalDtype), ( + f"Column {col} is not categorical" + ) + + # Check annotation values are correctly mapped + anno_col = f"{results_prefix}_annotation_{group_key}" + ct_map = {"0": "1", "1": "2", "2": "3"} # cluster label -> cluster ID mapping + anno_map = {"1": "T cell", "2": "B cell", "3": "Monocyte"} + expected_annotations = [ + anno_map[ct_map[str(label)]] for label in mock_adata.obs[group_key] + ] + pd.testing.assert_series_equal( + adata_result.obs[anno_col], + pd.Series(expected_annotations, index=mock_adata.obs.index, dtype="category"), + check_names=False, + ) + + # Check ontologyTerm values are correctly mapped + ontology_term_col = f"{results_prefix}_cellOntologyTerm_{group_key}" + ontology_term_map = {"1": "T cell", "2": "B cell", "3": "monocyte"} + expected_ontology_terms = [ + ontology_term_map[ct_map[str(label)]] for label in mock_adata.obs[group_key] + ] + pd.testing.assert_series_equal( + adata_result.obs[ontology_term_col], + pd.Series( + expected_ontology_terms, index=mock_adata.obs.index, dtype="category" + ), + check_names=False, + ) + + # Check ontologyTermID values are correctly mapped + ontology_id_col = f"{results_prefix}_cellOntologyTermID_{group_key}" + ontology_id_map = {"1": "CL:0000084", "2": "CL:0000236", "3": "CL:0000576"} + expected_ontology_ids = [ + ontology_id_map[ct_map[str(label)]] for label in mock_adata.obs[group_key] + ] + pd.testing.assert_series_equal( + adata_result.obs[ontology_id_col], + pd.Series(expected_ontology_ids, index=mock_adata.obs.index, dtype="category"), + check_names=False, + ) + + # Check cellState values are correctly mapped (including empty string) + cell_state_col = f"{results_prefix}_cellState_{group_key}" + cell_state_map = {"1": "activated", "2": "naive", "3": ""} + expected_cell_states = [ + cell_state_map[ct_map[str(label)]] for label in 
mock_adata.obs[group_key] + ] + pd.testing.assert_series_equal( + adata_result.obs[cell_state_col], + pd.Series(expected_cell_states, index=mock_adata.obs.index, dtype="category"), + check_names=False, + ) + + # --- TODO --- # - Add tests specifically for cytetype/anndata_helpers.py # - Add tests specifically for cytetype/client.py (e.g., more nuanced API responses)