Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

---
<a href="https://colab.research.google.com/drive/1aRLsI3mx8JR8u5BKHs48YUbLsqRsh2N7?usp=sharing" target="_blank">Example Notebook</a> |
<a href="https://nygen-labs-prod--cytetype-api.modal.run/report/2b514924-334f-4f5c-aa25-347155586634?v=251123" target="_blank">Example output</a> |
<a href="https://prod.cytetype.nygen.io/report/2b514924-334f-4f5c-aa25-347155586634?v=251123" target="_blank">Example output</a> |
<a href="docs/examples.md">Atlas scale results</a>

Switch to R/Seurat package: <a href="https://github.com/NygenAnalytics/CyteTypeR">CyteTypeR</a>
Expand All @@ -47,19 +47,25 @@ sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=1000)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.leiden(adata, key_added="clusters")
sc.tl.rank_genes_groups(adata, groupby="clusters", method="t-test")

group_key = 'clusters' # Key in adata.obs that holds (or will hold) the cluster labels

sc.tl.leiden(adata, key_added=group_key)
sc.tl.umap(adata)
sc.tl.rank_genes_groups(adata, groupby=group_key, method="t-test")
# ------ Example Scanpy Pipeline ------

# ------ CyteType ------
annotator = CyteType(adata, group_key="clusters")
annotator = CyteType(adata, group_key=group_key)
adata = annotator.run(
study_context="Brief study description (e.g., Human brain tissue ...)",
)

# View results
print(adata.obs.cytetype_annotation_clusters)
print(adata.obs.cytetype_cellOntologyTerm_clusters)
# Visualize results
sc.pl.embedding(adata, basis='umap', color=f'cytetype_annotation_{group_key}')
sc.pl.embedding(adata, basis='umap', color=f'cytetype_cellOntologyTerm_{group_key}')
sc.pl.embedding(adata, basis='umap', color=f'cytetype_cellOntologyTermID_{group_key}')
sc.pl.embedding(adata, basis='umap', color=f'cytetype_cellState_{group_key}')
```

## Documentation
Expand Down
2 changes: 1 addition & 1 deletion cytetype/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .main import CyteType

__all__ = ["CyteType"]
__version__ = "0.10.0"
__version__ = "0.11.0"
3 changes: 3 additions & 0 deletions cytetype/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ def _transform_results(results_data: Dict[str, Any]) -> Dict[str, Any]:
"clusterId": annotation_data.get("clusterId", cluster_id),
"annotation": annotation_data.get("annotation", "Unknown"),
"ontologyTerm": annotation_data.get(
"cellOntologyTermName", "Unknown"
),
"ontologyTermID": annotation_data.get(
"cellOntologyTerm", "Unknown"
),
# Include additional fields from new format
Expand Down
2 changes: 1 addition & 1 deletion cytetype/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@
)


DEFAULT_API_URL = "https://nygen-labs-prod--cytetype-api.modal.run"
DEFAULT_API_URL = "https://prod.cytetype.nygen.io"
DEFAULT_POLL_INTERVAL = 10
DEFAULT_TIMEOUT = 7200
27 changes: 27 additions & 0 deletions cytetype/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,31 @@ def _store_results_and_annotations(
).astype("category")
)

# Update ontology term IDs
# Lookup from each annotation's clusterId to its ontologyTermID. The direct
# item["ontologyTermID"] indexing raises KeyError if an annotation lacks the key.
ontology_id_map = {
item["clusterId"]: item["ontologyTermID"]
for item in result_data.get("annotations", [])
}
# One value per entry of self.clusters, indexed by adata.obs.index and stored
# as a categorical column; cluster IDs without an annotation become "Unknown".
# NOTE(review): presumably self.clusters holds one cluster ID per cell — confirm.
self.adata.obs[f"{results_prefix}_cellOntologyTermID_{self.group_key}"] = (
pd.Series(
[
ontology_id_map.get(cluster_id, "Unknown")
for cluster_id in self.clusters
],
index=self.adata.obs.index,
).astype("category")
)

# Update cell states
# Lookup from clusterId to cellState; unlike the ontology term ID above, a
# missing "cellState" key is tolerated and treated as an empty string.
cell_state_map = {
item["clusterId"]: item.get("cellState", "")
for item in result_data.get("annotations", [])
}
# Same layout as the ontology ID column, but the fallback for an
# unannotated cluster is "" rather than "Unknown".
self.adata.obs[f"{results_prefix}_cellState_{self.group_key}"] = pd.Series(
[cell_state_map.get(cluster_id, "") for cluster_id in self.clusters],
index=self.adata.obs.index,
).astype("category")

# Check for unannotated clusters if requested
if check_unannotated:
unannotated_clusters = set(
Expand All @@ -238,6 +263,8 @@ def _store_results_and_annotations(
# Tell the user where each result was written. The names listed here must
# match the keys assigned on adata.obs; the ontology term ID column is
# stored as `<prefix>_cellOntologyTermID_<group_key>`.
logger.success(
    f"Annotations successfully added to `adata.obs['{results_prefix}_annotation_{self.group_key}']`\n"
    f"Ontology terms added to `adata.obs['{results_prefix}_cellOntologyTerm_{self.group_key}']`\n"
    f"Ontology term IDs added to `adata.obs['{results_prefix}_cellOntologyTermID_{self.group_key}']`\n"
    f"Cell states added to `adata.obs['{results_prefix}_cellState_{self.group_key}']`\n"
    f"Full results added to `adata.uns['{results_prefix}_results']`."
)

Expand Down
40 changes: 20 additions & 20 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ The following are notebooks used to run CyteType on all the single-cell datasets

| Dataset | Links |
| --- | --- |
| **Tabula Sapiens** | [Colab](https://colab.research.google.com/drive/1EyQXaruDJBPICUvlUY1E19zxOm_L4_VU?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/15332f10-2048-4099-ab1e-baf2ab9e39c3) - [H5ad](https://drive.google.com/file/d/1URo7niPqAo-9HGVH8f3QJfqll9lc8JN_/view?usp=drive_link) |
| **GTEX v9** | [Colab](https://colab.research.google.com/drive/1uvqG2eVaUuNe66e0_7bp682uCdKx6-KL?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/5242f3b8-0078-417d-954e-00d1bb19bdf6) - [H5ad](https://drive.google.com/file/d/1EIpudRyasLUHR6J2v8fdpmBTbCE2__UF/view?usp=drive_link) |
| **Hypomap** | [Colab](https://colab.research.google.com/drive/1OuTnh8xHoXaINCGcgu_1q-jANwXL8ggF?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/3840b662-bacf-4067-b93d-4e57c1f21187) - [H5ad](https://drive.google.com/file/d/1QMvZNdoDlKpOmyguAXSk45-YVz97v4tM/view?usp=drive_link) |
| **Human Lung Cell Atlas (Core)** | [Colab](https://colab.research.google.com/drive/1FoTD-XzLNDPgYSlgVsxnLwPnWF5YiKny?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/6da1458a-392f-4bce-b6c9-4ccb308c8797) - [H5ad](https://drive.google.com/file/d/13O0dyUnwJKLPm8fncRt597S5hs2COsxx/view?usp=drive_link) |
| **Immune Cell Atlas** | [Colab](https://colab.research.google.com/drive/1Kum9S_kU76QvS__42ABd-Xp1GpH4c9jU?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/05ff7629-8f0c-4b95-ac65-30bba9b384c5) - [H5ad](https://drive.google.com/file/d/1iqkC7dG1ovgKsU_8HdZ2eyELIxB0sM3t/view?usp=drive_link) |
| **Mouse Pancreatic Cell Atlas** | [Colab](https://colab.research.google.com/drive/1fg9W3Lz-E_yAVoqs_6XrQsYkfsfnzFey?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/6d248cd2-6b61-4beb-bc58-1d63c7a2fc34) - [H5ad](https://drive.google.com/file/d/19qpRfz4WGuUsRNl0YKuy3YENfHKI6pz-/view?usp=drive_link) |
| **Diabetic Kidney Disease** | [Colab](https://colab.research.google.com/drive/1kb3urFbl0PEPW4T_ti0DBTAmi5YK_-t1?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/0da4eaef-f165-4800-a4e3-c5cf8ec165ad) - [H5ad](https://drive.google.com/file/d/1yZXYlfZHLYcPL18Jy25J4v8kWQYhSsd7/view?usp=drive_link) |
| **Tabula Sapiens** | [Colab](https://colab.research.google.com/drive/1EyQXaruDJBPICUvlUY1E19zxOm_L4_VU?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/15332f10-2048-4099-ab1e-baf2ab9e39c3) - [H5ad](https://drive.google.com/file/d/1URo7niPqAo-9HGVH8f3QJfqll9lc8JN_/view?usp=drive_link) |
| **GTEX v9** | [Colab](https://colab.research.google.com/drive/1uvqG2eVaUuNe66e0_7bp682uCdKx6-KL?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/5242f3b8-0078-417d-954e-00d1bb19bdf6) - [H5ad](https://drive.google.com/file/d/1EIpudRyasLUHR6J2v8fdpmBTbCE2__UF/view?usp=drive_link) |
| **Hypomap** | [Colab](https://colab.research.google.com/drive/1OuTnh8xHoXaINCGcgu_1q-jANwXL8ggF?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/3840b662-bacf-4067-b93d-4e57c1f21187) - [H5ad](https://drive.google.com/file/d/1QMvZNdoDlKpOmyguAXSk45-YVz97v4tM/view?usp=drive_link) |
| **Human Lung Cell Atlas (Core)** | [Colab](https://colab.research.google.com/drive/1FoTD-XzLNDPgYSlgVsxnLwPnWF5YiKny?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/6da1458a-392f-4bce-b6c9-4ccb308c8797) - [H5ad](https://drive.google.com/file/d/13O0dyUnwJKLPm8fncRt597S5hs2COsxx/view?usp=drive_link) |
| **Immune Cell Atlas** | [Colab](https://colab.research.google.com/drive/1Kum9S_kU76QvS__42ABd-Xp1GpH4c9jU?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/05ff7629-8f0c-4b95-ac65-30bba9b384c5) - [H5ad](https://drive.google.com/file/d/1iqkC7dG1ovgKsU_8HdZ2eyELIxB0sM3t/view?usp=drive_link) |
| **Mouse Pancreatic Cell Atlas** | [Colab](https://colab.research.google.com/drive/1fg9W3Lz-E_yAVoqs_6XrQsYkfsfnzFey?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/6d248cd2-6b61-4beb-bc58-1d63c7a2fc34) - [H5ad](https://drive.google.com/file/d/19qpRfz4WGuUsRNl0YKuy3YENfHKI6pz-/view?usp=drive_link) |
| **Diabetic Kidney Disease** | [Colab](https://colab.research.google.com/drive/1kb3urFbl0PEPW4T_ti0DBTAmi5YK_-t1?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/0da4eaef-f165-4800-a4e3-c5cf8ec165ad) - [H5ad](https://drive.google.com/file/d/1yZXYlfZHLYcPL18Jy25J4v8kWQYhSsd7/view?usp=drive_link) |

## CellHint Organ Atlases

Expand All @@ -20,22 +20,22 @@ Data was annotated in across three notebooks: [Colab 1/3](https://colab.research

| Tissue | Links |
| --- | --- |
| **Blood** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/d0c219b4-2b4a-4b27-bac9-aea280a972f1) |
| **Bone Marrow** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/bc5099b5-42c2-4ba7-8fdd-bc7b5dd3e84d) |
| **Heart** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/2ffd6cf1-ec98-43d1-82d7-1fc3e9a11b8c) |
| **Hippocampus** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/60b98429-2338-4408-a07c-bb60e82ac793) |
| **Intestine** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/e0a2ca37-872f-489c-8de1-d84434d409fe) |
| **Kidney** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/7dd1f0ea-7eec-4968-b353-8b52707de5ac) |
| **Liver** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/a429348c-530a-486c-8980-3349a583b8c4) |
| **Lung** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/75e41a21-f771-4ebc-829a-82f93529a147) |
| **Lymph Node** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/b911e212-fe37-4bdc-a7f3-51e9146bf8cc) |
| **Pancreas** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/b620245a-1ae0-4025-aab7-52ada6dcc6cb) |
| **Skeletal Muscle** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/3f35a45d-aa1b-42cb-92d2-e739623a402b) |
| **Spleen** | [CyteType report](https://nygen-labs-prod--cytetype-api.modal.run/report/4b64ec02-ac01-45b5-84b9-0f16708cbd85) |
| **Blood** | [CyteType report](https://prod.cytetype.nygen.io/report/d0c219b4-2b4a-4b27-bac9-aea280a972f1) |
| **Bone Marrow** | [CyteType report](https://prod.cytetype.nygen.io/report/bc5099b5-42c2-4ba7-8fdd-bc7b5dd3e84d) |
| **Heart** | [CyteType report](https://prod.cytetype.nygen.io/report/2ffd6cf1-ec98-43d1-82d7-1fc3e9a11b8c) |
| **Hippocampus** | [CyteType report](https://prod.cytetype.nygen.io/report/60b98429-2338-4408-a07c-bb60e82ac793) |
| **Intestine** | [CyteType report](https://prod.cytetype.nygen.io/report/e0a2ca37-872f-489c-8de1-d84434d409fe) |
| **Kidney** | [CyteType report](https://prod.cytetype.nygen.io/report/7dd1f0ea-7eec-4968-b353-8b52707de5ac) |
| **Liver** | [CyteType report](https://prod.cytetype.nygen.io/report/a429348c-530a-486c-8980-3349a583b8c4) |
| **Lung** | [CyteType report](https://prod.cytetype.nygen.io/report/75e41a21-f771-4ebc-829a-82f93529a147) |
| **Lymph Node** | [CyteType report](https://prod.cytetype.nygen.io/report/b911e212-fe37-4bdc-a7f3-51e9146bf8cc) |
| **Pancreas** | [CyteType report](https://prod.cytetype.nygen.io/report/b620245a-1ae0-4025-aab7-52ada6dcc6cb) |
| **Skeletal Muscle** | [CyteType report](https://prod.cytetype.nygen.io/report/3f35a45d-aa1b-42cb-92d2-e739623a402b) |
| **Spleen** | [CyteType report](https://prod.cytetype.nygen.io/report/4b64ec02-ac01-45b5-84b9-0f16708cbd85) |

## Cell Landscapes from BIS
Cell atlases hosted by [BIS](https://bis.zju.edu.cn/) from various organisms and specific tissues

| Tissue | Links |
| --- | --- |
| Human Cell Landscape | [Colab](https://colab.research.google.com/drive/1czLW33FYbnPOmPvnfddsvehXM491UDGq?usp=sharing) - [CyteType Report](https://nygen-labs-prod--cytetype-api.modal.run/report/581616bf-3c96-4e58-a290-881b40378309) - [Homepage](https://bis.zju.edu.cn/HCL/) |
| Human Cell Landscape | [Colab](https://colab.research.google.com/drive/1czLW33FYbnPOmPvnfddsvehXM491UDGq?usp=sharing) - [CyteType Report](https://prod.cytetype.nygen.io/report/581616bf-3c96-4e58-a290-881b40378309) - [Homepage](https://bis.zju.edu.cn/HCL/) |
116 changes: 116 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,19 @@ def test_cytetype_success(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
"ontologyTermID": "CL:0000001",
}, # Corresponds to '0'
{
"clusterId": "2",
"annotation": "Cell Type B",
"ontologyTerm": "CL:0000002",
"ontologyTermID": "CL:0000002",
}, # Corresponds to '1'
{
"clusterId": "3",
"annotation": "Cell Type C",
"ontologyTerm": "CL:0000003",
"ontologyTermID": "CL:0000003",
}, # Corresponds to '2'
]
}
Expand Down Expand Up @@ -265,16 +268,19 @@ def test_cytetype_with_auth_token(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
"ontologyTermID": "CL:0000001",
},
{
"clusterId": "2",
"annotation": "Cell Type B",
"ontologyTerm": "CL:0000002",
"ontologyTermID": "CL:0000002",
},
{
"clusterId": "3",
"annotation": "Cell Type C",
"ontologyTerm": "CL:0000003",
"ontologyTermID": "CL:0000003",
},
]
}
Expand Down Expand Up @@ -312,6 +318,7 @@ def test_cytetype_get_results_helper(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
"ontologyTermID": "CL:0000001",
},
]
}
Expand Down Expand Up @@ -362,6 +369,7 @@ def test_cytetype_with_metadata(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
"ontologyTermID": "CL:0000001",
},
]
}
Expand Down Expand Up @@ -407,6 +415,7 @@ def test_cytetype_without_metadata(
"clusterId": "1",
"annotation": "Cell Type A",
"ontologyTerm": "CL:0000001",
"ontologyTermID": "CL:0000001",
},
]
}
Expand All @@ -421,6 +430,113 @@ def test_cytetype_without_metadata(
assert "metadata" not in query_arg


@patch("cytetype.main.submit_job")
@patch("cytetype.main.poll_for_results")
def test_cytetype_obs_columns(
    mock_poll: MagicMock, mock_submit: MagicMock, mock_adata: anndata.AnnData
) -> None:
    """Verify that a run writes every per-cell result column to ``obs``.

    The mocked job returns three annotated clusters. The annotation, ontology
    term, ontology term ID and cell state columns must each exist, be
    categorical, and hold the value belonging to each cell's cluster.
    """
    group_key = "leiden"
    results_prefix = "cytetype"

    # Mocked API payload, one tuple per cluster, in `field_names` order.
    field_names = (
        "clusterId",
        "annotation",
        "ontologyTerm",
        "ontologyTermID",
        "cellState",
    )
    cluster_rows = [
        ("1", "T cell", "T cell", "CL:0000084", "activated"),
        ("2", "B cell", "B cell", "CL:0000236", "naive"),
        ("3", "Monocyte", "monocyte", "CL:0000576", ""),  # Empty cell state
    ]
    mock_result: dict[str, list[dict[str, str]]] = {
        "annotations": [dict(zip(field_names, row)) for row in cluster_rows]
    }
    mock_submit.return_value = "mock_job_obs_columns"
    mock_poll.return_value = mock_result

    adata_result = CyteType(mock_adata, group_key=group_key).run(
        study_context="Test study context"
    )

    # Expected value per cluster ID, keyed by the middle part of the obs
    # column name (`<prefix>_<suffix>_<group_key>`).
    expected_by_suffix = {
        "annotation": {"1": "T cell", "2": "B cell", "3": "Monocyte"},
        "cellOntologyTerm": {"1": "T cell", "2": "B cell", "3": "monocyte"},
        "cellOntologyTermID": {
            "1": "CL:0000084",
            "2": "CL:0000236",
            "3": "CL:0000576",
        },
        # Includes the empty-string cell state of cluster "3".
        "cellState": {"1": "activated", "2": "naive", "3": ""},
    }
    column_for = {
        suffix: f"{results_prefix}_{suffix}_{group_key}"
        for suffix in expected_by_suffix
    }

    # Every column must be present and categorical before values are compared.
    for col in column_for.values():
        assert col in adata_result.obs, f"Column {col} not found in obs"
        assert isinstance(adata_result.obs[col].dtype, pd.CategoricalDtype), (
            f"Column {col} is not categorical"
        )

    # cluster label -> cluster ID mapping
    label_to_cluster_id = {"0": "1", "1": "2", "2": "3"}
    cell_cluster_ids = [
        label_to_cluster_id[str(label)] for label in mock_adata.obs[group_key]
    ]

    for suffix, value_by_cluster in expected_by_suffix.items():
        expected = pd.Series(
            [value_by_cluster[cluster_id] for cluster_id in cell_cluster_ids],
            index=mock_adata.obs.index,
            dtype="category",
        )
        pd.testing.assert_series_equal(
            adata_result.obs[column_for[suffix]],
            expected,
            check_names=False,
        )


# --- TODO ---
# - Add tests specifically for cytetype/anndata_helpers.py
# - Add tests specifically for cytetype/client.py (e.g., more nuanced API responses)
Expand Down