diff --git a/src/datalab_api/__init__.py b/src/datalab_api/__init__.py index 73f70e9..3377946 100644 --- a/src/datalab_api/__init__.py +++ b/src/datalab_api/__init__.py @@ -5,13 +5,9 @@ from pathlib import Path from typing import Any, Optional, Union -from ._base import BaseDatalabClient, __version__ +from ._base import BaseDatalabClient, DuplicateItemError, __version__ -__all__ = ("DatalabClient", "__version__") - - -class DuplicateItemError(ValueError): - """Raised when the API operation would create a duplicate item.""" +__all__ = ("DatalabClient", "DuplicateItemError", "__version__") class DatalabClient(BaseDatalabClient): @@ -38,8 +34,8 @@ def get_info(self) -> dict[str, Any]: """ info_url = f"{self.datalab_api_url}/info" - info_resp = self.session.get(info_url, follow_redirects=True) - self.info = info_resp.json()["data"] + info_data = self._get(info_url) + self.info = info_data["data"] return self.info def authenticate(self): @@ -64,12 +60,8 @@ def get_block_info(self) -> list[dict[str, Any]]: """ block_info_url = f"{self.datalab_api_url}/info/blocks" - block_info_resp = self.session.get(block_info_url, follow_redirects=True) - if block_info_resp.status_code != 200: - raise RuntimeError( - f"Failed to list block information at {block_info_url}: {block_info_resp.status_code=}." - ) - self.block_info = block_info_resp.json()["data"] + block_info_data = self._get(block_info_url) + self.block_info = block_info_data["data"] return self.block_info def get_items(self, item_type: str | None = "samples") -> list[dict[str, Any]]: @@ -94,23 +86,16 @@ def get_items(self, item_type: str | None = "samples") -> list[dict[str, Any]]: item_type = "samples" items_url = f"{self.datalab_api_url}/{endpoint_type_map.get(item_type, item_type.replace('_', '-'))}" - items_resp = self.session.get(items_url, follow_redirects=True) - if items_resp.status_code != 200: - raise RuntimeError( - f"Failed to list items with {item_type=} at {items_url}: {items_resp.status_code=}. Check the item type is correct." - ) - items = items_resp.json() - if items["status"] != "success": - raise RuntimeError(f"Failed to list items at {items_url}: {items['status']!r}.") + items = self._get(items_url) - if item_type in items: - # Old approach - return items[item_type] - if "items" in items: - return items["items"] + if isinstance(items, dict): + if item_type in items: + # Old approach + return items[item_type] + if "items" in items: + return items["items"] - else: - return items + return items # type: ignore def search_items( self, query: str, item_types: Iterable[str] | str = ("samples", "cells") @@ -131,15 +116,7 @@ def search_items( search_items_url = ( f"{self.datalab_api_url}/search-items?query={query}&types={','.join(item_types)}" ) - items_resp = self.session.get(search_items_url, follow_redirects=True) - if items_resp.status_code != 200: - raise RuntimeError( - f"Failed to search items with {item_types=} at {search_items_url}: {items_resp.status_code=}" - ) - items = items_resp.json() - if items["status"] != "success": - raise RuntimeError(f"Failed to list items at {search_items_url}: {items['status']!r}.") - + items = self._get(search_items_url) return items["items"] def create_item( @@ -177,29 +154,11 @@ def create_item( new_item["collections"].append({"immutable_id": collection_immutable_id}) create_item_url = f"{self.datalab_api_url}/new-sample/" - create_item_resp = self.session.post( + created_item = self._post( create_item_url, json={"new_sample_data": new_item, "generate_id_automatically": item_id is None}, - follow_redirects=True, ) - try: - created_item = create_item_resp.json() - if create_item_resp.status_code == 409: - raise DuplicateItemError( - f"Item {item_id=} already exists at {create_item_url}: {created_item['status']!r}." - ) - if created_item["status"] != "success": - if "DuplicateKeyError" in created_item["message"]: - raise DuplicateItemError( - f"Item {item_id=} already exists at {create_item_url}: {created_item['status']!r}." - ) - raise RuntimeError(f"Failed to create item at {create_item_url}: {created_item}.") - return created_item["sample_list_entry"] - - except Exception as exc: - raise exc.__class__( - f"Failed to create item {item_id=} with data {item_data=} at {create_item_url}: {create_item_resp.status_code=}, {create_item_resp.content}. Check the item information is correct." - ) from exc + return created_item["sample_list_entry"] def update_item(self, item_id: str, item_data: dict[str, Any]) -> dict[str, Any]: """Update an item with the given item data. @@ -214,20 +173,7 @@ def update_item(self, item_id: str, item_data: dict[str, Any]) -> dict[str, Any] """ update_item_data = {"item_id": item_id, "data": item_data} update_item_url = f"{self.datalab_api_url}/save-item/" - update_item_resp = self.session.post( - update_item_url, - json=update_item_data, - follow_redirects=True, - ) - if update_item_resp.status_code != 200: - raise RuntimeError( - f"Failed to update item {item_id=} at {update_item_url}: {update_item_resp.status_code=}. Check the item information is correct." - ) - updated_item = update_item_resp.json() - if updated_item["status"] != "success": - raise RuntimeError( - f"Failed to update item at {update_item_url}: {updated_item['status']!r}." - ) + updated_item = self._post(update_item_url, json=update_item_data) return updated_item def get_item( @@ -259,16 +205,7 @@ def get_item( else: item_url = f"{self.datalab_api_url}/get-item-data/{item_id}" - item_resp = self.session.get(item_url, follow_redirects=True) - - if item_resp.status_code != 200: - raise RuntimeError( - f"Failed to find item {item_id=}, {refcode=} {item_url}: {item_resp.status_code=}. Check the item information is correct." - ) - - item = item_resp.json() - if item["status"] != "success": - raise RuntimeError(f"Failed to get item at {item_url}: {item['status']!r}.") + item = self._get(item_url) # Filter out any deleted blocks item["item_data"]["blocks_obj"] = { @@ -328,15 +265,7 @@ def get_block(self, item_id: str, block_id: str, block_data: dict[str, Any]) -> "block_id": block_id, "save_to_db": False, } - block_resp = self.session.post(block_url, json=block_request, follow_redirects=True) - if block_resp.status_code != 200: - raise RuntimeError( - f"Failed to find block {block_id=} for item {item_id=} at {block_url}: {block_resp.status_code=}. Check the block information is correct." - ) - - block = block_resp.json() - if block["status"] != "success": - raise RuntimeError(f"Failed to get block at {block_url}: {block['status']!r}.") + block = self._post(block_url, json=block_request) return block["new_block_data"] def upload_file(self, item_id: str, file_path: Path | str) -> dict[str, Any]: @@ -358,21 +287,12 @@ def upload_file(self, item_id: str, file_path: Path | str) -> dict[str, Any]: upload_url = f"{self.datalab_api_url}/upload-file/" with open(file_path, "rb") as file: files = {"file": (file_path.name, file)} - upload_resp = self.session.post( + upload = self._post( upload_url, files=files, data={"item_id": item_id, "replace_file": None}, - follow_redirects=True, - ) - if upload_resp.status_code != 201: - raise RuntimeError( - f"Failed to upload file {file_path=} to item {item_id=} at {upload_url}: {upload_resp.status_code=}. Check the file information is correct." + expected_status=201, ) - - upload = upload_resp.json() - if upload["status"] != "success": - raise RuntimeError(f"Failed to upload file at {upload_url}: {upload['status']!r}.") - return upload def create_data_block( @@ -422,12 +342,7 @@ def create_data_block( f"Not all IDs {file_ids=} are attached to item {item_id=}: {attached_file_ids=}" ) - block_resp = self.session.post(blocks_url, json=payload, follow_redirects=True) - if block_resp.status_code != 200: - raise RuntimeError( - f"Failed to create block {block_type=} for item {item_id=}:\n{block_resp.text}" - ) - block_data = block_resp.json()["new_block_obj"] + block_data = self._post(blocks_url, json=payload)["new_block_obj"] if file_ids: block_data = self._update_data_block( @@ -484,11 +399,8 @@ def _update_data_block( "save_to_db": True, } - resp = self.session.post(blocks_url, json=payload, follow_redirects=True) - if resp.status_code != 200: - raise RuntimeError(f"Failed to update block {block_type=}:\n{resp.text}") - - return resp.json()["new_block_data"] + resp = self._post(blocks_url, json=payload) + return resp["new_block_data"] def get_collection(self, collection_id: str) -> tuple[dict[str, Any], list[dict[str, Any]]]: """Get a collection with a given ID. @@ -502,16 +414,7 @@ def get_collection(self, collection_id: str) -> tuple[dict[str, Any], list[dict[ """ collection_url = f"{self.datalab_api_url}/collections/{collection_id}" - collection_resp = self.session.get(collection_url, follow_redirects=True) - if collection_resp.status_code != 200: - raise RuntimeError( - f"Failed to find collection {collection_id=} at {collection_url}: {collection_resp.status_code=}. Check the collection ID is correct." - ) - collection = collection_resp.json() - if collection["status"] != "success": - raise RuntimeError( - f"Failed to get collection at {collection_url}: {collection['status']!r}." - ) + collection = self._get(collection_url) return collection["data"], collection["child_items"] def create_collection( @@ -531,16 +434,9 @@ def create_collection( new_collection = collection_data new_collection.update({"collection_id": collection_id, "type": "collections"}) - collection_resp = self.session.put( + created_collection = self._put( collection_url, json={"data": new_collection}, - follow_redirects=True, + expected_status=201, ) - - if collection_resp.status_code != 201 or collection_resp.json()["status"] != "success": - raise RuntimeError( - f"Failed to create collection {collection_id=} at {collection_url}: {collection_resp.status_code=}. Check the collection information is correct: {collection_resp.content}" - ) - - created_collection = collection_resp.json()["data"] - return created_collection + return created_collection["data"] diff --git a/src/datalab_api/_base.py b/src/datalab_api/_base.py index 242807a..7478421 100644 --- a/src/datalab_api/_base.py +++ b/src/datalab_api/_base.py @@ -10,6 +10,15 @@ from .utils import AutoPrettyPrint + +class DatalabAPIError(Exception): + """Base exception for Datalab API errors.""" + + +class DuplicateItemError(DatalabAPIError): + """Raised when the API operation would create a duplicate item.""" + + __version__ = version("datalab-api") __all__ = ("BaseDatalabClient", "__version__") @@ -50,7 +59,7 @@ def __init__(self, datalab_api_url: str, log_level: str = "WARNING"): Parameters: datalab_api_url: The URL of the Datalab API. - TODO: If the URL of a datalab *UI* is provided, a request will be made to attempt + If the URL of a datalab *UI* is provided, a request will be made to attempt to resolve the underlying API URL (e.g., `https://demo-api.datalab-org.io` will 'redirect' to `https://demo-api.datalab-org.io`). log_level: The logging level to use for the client. Defaults to "WARNING". @@ -204,3 +213,139 @@ def __enter__(self): def __exit__(self, *_): if self._session is not None: self._session.close() + + def _handle_response( + self, response: httpx.Response, url: str, expected_status: int = 200 + ) -> dict[str, Any]: + """Handle HTTP response with consistent error handling. + + Args: + response: The HTTP response object + url: The URL that was requested + expected_status: The expected HTTP status code (default: 200) + + Returns: + The JSON response data + + Raises: + DuplicateItemError: For 409 conflicts or duplicate key errors + DatalabAPIError: For other API errors + """ + # Handle authentication errors first + if response.status_code in (401, 403): + auth_help = ( + "Authentication failed. Please check your API key and ensure it's set correctly. " + "You can set your API key using the DATALAB_API_KEY environment variable or " + "the instance-specific _DATALAB_API_KEY variable." + ) + try: + error_data = response.json() + error_message = error_data.get("message", str(response.content)) + except ValueError: + error_message = str(response.content) + raise DatalabAPIError( + f"HTTP {response.status_code} error at {url}: {error_message}. {auth_help}" + ) + + # Handle other HTTP status code errors + if response.status_code != expected_status: + try: + error_data = response.json() + error_message = error_data.get("message", str(response.content)) + error_message += f"\nFull JSON: {error_data}" + except ValueError: + error_message = str(response.content) + + # Handle specific error cases with helpful messages + if response.status_code == 404: + raise DatalabAPIError( + f"HTTP 404 Not Found at {url}: {error_message}. " + "Please check the URL/endpoint is correct and the resource exists." + ) + elif response.status_code == 409: + raise DuplicateItemError(f"Duplicate item error at {url}: {error_message}") + elif response.status_code == 429: + raise DatalabAPIError( + f"HTTP 429 Too Many Requests at {url}: {error_message}. " + "You are being rate limited. Please wait before making more requests." + ) + elif response.status_code == 500: + server_info = "" + if hasattr(self, "_datalab_server_version"): + server_info = f" (Server version: {self._datalab_server_version})" + raise DatalabAPIError( + f"""HTTP 500 Internal Server Error at {url} {server_info}. +This is likely a server-side bug. Please report this issue to the datalab developers at https://github.com/datalab-org/datalab/issues with the full error message below: + +\n{error_message}""" + ) + elif response.status_code == 502: + raise DatalabAPIError( + f"HTTP 502 Bad Gateway at {url}: {error_message}. " + "The server is having connectivity issues. Please try again later." + ) + elif response.status_code == 503: + raise DatalabAPIError( + f"HTTP 503 Service Unavailable at {url}: {error_message}. " + "The datalab service is temporarily unavailable. Please report deployment " + "issues to your datalab instance maintainer." + ) + elif response.status_code == 504: + raise DatalabAPIError( + f"HTTP 504 Gateway Timeout at {url}: {error_message}. " + "The request timed out. Please try again or check if the operation is taking too long." + ) + else: + raise DatalabAPIError( + f"HTTP {response.status_code} error at {url}: {error_message}" + ) + + # Parse JSON response + try: + data = response.json() + except ValueError as e: + raise DatalabAPIError( + f"Invalid JSON response from {url}: {e}. Response content: {response.content}" + ) + + return data + + def _request( + self, method: str, url: str, expected_status: int = 200, **kwargs + ) -> dict[str, Any]: + """Make an HTTP request with consistent error handling. + + Args: + method: HTTP method (GET, POST, PUT, etc.) + url: The URL to request + expected_status: The expected HTTP status code (default: 200) + **kwargs: Additional arguments to pass to the request + + Returns: + The JSON response data + """ + try: + response = self.session.request(method, url, follow_redirects=True, **kwargs) + return self._handle_response(response, url, expected_status) + except (httpx.RequestError, httpx.HTTPStatusError) as e: + raise DatalabAPIError(f"Request failed for {url}: {e}") + + def _get(self, url: str, **kwargs) -> dict[str, Any]: + """Make a GET request with error handling.""" + return self._request("GET", url, **kwargs) + + def _post(self, url: str, expected_status: int = 200, **kwargs) -> dict[str, Any]: + """Make a POST request with error handling.""" + return self._request("POST", url, expected_status, **kwargs) + + def _put(self, url: str, expected_status: int = 201, **kwargs) -> dict[str, Any]: + """Make a PUT request with error handling.""" + return self._request("PUT", url, expected_status, **kwargs) + + def _patch(self, url: str, **kwargs) -> dict[str, Any]: + """Make a PATCH request with error handling.""" + return self._request("PATCH", url, **kwargs) + + def _delete(self, url: str, expected_status: int = 200, **kwargs) -> dict[str, Any]: + """Make a DELETE request with error handling.""" + return self._request("DELETE", url, expected_status, **kwargs) diff --git a/tests/conftest.py b/tests/conftest.py index 957b2b3..e43d170 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,82 @@ import json import os +import respx +from httpx import Response from pytest import fixture +@fixture +def fake_api_url(): + return "https://api.datalab.industries" + + +@fixture +def fake_ui_url(): + return "https://ui.datalab.industries" + + +@fixture +def mocked_api( + fake_api_url, + fake_user_json, + fake_info_json, + fake_block_info_json, + fake_samples_json, + fake_sample_json, + fake_collection_json, +): + with respx.mock(base_url=fake_api_url, assert_all_called=False) as respx_mock: + fake_api = respx_mock.get("/", name="api") + fake_api.return_value = Response(200, content="") + + fake_authentication = respx_mock.get("/get-current-user", name="current_user") + fake_authentication.return_value = Response(200, json=fake_user_json) + + fake_info_api = respx_mock.get("/info", name="info") + fake_info_api.return_value = Response(200, json=fake_info_json) + + fake_block_info_api = respx_mock.get("/info/blocks", name="info-blocks") + fake_block_info_api.return_value = Response(200, json=fake_block_info_json) + + fake_samples_api = respx_mock.get("/samples", name="samples") + fake_samples_api.return_value = Response(200, json=fake_samples_json) + + fake_item_api = respx_mock.get("/get-item-data/KUVEKJ", name="sample-KUVEKJ") + fake_item_api.return_value = Response(200, json=fake_sample_json) + + fake_missing_item_api = respx_mock.get("/get-item-data/TEST", name="sample-missing-TEST") + fake_missing_item_api.return_value = Response( + 404, + json={ + "message": "No matching items for match={'item_id': 'TEST'} with current authorization", + "status": "error", + }, + ) + + fake_collection_api = respx_mock.get( + "/collections/test_collection", name="collection-test_collection" + ) + fake_collection_api.return_value = Response(200, json=fake_collection_json) + + # Fake an unhandled exception + fake_internal_error = respx_mock.post("/save-item/", name="bad-save") + fake_internal_error.return_value = Response( + 500, json={"message": "('type',)", "title": "KeyError"} + ) + + yield respx_mock + + +@fixture +def mocked_ui(fake_ui_url, fake_ui_html): + with respx.mock(base_url=fake_ui_url, assert_all_called=False) as respx_mock: + fake_ui = respx_mock.get("/", name="ui-redirect") + fake_ui.return_value = Response(200, content=fake_ui_html) + + yield respx_mock + + @fixture def fake_samples_json(): """Returns a mocked JSON response for the API /samples endpoint.""" @@ -193,6 +266,35 @@ def fake_block_info_json(): ) +@fixture +def fake_user_json(): + """Returns a mocked JSON response from the /get-current-user endpoint.""" + + return json.loads( + """ +{ + "account_status": "active", + "contact_email": null, + "display_name": "Jane Doe", + "identities": [ + { + "display_name": "Jane Doe", + "identifier": "1111111", + "identity_type": "github", + "name": "jane-doe", + "verified": true + } + ], + "immutable_id": "6574f788aabb227db8d1b14e", + "last_modified": null, + "managers": null, + "relationships": null, + "role": "user", + "type": "people" +}""" + ) + + @fixture(scope="session") def fake_api_key(): """Returns a fake API key.""" diff --git a/tests/test_client.py b/tests/test_client.py index b091eac..0fc044d 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,81 +1,60 @@ import pytest import respx -from httpx import Response from datalab_api import DatalabClient +from datalab_api._base import DatalabAPIError -@respx.mock -def test_redirect_url(fake_ui_html, fake_info_json, fake_block_info_json): - fake_ui = respx.get("https://ui.datalab.industries").mock( - return_value=Response(200, content=fake_ui_html) - ) - fake_info_api = respx.get("https://api.datalab.industries/info").mock( - return_value=Response(200, json=fake_info_json) - ) - fake_block_info_api = respx.get("https://api.datalab.industries/info/blocks").mock( - return_value=Response(200, json=fake_block_info_json) - ) +def test_redirect_url(mocked_api, mocked_ui, fake_api_url, fake_ui_url): with pytest.warns( UserWarning, match="^Found API URL https://api.datalab.industries in HTML meta tag. Creating client with this URL instead.$", ): - client = DatalabClient("https://ui.datalab.industries") - assert fake_ui.called - assert fake_info_api.called - assert fake_block_info_api.called - assert client.datalab_api_url == "https://api.datalab.industries" + client = DatalabClient(fake_ui_url) + assert mocked_ui["ui-redirect"].called + assert mocked_api["info"].called + assert mocked_api["info-blocks"].called + assert client.datalab_api_url == fake_api_url + + +def test_sample_operations(mocked_api, fake_api_url): + with DatalabClient(fake_api_url) as client: + assert mocked_api["api"].called + assert mocked_api["info"].called + assert mocked_api["info-blocks"].called + + client.authenticate() + assert mocked_api["current_user"].called -@respx.mock -def test_sample_list(fake_samples_json, fake_info_json, fake_sample_json, fake_block_info_json): - fake_api = respx.get("https://api.datalab.industries/").mock( - return_value=Response(200, content="") - ) - fake_samples_api = respx.get("https://api.datalab.industries/samples").mock( - return_value=Response(200, json=fake_samples_json) - ) - fake_info_api = respx.get("https://api.datalab.industries/info").mock( - return_value=Response(200, json=fake_info_json) - ) - fake_block_info_api = respx.get("https://api.datalab.industries/info/blocks").mock( - return_value=Response(200, json=fake_block_info_json) - ) - fake_item_api = respx.get("https://api.datalab.industries/get-item-data/KUVEKJ").mock( - return_value=Response(200, json=fake_sample_json) - ) - with DatalabClient("https://api.datalab.industries") as client: samples = client.get_items(display=True) - assert fake_samples_api.called - assert fake_api.called - assert fake_info_api.called - assert fake_block_info_api.called + assert mocked_api["samples"].called assert len(samples) assert samples[0]["item_id"] == "test" + sample = client.get_item("KUVEKJ", display=True) - assert fake_item_api.called + assert mocked_api["sample-KUVEKJ"].called assert sample["item_id"] == "KUVEKJ" + # Check that the full server error message is shown for a missing item + with pytest.raises( + DatalabAPIError, + match="No matching items for match={'item_id': 'TEST'} with current authorization", + ): + client.get_item("TEST", display=True) + assert mocked_api["sample-missing-TEST"].called -@respx.mock -def test_collection_get(fake_info_json, fake_block_info_json, fake_collection_json): - fake_api = respx.get("https://api.datalab.industries/").mock( - return_value=Response(200, content="") - ) - fake_info_api = respx.get("https://api.datalab.industries/info").mock( - return_value=Response(200, json=fake_info_json) - ) - fake_block_info_api = respx.get("https://api.datalab.industries/info/blocks").mock( - return_value=Response(200, json=fake_block_info_json) - ) + with pytest.raises(DatalabAPIError, match="KeyError"): + client.update_item(item_id="TEST", item_data={"name": "test"}) + assert mocked_api["bad-save"].called - fake_collection_api = respx.get( - "https://api.datalab.industries/collections/test_collection" - ).mock(return_value=Response(200, json=fake_collection_json)) +@respx.mock +def test_collection_get(fake_api_url, mocked_api): with DatalabClient("https://api.datalab.industries") as client: - collection, children = client.get_collection("test_collection") - assert fake_api.called - assert fake_info_api.called - assert fake_block_info_api.called - assert fake_collection_api.called + assert mocked_api["api"].called + assert mocked_api["info"].called + assert mocked_api["info-blocks"].called + + collection, children = client.get_collection("test_collection", display=True) + assert mocked_api["collection-test_collection"].called