From d00f937d0e2318570c7f7d4d7acb7529449dc847 Mon Sep 17 00:00:00 2001 From: Nirmit Damania Date: Tue, 17 Feb 2026 10:10:37 -0500 Subject: [PATCH 1/5] implement playwright sso authentication --- liminal/cli/utils.py | 2 +- liminal/connection/benchling_connection.py | 10 +- liminal/connection/benchling_service.py | 107 ++++++++++++++++++--- pyproject.toml | 1 + uv.lock | 33 +++++++ 5 files changed, 138 insertions(+), 15 deletions(-) diff --git a/liminal/cli/utils.py b/liminal/cli/utils.py index 9ebccf3..8efa76b 100644 --- a/liminal/cli/utils.py +++ b/liminal/cli/utils.py @@ -12,7 +12,7 @@ def _check_liminal_directory_initialized(liminal_dir_path: Path) -> None: """Raises an exception if the liminal directory does not exist at the given path.""" if not liminal_dir_path.exists() or not liminal_dir_path.is_dir(): raise Exception( - "Liminal directory not found at current working directory. Run `liminal init` or check your current working directory." + "/liminal directory not found at current working directory where `liminal` command was run. Run `liminal init` or ensure that your current working directory is where the /liminal environment is located." ) else: if not (liminal_dir_path / "env.py").exists(): diff --git a/liminal/connection/benchling_connection.py b/liminal/connection/benchling_connection.py index f75c009..e5d6ac6 100644 --- a/liminal/connection/benchling_connection.py +++ b/liminal/connection/benchling_connection.py @@ -37,9 +37,14 @@ class BenchlingConnection(BaseModel): warehouse_connection_string: str | None = None The connection string for the warehouse. internal_api_admin_email: str | None = None - The email of the internal API admin. + The email of the internal API admin. If SSO is not enabled or not required on your Benchling tenant, this email is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. internal_api_admin_password: str | None = None - The password of the internal API admin. 
+ The password of the internal API admin. If SSO is not enabled or not required on your Benchling tenant, this password is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. + chrome_profile_data_dir: str | None = ".liminal_chrome_data/" + The directory to store the Chrome profile data for playwright. If SSO is enabled and required on your Benchling tenant, + Liminal uses playwright so the user can log into Benchling in order to give Liminal the authenticated internal API session cookie. + This directory is used to store playwright's persistent context, allowing the user to set up a persistent chrome profile. + Set this to None in order to disable playwright's persistent context. fieldsets: bool = False Whether your Benchling tenant has access to fieldsets. config_flags: TenantConfigFlags = TenantConfigFlags() @@ -54,6 +59,7 @@ class BenchlingConnection(BaseModel): warehouse_connection_string: str | None = None internal_api_admin_email: str | None = None internal_api_admin_password: str | None = None + chrome_profile_data_dir: str | None = "liminal/.liminal_chrome_data/" fieldsets: bool = False config_flags: TenantConfigFlags = TenantConfigFlags() diff --git a/liminal/connection/benchling_service.py b/liminal/connection/benchling_service.py index c2e4b3b..61fde35 100644 --- a/liminal/connection/benchling_service.py +++ b/liminal/connection/benchling_service.py @@ -1,6 +1,8 @@ +import asyncio import logging from typing import Any +from playwright.async_api import async_playwright import requests from benchling_sdk.auth.client_credentials_oauth2 import ClientCredentialsOAuth2 from benchling_sdk.benchling import Benchling @@ -92,16 +94,23 @@ def __init__( connection.internal_api_admin_email and connection.internal_api_admin_password ): - csrf_token, session = self.autogenerate_auth( - connection.tenant_name, - connection.internal_api_admin_email, - connection.internal_api_admin_password, - ) + try: + authenticated_session = 
asyncio.get_event_loop().run_until_complete( + self.autogenerate_auth( + connection.tenant_name, + connection.internal_api_admin_email, + connection.internal_api_admin_password, + connection.chrome_profile_data_dir, + ) + ) + except RuntimeError as e: + raise RuntimeError( + f"{e}. If you are running this in a Jupyter notebook, use `nest_asyncio.apply()` to allow the async playwright login to run." + ) self.custom_post_cookies = { - "session": session, + "session": authenticated_session, } self.custom_post_headers = { - "X-Csrftoken": csrf_token, "Referer": f"https://{connection.tenant_name}.benchling.com/", "Content-Type": "application/json", } @@ -249,6 +258,78 @@ def upsert_remote_revision_id(self, revision_id: str) -> bool: f"Error finding field on {REMOTE_LIMINAL_SCHEMA_NAME} schema with warehouse_name {REMOTE_REVISION_ID_FIELD_WH_NAME}. Check schema fields to ensure this field exists and is defined according to documentation." ) + @classmethod + async def autogenerate_auth( + cls, + benchling_tenant: str, + email: str, + password: str, + chrome_profile_data_dir: str | None = None, + ) -> str: + with requests.Session() as session: + signin_page = session.get( + f"https://{benchling_tenant}.benchling.com/signin", + allow_redirects=False, + ) + if signin_page.status_code == 302: + authenticated_session = ( + await cls.get_authenticated_session_sso_login_playwright( + benchling_tenant, chrome_profile_data_dir + ) + ) + elif signin_page.status_code == 200: + authenticated_session = ( + cls.get_authenticated_session_benchling_admin_login( + benchling_tenant, email, password + ) + ) + else: + raise ValueError( + f"Unexpected response: Status code {signin_page.status_code}: {signin_page.text}" + ) + return authenticated_session + + @classmethod + async def get_authenticated_session_sso_login_playwright( + cls, benchling_tenant: str, chrome_profile_data_dir: str | None = None + ) -> str: + LOGGER.info(f"Log into your {benchling_tenant} Benchling tenant...") + async 
with async_playwright() as playwright: + if chrome_profile_data_dir: + context = await playwright.chromium.launch_persistent_context( + channel="chrome", + headless=False, + user_data_dir=chrome_profile_data_dir, + ) + else: + browser = await playwright.chromium.launch( + channel="chrome", headless=False + ) + context = await browser.new_context() + page = await context.new_page() + try: + await page.goto(f"https://{benchling_tenant}.benchling.com") + except Exception: + raise ValueError( + f"Error navigating to https://{benchling_tenant}.benchling.com" + ) + try: + await page.wait_for_url( + f"**/{benchling_tenant}.benchling.com/**", timeout=120_000 + ) + except Exception: + raise TimeoutError( + f"Log in cancelled or timed out (2 min timeout). Did not detect SSO log in for https://{benchling_tenant}.benchling.com." + ) + + cookies = await context.cookies() + session_cookie = next( + (c["value"] for c in cookies if c["name"] == "session"), None + ) + if not session_cookie: + raise ValueError("No session cookie found.") + return session_cookie + @classmethod @retry( stop=stop_after_attempt(3), @@ -256,9 +337,9 @@ def upsert_remote_revision_id(self, revision_id: str) -> bool: wait=wait_exponential(multiplier=1, min=1, max=8), reraise=True, ) - def autogenerate_auth( + def get_authenticated_session_benchling_admin_login( cls, benchling_tenant: str, email: str, password: str - ) -> tuple[str, str]: + ) -> str: with requests.Session() as session: homepage = session.get(f"https://{benchling_tenant}.benchling.com/signin") soup = BeautifulSoup(homepage.content, features="lxml") @@ -286,6 +367,8 @@ def autogenerate_auth( raise ValueError( f"Failed to sign in to Benchling: {signin_response.text}" ) - return csrf_token, signin_response.headers["Set-Cookie"].split("; Secure")[ - 0 - ].removeprefix("session=") + return ( + signin_response.headers["Set-Cookie"] + .split("; Secure")[0] + .removeprefix("session=") + ) diff --git a/pyproject.toml b/pyproject.toml index 
c646392..57c5f8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "psycopg2-binary>=2.9.10,<3", "tornado==6.5.0", "click>=8.0.0,<8.2.0", + "playwright>=1.58.0", ] [virtualenvs] diff --git a/uv.lock b/uv.lock index 0192a2a..faafd1f 100644 --- a/uv.lock +++ b/uv.lock @@ -746,6 +746,7 @@ dependencies = [ { name = "lxml" }, { name = "numpy" }, { name = "pandas" }, + { name = "playwright" }, { name = "psycopg2-binary" }, { name = "pydantic" }, { name = "requests" }, @@ -777,6 +778,7 @@ requires-dist = [ { name = "lxml", specifier = ">=5.3.0,<6" }, { name = "numpy", specifier = ">=1.23.5,<2" }, { name = "pandas", specifier = ">=1.5.3,<3" }, + { name = "playwright", specifier = ">=1.58.0" }, { name = "psycopg2-binary", specifier = ">=2.9.10,<3" }, { name = "pydantic", specifier = ">=2,<=2.11" }, { name = "requests", specifier = ">=2.32.3,<3" }, @@ -1329,6 +1331,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439, upload-time = "2024-09-17T19:06:49.212Z" }, ] +[[package]] +name = "playwright" +version = "1.58.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet" }, + { name = "pyee" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/c9/9c6061d5703267f1baae6a4647bfd1862e386fbfdb97d889f6f6ae9e3f64/playwright-1.58.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:96e3204aac292ee639edbfdef6298b4be2ea0a55a16b7068df91adac077cc606", size = 42251098, upload-time = "2026-01-30T15:09:24.028Z" }, + { url = "https://files.pythonhosted.org/packages/e0/40/59d34a756e02f8c670f0fee987d46f7ee53d05447d43cd114ca015cb168c/playwright-1.58.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:70c763694739d28df71ed578b9c8202bb83e8fe8fb9268c04dd13afe36301f71", size = 41039625, upload-time = 
"2026-01-30T15:09:27.558Z" }, + { url = "https://files.pythonhosted.org/packages/e1/ee/3ce6209c9c74a650aac9028c621f357a34ea5cd4d950700f8e2c4b7fe2c4/playwright-1.58.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:185e0132578733d02802dfddfbbc35f42be23a45ff49ccae5081f25952238117", size = 42251098, upload-time = "2026-01-30T15:09:30.461Z" }, + { url = "https://files.pythonhosted.org/packages/f1/af/009958cbf23fac551a940d34e3206e6c7eed2b8c940d0c3afd1feb0b0589/playwright-1.58.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:c95568ba1eda83812598c1dc9be60b4406dffd60b149bc1536180ad108723d6b", size = 46235268, upload-time = "2026-01-30T15:09:33.787Z" }, + { url = "https://files.pythonhosted.org/packages/d9/a6/0e66ad04b6d3440dae73efb39540c5685c5fc95b17c8b29340b62abbd952/playwright-1.58.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f9999948f1ab541d98812de25e3a8c410776aa516d948807140aff797b4bffa", size = 45964214, upload-time = "2026-01-30T15:09:36.751Z" }, + { url = "https://files.pythonhosted.org/packages/0e/4b/236e60ab9f6d62ed0fd32150d61f1f494cefbf02304c0061e78ed80c1c32/playwright-1.58.0-py3-none-win32.whl", hash = "sha256:1e03be090e75a0fabbdaeab65ce17c308c425d879fa48bb1d7986f96bfad0b99", size = 36815998, upload-time = "2026-01-30T15:09:39.627Z" }, + { url = "https://files.pythonhosted.org/packages/41/f8/5ec599c5e59d2f2f336a05b4f318e733077cd5044f24adb6f86900c3e6a7/playwright-1.58.0-py3-none-win_amd64.whl", hash = "sha256:a2bf639d0ce33b3ba38de777e08697b0d8f3dc07ab6802e4ac53fb65e3907af8", size = 36816005, upload-time = "2026-01-30T15:09:42.449Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c4/cc0229fea55c87d6c9c67fe44a21e2cd28d1d558a5478ed4d617e9fb0c93/playwright-1.58.0-py3-none-win_arm64.whl", hash = "sha256:32ffe5c303901a13a0ecab91d1c3f74baf73b84f4bedbb6b935f5bc11cc98e1b", size = 33085919, upload-time = "2026-01-30T15:09:45.71Z" }, +] + [[package]] name = "pluggy" version = "1.5.0" @@ -1597,6 +1618,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/da/a2/2670964d7046025b96f8c6d35c38e5310ec6aa1681e4158ef31ab21a4727/pydantic_core-2.33.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:68504959253303d3ae9406b634997a2123a0b0c1da86459abbd0ffc921695eac", size = 2082790, upload-time = "2025-03-26T20:30:03.619Z" }, ] +[[package]] +name = "pyee" +version = "13.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/04/e7c1fe4dc78a6fdbfd6c337b1c3732ff543b8a397683ab38378447baa331/pyee-13.0.1.tar.gz", hash = "sha256:0b931f7c14535667ed4c7e0d531716368715e860b988770fc7eb8578d1f67fc8", size = 31655, upload-time = "2026-02-14T21:12:28.044Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/c4/b4d4827c93ef43c01f599ef31453ccc1c132b353284fc6c87d535c233129/pyee-13.0.1-py3-none-any.whl", hash = "sha256:af2f8fede4171ef667dfded53f96e2ed0d6e6bd7ee3bb46437f77e3b57689228", size = 15659, upload-time = "2026-02-14T21:12:26.263Z" }, +] + [[package]] name = "pygments" version = "2.18.0" From cd1784af7ba50a0332654604b2d3590eec0b2ad8 Mon Sep 17 00:00:00 2001 From: Nirmit Damania Date: Mon, 30 Mar 2026 09:32:56 -0400 Subject: [PATCH 2/5] clean up logic, store playwright in root directory, and update docs --- docs/getting-started/installation.md | 6 ++ docs/getting-started/prerequisites.md | 8 +- docs/getting-started/setup.md | 10 +-- liminal/cli/utils.py | 4 - liminal/connection/benchling_connection.py | 8 +- liminal/connection/benchling_service.py | 97 ++++++++++++---------- 6 files changed, 68 insertions(+), 65 deletions(-) diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index 20c3bf0..c613e4b 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -4,6 +4,12 @@ Install Liminal via pip: pip install liminal-orm ``` +Install Liminal via uv: + +```bash +uv add liminal-orm +``` + Install 
Liminal via github: ```bash diff --git a/docs/getting-started/prerequisites.md b/docs/getting-started/prerequisites.md index cacf78d..a40e4fa 100644 --- a/docs/getting-started/prerequisites.md +++ b/docs/getting-started/prerequisites.md @@ -1,10 +1,6 @@ -1. **Benchling Admin Account**: Liminal builds on top of Benchling's LIMS system. You will need access and credentials to an admin account for your Benchling tenant(s). Liminal needs credentials with full admin priveleges in order to manipulate Benchling schemas through their API. +1. **Benchling Admin Account**: Liminal builds on top of Benchling's LIMS system. You will need access and credentials to an admin account for your Benchling tenant(s). Liminal needs admin privileges in order to manipulate Benchling schemas through their API. -2. **SSO optional**: A requirement for Liminal's migration service to work is for your Benchling tenant to have SSO optional (or disabled). At the moment, a part of Liminal's API connection requires an admin email and password login (non-SSO). You can message Benchling support to request that your tenant be configured to be SSO optional (or disabled). - -Note that as a Benchling admin, you can enforce SSO for all users and create only a single non-SSO user for Liminal to use. This is what we recommend to maintain the highest level of security. - -3. **Python**: Liminal is built using Python. You will need Python 3.9 or later installed on your machine. +2. **Python**: Liminal is built using Python. You will need Python 3.9 or later installed on your machine. ### Notes diff --git a/docs/getting-started/setup.md b/docs/getting-started/setup.md index 17dc46b..7357d9e 100644 --- a/docs/getting-started/setup.md +++ b/docs/getting-started/setup.md @@ -9,18 +9,15 @@ 3. Populate the `env.py` file with your Benchling connection information, following the instructions in the file.
For example: ```python - from liminal.connection import BenchlingConnection, TenantConfigFlags + from liminal.connection import BenchlingConnection # It is highly recommended to use a secrets manager to store your credentials. prod_connection = BenchlingConnection( - tenant_name="pizzahouse-prod", + tenant_name="pizzahouseprod", tenant_alias="prod", api_client_id="my-secret-api-client-id", api_client_secret="my-secret-api-client-secret", warehouse_connection_string="...", - internal_api_admin_email="my-secret-internal-api-admin-email", - internal_api_admin_password="my-secret-internal-api-admin-password", - config_flags=TenantConfigFlags(...) ) staging_connection = BenchlingConnection(...) @@ -29,7 +26,8 @@ ``` * **Required**: The `api_client_id` and `api_client_secret` are used to connect to Benchling's SDK. For more information, see the [Benchling API documentation](https://docs.benchling.com/docs/getting-started-benchling-apps#calling-the-api-as-an-app). - * **Required**: The `internal_api_admin_email` and `internal_api_admin_password` are used to connect to Benchling's API for the migration service. This must be the email and password used to log in to an Admin account. + * **Required**: If your tenant has SSO turned off, `internal_api_admin_email` and `internal_api_admin_password` are used to connect to Benchling's API for the migration service. This must be the email and password used to log in to an Admin account. + If your tenant has SSO set to optional or required, these properties are optional and Liminal will use playwright, which prompts the user to log in through a browser. * Optional: The `warehouse_connection_string` is used to connect to Benchling's read-only warehouse. If you have access, set this as the connection string for the warehouse. * Optional: The `config_flags` parameter is used to set tenant-specific configuration flags. For more information, see the [BenchlingConnection](../reference/benchling-connection.md) reference. 
* Set `schemas_enable_change_warehouse_name` to `True` if you want to enable changing schema and field warehouse names. diff --git a/liminal/cli/utils.py b/liminal/cli/utils.py index 8efa76b..f1f6b59 100644 --- a/liminal/cli/utils.py +++ b/liminal/cli/utils.py @@ -67,10 +67,6 @@ def _read_local_env_file( raise Exception( "api_client_id and api_client_secret must be provided in BenchlingConnection in liminal/env.py. This is necessary for the migration service." ) - if not bc.internal_api_admin_email or not bc.internal_api_admin_password: - raise Exception( - "internal_api_admin_email and internal_api_admin_password must be provided in BenchlingConnection in liminal/env.py. This is necessary for the migration service." - ) return bc raise Exception( f"BenchlingConnection with tenant name or alias {benchling_tenant} not found in liminal/env.py. Please update the env.py file with a correctly defined BenchlingConnection." diff --git a/liminal/connection/benchling_connection.py b/liminal/connection/benchling_connection.py index e5d6ac6..9158fec 100644 --- a/liminal/connection/benchling_connection.py +++ b/liminal/connection/benchling_connection.py @@ -37,10 +37,10 @@ class BenchlingConnection(BaseModel): warehouse_connection_string: str | None = None The connection string for the warehouse. internal_api_admin_email: str | None = None - The email of the internal API admin. If SSO is not enabled or not required on your Benchling tenant, this email is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. + The email of the internal API admin. If SSO is not enabled or optional on your Benchling tenant, this email is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. internal_api_admin_password: str | None = None - The password of the internal API admin. 
If SSO is not enabled or not required on your Benchling tenant, this password is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. - chrome_profile_data_dir: str | None = ".liminal_chrome_data/" + The password of the internal API admin. If SSO is not enabled or optional on your Benchling tenant, this password is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. + chrome_profile_data_dir: str | None = "~/.liminal/chrome_data/" The directory to store the Chrome profile data for playwright. If SSO is enabled and required on your Benchling tenant, Liminal uses playwright so the user can log into Benchling in order to give Liminal the authenticated internal API session cookie. This directory is used to store playwright's persistent context, allowing the user to set up a persistent chrome profile. @@ -59,7 +59,7 @@ class BenchlingConnection(BaseModel): warehouse_connection_string: str | None = None internal_api_admin_email: str | None = None internal_api_admin_password: str | None = None - chrome_profile_data_dir: str | None = "liminal/.liminal_chrome_data/" + chrome_profile_data_dir: str | None = "~/.liminal/playwright_chrome_data/" fieldsets: bool = False config_flags: TenantConfigFlags = TenantConfigFlags() diff --git a/liminal/connection/benchling_service.py b/liminal/connection/benchling_service.py index 61fde35..969fffb 100644 --- a/liminal/connection/benchling_service.py +++ b/liminal/connection/benchling_service.py @@ -1,5 +1,6 @@ import asyncio import logging +import os from typing import Any from playwright.async_api import async_playwright @@ -34,6 +35,10 @@ REMOTE_REVISION_ID_FIELD_WH_NAME = "revision_id" +class SSODisabledError(ValueError): + pass + + class BenchlingService(Benchling): """ Class that creates a connection object that can be used to connect to Benchling's API, database, or internal API. 
@@ -90,37 +95,33 @@ def __init__( ) self.use_internal_api = use_internal_api if use_internal_api: - if ( - connection.internal_api_admin_email - and connection.internal_api_admin_password - ): - try: - authenticated_session = asyncio.get_event_loop().run_until_complete( - self.autogenerate_auth( - connection.tenant_name, - connection.internal_api_admin_email, - connection.internal_api_admin_password, - connection.chrome_profile_data_dir, - ) - ) - except RuntimeError as e: - raise RuntimeError( - f"{e}. If you are running this in a Jupyter notebook, use `nest_asyncio.apply()` to allow the async playwright login to run." + try: + authenticated_session = asyncio.get_event_loop().run_until_complete( + self.autogenerate_auth( + connection.tenant_name, + connection.internal_api_admin_email, + connection.internal_api_admin_password, + connection.chrome_profile_data_dir, ) - self.custom_post_cookies = { - "session": authenticated_session, - } - self.custom_post_headers = { - "Referer": f"https://{connection.tenant_name}.benchling.com/", - "Content-Type": "application/json", - } - LOGGER.info( - f"Tenant {connection.tenant_name}: Connected to Benchling internal API." ) - else: - raise ValueError( - "use_internal_api is True but internal_api_admin_email and internal_api_admin_password not provided in BenchlingConnection." + except SSODisabledError as e: + raise SSODisabledError( + f"{e} Please provide `internal_api_admin_email` and `internal_api_admin_password` in your BenchlingConnection." + ) + except RuntimeError as e: + raise RuntimeError( + f"{e}. If you are running this in a Jupyter notebook, use `nest_asyncio.apply()` to allow the async playwright login to run." ) + self.custom_post_cookies = { + "session": authenticated_session, + } + self.custom_post_headers = { + "Referer": f"https://{connection.tenant_name}.benchling.com/", + "Content-Type": "application/json", + } + LOGGER.info( + f"Tenant {connection.tenant_name}: Connected to Benchling internal API." 
+ ) @property def session(self) -> Session: @@ -262,32 +263,38 @@ def upsert_remote_revision_id(self, revision_id: str) -> bool: async def autogenerate_auth( cls, benchling_tenant: str, - email: str, - password: str, + email: str | None = None, + password: str | None = None, chrome_profile_data_dir: str | None = None, ) -> str: with requests.Session() as session: - signin_page = session.get( - f"https://{benchling_tenant}.benchling.com/signin", - allow_redirects=False, - ) - if signin_page.status_code == 302: - authenticated_session = ( - await cls.get_authenticated_session_sso_login_playwright( - benchling_tenant, chrome_profile_data_dir - ) + if email and password: + signin_page = session.get( + f"https://{benchling_tenant}.benchling.com/signin", + allow_redirects=False, ) - elif signin_page.status_code == 200: - authenticated_session = ( - cls.get_authenticated_session_benchling_admin_login( + if signin_page.status_code == 200: + return cls.get_authenticated_session_benchling_admin_login( benchling_tenant, email, password ) + + else: + signin_page = session.get( + f"https://{benchling_tenant}.benchling.com/ext/saml/signin:begin", + allow_redirects=False, + ) + if signin_page.status_code == 403: + raise SSODisabledError( + f"admin_email and admin_password not provided when sso is turned off for Benchling tenant {benchling_tenant}." 
+ ) + if signin_page.status_code == 302: + return await cls.get_authenticated_session_sso_login_playwright( + benchling_tenant, chrome_profile_data_dir ) else: raise ValueError( f"Unexpected response: Status code {signin_page.status_code}: {signin_page.text}" ) - return authenticated_session @classmethod async def get_authenticated_session_sso_login_playwright( @@ -299,7 +306,7 @@ async def get_authenticated_session_sso_login_playwright( context = await playwright.chromium.launch_persistent_context( channel="chrome", headless=False, - user_data_dir=chrome_profile_data_dir, + user_data_dir=os.path.expanduser(chrome_profile_data_dir), ) else: browser = await playwright.chromium.launch( @@ -315,7 +322,7 @@ async def get_authenticated_session_sso_login_playwright( ) try: await page.wait_for_url( - f"**/{benchling_tenant}.benchling.com/**", timeout=120_000 + f"**/{benchling_tenant}.benchling.com/**", timeout=300_000 ) except Exception: raise TimeoutError( From e503c3531a5df67a9614355108b76956f95a890b Mon Sep 17 00:00:00 2001 From: Nirmit Damania Date: Fri, 17 Apr 2026 09:07:56 -0400 Subject: [PATCH 3/5] update docs --- docs/getting-started/setup.md | 6 +++--- docs/reference/benchling-connection.md | 15 ++++++++++----- liminal/connection/benchling_connection.py | 2 +- liminal/connection/benchling_service.py | 2 +- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/docs/getting-started/setup.md b/docs/getting-started/setup.md index 7357d9e..88631b5 100644 --- a/docs/getting-started/setup.md +++ b/docs/getting-started/setup.md @@ -26,9 +26,9 @@ ``` * **Required**: The `api_client_id` and `api_client_secret` are used to connect to Benchling's SDK. For more information, see the [Benchling API documentation](https://docs.benchling.com/docs/getting-started-benchling-apps#calling-the-api-as-an-app). 
- * **Required**: If your tenant has SSO turned off, `internal_api_admin_email` and `internal_api_admin_password` are used to connect to Benchling's API for the migration service. This must be the email and password used to log in to an Admin account. - If your tenant has SSO set to optional or required, these properties are optional and Liminal will use playwright, which prompts the user to log in through a browser. - * Optional: The `warehouse_connection_string` is used to connect to Benchling's read-only warehouse. If you have access, set this as the connection string for the warehouse. + * **Required**: If your tenant has SSO set to optional or required, `internal_api_admin_email` and `internal_api_admin_password` are optional and Liminal will use playwright, which prompts the user to log in through a browser session that pops up automatically when Liminal is run. + If your tenant has SSO turned off, these properties are required and are used to connect to Benchling's API for the migration service. This must be the email and password used to log in to an Admin account. + * Optional: The `warehouse_connection_string` is used to connect to Benchling's read-only warehouse. If you have warehouse access, set this as the connection string for the warehouse. * Optional: The `config_flags` parameter is used to set tenant-specific configuration flags. For more information, see the [BenchlingConnection](../reference/benchling-connection.md) reference. * Set `schemas_enable_change_warehouse_name` to `True` if you want to enable changing schema and field warehouse names.
diff --git a/docs/reference/benchling-connection.md b/docs/reference/benchling-connection.md index 92bdd11..2d96701 100644 --- a/docs/reference/benchling-connection.md +++ b/docs/reference/benchling-connection.md @@ -1,6 +1,6 @@ ## BenchlingConnection: [class](https://github.com/dynotx/liminal-orm/blob/main/liminal/connection/benchling_connection.py) -The `BenchlingConnection` class is used to define the connection information for a particular Benchling tenant. The BenchlingConnection class is defined in your `env.py` file and it also used to create a BenchlingService object. In the `env.py` file, the api_client and internal_api parameters are required for the BenchlingConnection object in orderto be used in the migration service. The BenchlingService can be imported from the liminal pacakage and be used to connect to [Benchling's SDK](https://docs.benchling.com/docs/getting-started-with-the-sdk), internal API, and/or Postgres warehouse. +The `BenchlingConnection` class is used to define the connection information for a particular Benchling tenant. The BenchlingConnection class is defined in your `env.py` file and is also used to create a BenchlingService object. In the `env.py` file, the api_client is required for the BenchlingConnection object in order to be used in the migration service. The BenchlingService can be imported from the liminal package and be used to connect to [Benchling's SDK](https://docs.benchling.com/docs/getting-started-with-the-sdk), internal API, and/or Postgres warehouse.
```python # Example BenchlingConnection definition @@ -13,8 +13,6 @@ connection = BenchlingConnection( api_client_id="my-secret-api-client-id", api_client_secret="my-secret-api-client-secret", warehouse_connection_string="my-warehouse-connection-string", - internal_api_admin_email="my-secret-internal-api-admin-email", - internal_api_admin_password="my-secret-internal-api-admin-password", config_flags=TenantConfigFlags() ) ``` @@ -43,11 +41,18 @@ connection = BenchlingConnection( - **internal_api_admin_email: Optional[str] = None** - The email of the internal API admin. + The email of the internal API admin. If SSO is not enabled or optional on your Benchling tenant, this email is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. - **internal_api_admin_password: Optional[str] = None** - The password of the internal API admin. + The password of the internal API admin. If SSO is not enabled or optional on your Benchling tenant, this password is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. + +- **chrome_profile_data_dir: Optional[str] = "~/.liminal/playwright_chrome_data/"** + + The directory to store the Chrome profile data for playwright. If SSO is enabled and required on your Benchling tenant, + Liminal uses playwright so the user can log into Benchling in order to give Liminal the authenticated internal API session cookie. + This directory is used to store playwright's persistent context, allowing the user to set up a persistent chrome profile. + Set this to None in order to disable playwright's persistent context which enables automatic login. 
- **fieldsets: bool = False** diff --git a/liminal/connection/benchling_connection.py b/liminal/connection/benchling_connection.py index 9158fec..4a70b4b 100644 --- a/liminal/connection/benchling_connection.py +++ b/liminal/connection/benchling_connection.py @@ -44,7 +44,7 @@ class BenchlingConnection(BaseModel): The directory to store the Chrome profile data for playwright. If SSO is enabled and required on your Benchling tenant, Liminal uses playwright so the user can log into Benchling in order to give Liminal the authenticated internal API session cookie. This directory is used to store playwright's persistent context, allowing the user to set up a persistent chrome profile. - Set this to None in order to disable playwright's persistent context. + Set this to None in order to disable playwright's persistent context which enables automatic login. fieldsets: bool = False Whether your Benchling tenant has access to fieldsets. config_flags: TenantConfigFlags = TenantConfigFlags() diff --git a/liminal/connection/benchling_service.py b/liminal/connection/benchling_service.py index 969fffb..a64e2c8 100644 --- a/liminal/connection/benchling_service.py +++ b/liminal/connection/benchling_service.py @@ -322,7 +322,7 @@ async def get_authenticated_session_sso_login_playwright( ) try: await page.wait_for_url( - f"**/{benchling_tenant}.benchling.com/**", timeout=300_000 + f"**/{benchling_tenant}.benchling.com/**", timeout=600_000 ) except Exception: raise TimeoutError( From ccce045195fa0b7c22f54a110be734df0c591224 Mon Sep 17 00:00:00 2001 From: Nirmit Damania Date: Fri, 17 Apr 2026 09:31:58 -0400 Subject: [PATCH 4/5] rename property to playwright_data_dir --- docs/reference/benchling-connection.md | 6 +++--- liminal/connection/benchling_connection.py | 6 +++--- liminal/connection/benchling_service.py | 19 +++++++++++++------ 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/docs/reference/benchling-connection.md b/docs/reference/benchling-connection.md index 
2d96701..a3eac24 100644 --- a/docs/reference/benchling-connection.md +++ b/docs/reference/benchling-connection.md @@ -47,11 +47,11 @@ connection = BenchlingConnection( The password of the internal API admin. If SSO is not enabled or optional on your Benchling tenant, this password is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. -- **chrome_profile_data_dir: Optional[str] = "~/.liminal/playwright_chrome_data/"** +- **playwright_data_dir: Optional[str] = "~/.liminal/playwright_chrome_data/"** - The directory to store the Chrome profile data for playwright. If SSO is enabled and required on your Benchling tenant, + The directory to store the playwright browser user data. If SSO is enabled and required on your Benchling tenant, Liminal uses playwright so the user can log into Benchling in order to give Liminal the authenticated internal API session cookie. - This directory is used to store playwright's persistent context, allowing the user to set up a persistent chrome profile. + This directory is used to store playwright's persistent context, allowing the user to set up a persistent Chrome user profile. Set this to None in order to disable playwright's persistent context which enables automatic login. - **fieldsets: bool = False** diff --git a/liminal/connection/benchling_connection.py b/liminal/connection/benchling_connection.py index 4a70b4b..1492fa6 100644 --- a/liminal/connection/benchling_connection.py +++ b/liminal/connection/benchling_connection.py @@ -40,8 +40,8 @@ class BenchlingConnection(BaseModel): The email of the internal API admin. If SSO is not enabled or optional on your Benchling tenant, this email is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. internal_api_admin_password: str | None = None The password of the internal API admin. 
If SSO is not enabled or optional on your Benchling tenant, this password is used to log in to Benchling, and give Liminal the authenticated internal API session cookie. - chrome_profile_data_dir: str | None = "~/.liminal/chrome_data/" - The directory to store the Chrome profile data for playwright. If SSO is enabled and required on your Benchling tenant, + playwright_data_dir: str | None = "~/.liminal/playwright_chrome_data/" + The directory to store the playwright browser user data. If SSO is enabled and required on your Benchling tenant, Liminal uses playwright so the user can log into Benchling in order to give Liminal the authenticated internal API session cookie. This directory is used to store playwright's persistent context, allowing the user to set up a persistent chrome profile. Set this to None in order to disable playwright's persistent context which enables automatic login. @@ -59,7 +59,7 @@ class BenchlingConnection(BaseModel): warehouse_connection_string: str | None = None internal_api_admin_email: str | None = None internal_api_admin_password: str | None = None - chrome_profile_data_dir: str | None = "~/.liminal/playwright_chrome_data/" + playwright_data_dir: str | None = "~/.liminal/playwright_chrome_data/" fieldsets: bool = False config_flags: TenantConfigFlags = TenantConfigFlags() diff --git a/liminal/connection/benchling_service.py b/liminal/connection/benchling_service.py index a64e2c8..87faabc 100644 --- a/liminal/connection/benchling_service.py +++ b/liminal/connection/benchling_service.py @@ -101,7 +101,7 @@ def __init__( connection.tenant_name, connection.internal_api_admin_email, connection.internal_api_admin_password, - connection.chrome_profile_data_dir, + connection.playwright_data_dir, ) ) except SSODisabledError as e: @@ -265,8 +265,11 @@ async def autogenerate_auth( benchling_tenant: str, email: str | None = None, password: str | None = None, - chrome_profile_data_dir: str | None = None, + playwright_data_dir: str | None = None, ) -> str: +
"""Logs in to Benchling using the admin email and password or playwright and returns the session cookie. + If email and password are not passed in or if SSO is set to required on the Benchling tenant, playwright is used to log in. + Otherwise, the admin email and password are used to log in.""" with requests.Session() as session: if email and password: signin_page = session.get( @@ -289,7 +292,7 @@ async def autogenerate_auth( ) if signin_page.status_code == 302: return await cls.get_authenticated_session_sso_login_playwright( - benchling_tenant, chrome_profile_data_dir + benchling_tenant, playwright_data_dir ) else: raise ValueError( @@ -298,15 +301,17 @@ async def autogenerate_auth( @classmethod async def get_authenticated_session_sso_login_playwright( - cls, benchling_tenant: str, chrome_profile_data_dir: str | None = None + cls, benchling_tenant: str, playwright_data_dir: str | None = None ) -> str: + """Logs in to Benchling using playwright and returns the session cookie. + This can be used when SSO is enabled and required on the Benchling tenant.""" LOGGER.info(f"Log into your {benchling_tenant} Benchling tenant...") async with async_playwright() as playwright: - if chrome_profile_data_dir: + if playwright_data_dir: context = await playwright.chromium.launch_persistent_context( channel="chrome", headless=False, - user_data_dir=os.path.expanduser(chrome_profile_data_dir), + user_data_dir=os.path.expanduser(playwright_data_dir), ) else: browser = await playwright.chromium.launch( @@ -347,6 +352,8 @@ async def get_authenticated_session_sso_login_playwright( def get_authenticated_session_benchling_admin_login( cls, benchling_tenant: str, email: str, password: str ) -> str: + """Logs in to Benchling using the admin email and password and returns the session cookie. 
+ This can be used when SSO is disabled or optional on the Benchling tenant.""" with requests.Session() as session: homepage = session.get(f"https://{benchling_tenant}.benchling.com/signin") soup = BeautifulSoup(homepage.content, features="lxml") From d55713d721334566332e97b2847e1e5975f19beb Mon Sep 17 00:00:00 2001 From: Nirmit Damania Date: Mon, 11 May 2026 20:34:39 -0400 Subject: [PATCH 5/5] update docs --- docs/getting-started/setup.md | 4 ++-- docs/reference/benchling-connection.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/getting-started/setup.md b/docs/getting-started/setup.md index 88631b5..c60eaa1 100644 --- a/docs/getting-started/setup.md +++ b/docs/getting-started/setup.md @@ -26,8 +26,8 @@ ``` * **Required**: The `api_client_id` and `api_client_secret` are used to connect to Benchling's SDK. For more information, see the [Benchling API documentation](https://docs.benchling.com/docs/getting-started-benchling-apps#calling-the-api-as-an-app). - * **Required**: If your tenant has SSO set to optional or required, `internal_api_admin_email` and `internal_api_admin_password` are optional and Liminal will use playwright, which prompts the user to log in through a browser session that pops up automatically when Liminal is run. - If your tenant has SSO turned off, these properties are required to be set are are used to connect to Benchling's API for the migration service. This must be the email and password used to log in to an Admin account. + * **Required**: If your tenant has SSO set to optional or required, Liminal will prompt the user to log in through a playwright browser session that pops up automatically when Liminal is run. This ensures Liminal uses the user's Benchling authentication. + If your tenant has SSO turned off, the `internal_api_admin_email` and `internal_api_admin_password` properties must be set and are used to connect to Benchling's API for the migration service.
This must be the email and password used to log in to an Admin account. * Optional: The `warehouse_connection_string` is used to connect to Benchling's read-only warehouse. If you have warehouse access, set this as the connection string for the warehouse. * Optional: The `config_flags` parameter is used to set tenant-specific configuration flags. For more information, see the [BenchlingConnection](../reference/benchling-connection.md) reference. * Set `schemas_enable_change_warehouse_name` to `True` if you want to enable changing schema and field warehouse names. diff --git a/docs/reference/benchling-connection.md b/docs/reference/benchling-connection.md index a3eac24..96a5504 100644 --- a/docs/reference/benchling-connection.md +++ b/docs/reference/benchling-connection.md @@ -50,7 +50,7 @@ connection = BenchlingConnection( - **playwright_data_dir: Optional[str] = "~/.liminal/playwright_chrome_data/"** The directory to store the playwright browser user data. If SSO is enabled and required on your Benchling tenant, - Liminal uses playwright so the user can log into Benchling in order to give Liminal the authenticated internal API session cookie. + Liminal uses playwright so the user can log into Benchling in order to give Liminal the user's authenticated internal API session cookie. This directory is used to store playwright's persistent context, allowing the user to set up a persistent Chrome user profile. Set this to None in order to disable playwright's persistent context which enables automatic login.