diff --git a/web/src/utils/externalServices.ts b/web/src/utils/externalServices.ts
index f24c0015c..880789015 100644
--- a/web/src/utils/externalServices.ts
+++ b/web/src/utils/externalServices.ts
@@ -13,6 +13,11 @@ interface ExternalService {
   endpoint: ExternalServiceEndpoint;
 }
 
+// Constants for zarr ID and S3 URL parsing
+const UUID_REGEX_PATTERN = /[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}/;
+const ZARR_ID_REGEX = new RegExp(`/zarr/(${UUID_REGEX_PATTERN.source})`);
+const DEFAULT_S3_BUCKET = 'dandiarchive';
+
 const EXTERNAL_SERVICES: ExternalService[] = [
   {
     name: "Bioimagesuite/Viewer",
@@ -79,9 +84,66 @@ const EXTERNAL_SERVICES: ExternalService[] = [
 
     maxsize: Infinity,
     endpoint: "https://www.neuroglass.io/new?resource=$asset_dandi_metadata_url$",
+  },
+
+  {
+    name: "QuiltData: S3 Browser",
+    regex: /\.zarr$/,
+    maxsize: Infinity,
+    endpoint: (item: ServiceUrlData) => {
+      if (!item.zarr_id) {
+        return null;
+      }
+      const bucket = extractS3Bucket(item.assetS3Url);
+      return `https://open.quiltdata.com/b/${bucket}/tree/zarr/${item.zarr_id}/`;
+    },
   }
 ];
 
+/**
+ * Extract the zarr ID from the content URL of a zarr asset.
+ * The zarr ID is the UUID that immediately follows '/zarr/' in the URL path.
+ * Example: https://s3.amazonaws.com/dandiarchive/zarr/7b617177-ad57-4f7f-806b-060e18f42d15/
+ * Returns: 7b617177-ad57-4f7f-806b-060e18f42d15
+ *
+ * @param contentUrl - URL to search for a '/zarr/{uuid}' path component.
+ * @returns The matched UUID, or null when the URL contains no such component.
+ */
+function extractZarrId(contentUrl: string): string | null {
+  const zarrMatch = contentUrl.match(ZARR_ID_REGEX);
+  return zarrMatch ? zarrMatch[1] : null;
+}
+
+/**
+ * Extract the S3 bucket name from an S3 URL.
+ * Handles both path-style and virtual-hosted-style S3 URLs:
+ * - Path-style: https://s3.amazonaws.com/bucket/...
+ * - Virtual-hosted-style: https://bucket.s3.amazonaws.com/...
+ * Regional endpoints (e.g. s3.us-east-2.amazonaws.com) are matched in both styles.
+ *
+ * @param s3Url - URL to inspect.
+ * @returns The bucket name, or DEFAULT_S3_BUCKET when the URL matches neither style.
+ */
+function extractS3Bucket(s3Url: string): string {
+  // Try path-style: https://s3.amazonaws.com/bucket/...
+  // The bucket is the first path segment; stop at '?' or '#' so a URL without a
+  // key (e.g. .../bucket?list-type=2) does not leak its query into the name.
+  const pathStyleMatch = s3Url.match(/https?:\/\/s3[^/]*\.amazonaws\.com\/([^/?#]+)/);
+  if (pathStyleMatch) {
+    return pathStyleMatch[1];
+  }
+
+  // Try virtual-hosted-style: https://bucket.s3.amazonaws.com/...
+  // Bucket names may contain dots (my.bucket.s3.amazonaws.com), so capture
+  // everything in the host up to the first '.s3' rather than a single label.
+  const virtualHostedMatch = s3Url.match(/https?:\/\/([^/]+?)\.s3[^/]*\.amazonaws\.com/);
+  if (virtualHostedMatch) {
+    return virtualHostedMatch[1];
+  }
+
+  return DEFAULT_S3_BUCKET;
+}
+
 interface ServiceUrlData {
   dandisetId: string,
   dandisetVersion: string,
@@ -90,6 +152,9 @@ interface ServiceUrlData {
   assetDandiUrl: string,
   assetDandiMetadataUrl: string,
   assetS3Url: string,
+  // zarr_id is extracted from contentUrl for zarr files; null when no zarr ID is found
+  // See: https://github.com/dandi/dandi-schema/issues/356 for potential future improvements
+  zarr_id: string | null,
 }
 
 function serviceURL(endpoint: ExternalServiceEndpoint, data: ServiceUrlData): string | null {
@@ -112,7 +177,8 @@ function serviceURL(endpoint: ExternalServiceEndpoint, data: ServiceUrlData): st
     .replaceAll('$asset_url$', data.assetUrl)
     .replaceAll('$asset_dandi_url$', data.assetDandiUrl)
     .replaceAll('$asset_dandi_metadata_url$', data.assetDandiMetadataUrl)
-    .replaceAll('$asset_s3_url$', data.assetS3Url);
+    .replaceAll('$asset_s3_url$', data.assetS3Url)
+    .replaceAll('$zarr_id$', data.zarr_id ?? '');
 }
 
 export function getExternalServices(path: AssetPath, info: {dandisetId: string, dandisetVersion: string}) {
@@ -147,6 +213,9 @@ export function getExternalServices(path: AssetPath, info: {dandisetId: string,
   // dandisets (since the ready-made S3 URL will prevent access in that case).
   const assetUrl = embargoed.value ? assetDandiUrl : assetS3Url;
 
+  // Extract zarr_id from contentUrl for zarr files
+  const zarr_id = extractZarrId(assetS3Url);
+
   return EXTERNAL_SERVICES
     .filter((service) => servicePredicate(service, path))
     .flatMap((service) => {
@@ -158,6 +227,7 @@ export function getExternalServices(path: AssetPath, info: {dandisetId: string,
         assetDandiUrl,
         assetDandiMetadataUrl,
         assetS3Url,
+        zarr_id,
       });
       return url ? [{ name: service.name, url }] : [];
     });