Skip to content
61 changes: 60 additions & 1 deletion web/src/utils/externalServices.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ interface ExternalService {
endpoint: ExternalServiceEndpoint;
}

// Constants for zarr ID and S3 URL parsing

// Matches a UUID written as 8-4-4-4-12 hexadecimal digits (upper- or
// lower-case). The pattern is unanchored, so it can be embedded in larger
// expressions via `.source`.
const UUID_REGEX_PATTERN = /[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}/;
// Matches a '/zarr/<uuid>' segment anywhere in a string; capture group 1 holds
// the UUID. Built without flags, so it is neither global nor sticky.
const ZARR_ID_REGEX = new RegExp(`/zarr/(${UUID_REGEX_PATTERN.source})`);
// Bucket name returned by extractS3Bucket when a URL matches neither supported
// S3 URL style.
const DEFAULT_S3_BUCKET = 'dandiarchive';

const EXTERNAL_SERVICES: ExternalService[] = [
{
name: "Bioimagesuite/Viewer",
Expand Down Expand Up @@ -79,9 +84,55 @@ const EXTERNAL_SERVICES: ExternalService[] = [
maxsize: Infinity,
endpoint:
"https://www.neuroglass.io/new?resource=$asset_dandi_metadata_url$",
},

{
name: "QuiltData: S3 Browser",
regex: /\.zarr$/,
maxsize: Infinity,
endpoint: (item: ServiceUrlData) => {
if (!item.zarr_id) {
return null;
}
const bucket = extractS3Bucket(item.assetS3Url);
return `https://open.quiltdata.com/b/${bucket}/tree/zarr/${item.zarr_id}/`;
},
}
];

/**
 * Pull the zarr identifier out of a zarr asset's content URL.
 *
 * The identifier is the UUID that directly follows the '/zarr/' path segment,
 * e.g. for
 *   https://s3.amazonaws.com/dandiarchive/zarr/7b617177-ad57-4f7f-806b-060e18f42d15/
 * the result is
 *   7b617177-ad57-4f7f-806b-060e18f42d15
 *
 * @param contentUrl - URL to search for a '/zarr/<uuid>' segment.
 * @returns The UUID, or null when the URL contains no such segment.
 */
function extractZarrId(contentUrl: string): string | null {
  const captured = ZARR_ID_REGEX.exec(contentUrl);
  if (captured === null) {
    return null;
  }
  // Group 1 is the UUID itself, without the leading '/zarr/'.
  return captured[1];
}

/**
 * Extract the S3 bucket name from an S3 URL.
 *
 * Handles both S3 URL styles, with or without a region / dualstack component
 * in the host name:
 * - Virtual-hosted-style: https://bucket.s3.amazonaws.com/...
 *                         https://bucket.s3.us-east-2.amazonaws.com/...
 * - Path-style:           https://s3.amazonaws.com/bucket/...
 *                         https://s3-us-west-2.amazonaws.com/bucket/...
 *
 * Bucket names that contain dots or that themselves start with "s3" are
 * recognised correctly, and a query string or fragment is never treated as
 * part of the bucket name.
 *
 * @param s3Url - Absolute http(s) URL pointing at an S3 object or prefix.
 * @returns The bucket name, or DEFAULT_S3_BUCKET when the URL is not a
 *          recognisable S3 URL.
 */
function extractS3Bucket(s3Url: string): string {
  // Split the URL into authority and path up front so that a query string or
  // fragment can never be mistaken for part of the bucket name.
  const urlParts = /^https?:\/\/([^/?#]+)([^?#]*)/i.exec(s3Url.trim());
  if (!urlParts) {
    return DEFAULT_S3_BUCKET;
  }
  const [, authority = '', path = ''] = urlParts;
  // Drop an explicit port (":443") so only the host name is inspected.
  const host = authority.replace(/:\d*$/, '');

  // Virtual-hosted-style is checked first: its host always has a bucket label
  // in front of ".s3", which a path-style host never has. Checking in this
  // order keeps buckets whose names start with "s3" (e.g. "s3-logs") from
  // being misread as a path-style regional endpoint. The greedy capture keeps
  // dotted bucket names intact.
  const bucketFromHost = /^(.+)\.s3(?:[.-][a-z0-9.-]*)?\.amazonaws\.com$/i.exec(host)?.[1];
  if (bucketFromHost) {
    return bucketFromHost;
  }

  // Path-style: the host is the bare S3 endpoint and the bucket is the first
  // path segment.
  if (/^s3(?:[.-][a-z0-9.-]*)?\.amazonaws\.com$/i.test(host)) {
    const bucketFromPath = /^\/([^/]+)/.exec(path)?.[1];
    if (bucketFromPath) {
      return bucketFromPath;
    }
  }

  return DEFAULT_S3_BUCKET;
}

interface ServiceUrlData {
dandisetId: string,
dandisetVersion: string,
Expand All @@ -90,6 +141,9 @@ interface ServiceUrlData {
assetDandiUrl: string,
assetDandiMetadataUrl: string,
assetS3Url: string,
// zarr_id is extracted from contentUrl for zarr files
// See: https://github.com/dandi/dandi-schema/issues/356 for potential future improvements
zarr_id: string | null,
}

function serviceURL(endpoint: ExternalServiceEndpoint, data: ServiceUrlData): string | null {
Expand All @@ -112,7 +166,8 @@ function serviceURL(endpoint: ExternalServiceEndpoint, data: ServiceUrlData): st
.replaceAll('$asset_url$', data.assetUrl)
.replaceAll('$asset_dandi_url$', data.assetDandiUrl)
.replaceAll('$asset_dandi_metadata_url$', data.assetDandiMetadataUrl)
.replaceAll('$asset_s3_url$', data.assetS3Url);
.replaceAll('$asset_s3_url$', data.assetS3Url)
.replaceAll('$zarr_id$', data.zarr_id || '');
}

export function getExternalServices(path: AssetPath, info: {dandisetId: string, dandisetVersion: string}) {
Expand Down Expand Up @@ -147,6 +202,9 @@ export function getExternalServices(path: AssetPath, info: {dandisetId: string,
// dandisets (since the ready-made S3 URL will prevent access in that case).
const assetUrl = embargoed.value ? assetDandiUrl : assetS3Url;

// Extract zarr_id from contentUrl for zarr files
const zarr_id = extractZarrId(assetS3Url);

return EXTERNAL_SERVICES
.filter((service) => servicePredicate(service, path))
.flatMap((service) => {
Expand All @@ -158,6 +216,7 @@ export function getExternalServices(path: AssetPath, info: {dandisetId: string,
assetDandiUrl,
assetDandiMetadataUrl,
assetS3Url,
zarr_id,
});
return url ? [{ name: service.name, url }] : [];
});
Expand Down