Skip to content

Commit 7d2aa78

Browse files
authored
[DAR-6238][External] Allow to import annotations without pixdims, affine or plane_map (#1039)
1 parent 503cb50 commit 7d2aa78

File tree

2 files changed

+478
-74
lines changed

2 files changed

+478
-74
lines changed

darwin/importer/importer.py

Lines changed: 135 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import concurrent.futures
2-
import uuid
3-
import json
42
import copy
3+
import json
4+
import uuid
55
from collections import defaultdict
66
from logging import getLogger
77
from multiprocessing import cpu_count
@@ -21,19 +21,18 @@
2121
Union,
2222
)
2323

24-
2524
from darwin.datatypes import (
2625
AnnotationFile,
2726
Property,
28-
parse_property_classes,
2927
PropertyClass,
28+
parse_property_classes,
3029
)
3130
from darwin.future.data_objects.properties import (
3231
FullProperty,
32+
PropertyGranularity,
3333
PropertyType,
3434
PropertyValue,
3535
SelectedProperty,
36-
PropertyGranularity,
3736
)
3837
from darwin.item import DatasetItem
3938
from darwin.path_utils import is_properties_enabled, parse_metadata
@@ -1299,7 +1298,9 @@ def import_annotations( # noqa: C901
12991298
(
13001299
legacy_remote_file_slot_affine_maps,
13011300
pixdims_and_primary_planes,
1302-
) = _get_remote_medical_file_transform_requirements(remote_files_targeted_by_import)
1301+
) = _get_remote_medical_file_transform_requirements(
1302+
remote_files_targeted_by_import, console
1303+
)
13031304

13041305
if importer.__module__ == "darwin.importer.formats.nifti":
13051306
maybe_parsed_files: Optional[Iterable[dt.AnnotationFile]] = _find_and_parse(
@@ -2451,63 +2452,156 @@ def _get_remote_files_targeted_by_import(
24512452
]
24522453

24532454

2455+
def _parse_plane_map(
    medical_metadata: Dict[str, Any],
    slot_name: str,
    remote_file_path: str,
    console: Console,
) -> Optional[str]:
    """Look up the plane map entry recorded for a slot.

    Args:
        medical_metadata: The slot's medical metadata; the entry is read from
            ``medical_metadata["plane_map"][slot_name]``.
        slot_name: Name of the slot whose entry is wanted.
        remote_file_path: Path of the remote file, used only in the warning text.
        console: Console that receives a warning when the lookup fails.

    Returns:
        The stored entry, or None when the lookup raises ``KeyError`` or
        ``TypeError`` (for example a missing key, or metadata that is not
        subscriptable).
    """
    try:
        plane = medical_metadata["plane_map"][slot_name]
    except (KeyError, TypeError):
        plane = None
        console.print(
            f"Missing plane_map for slot {slot_name} in file {remote_file_path}",
            style="warning",
        )
    return plane
2480+
2481+
2482+
def _parse_pixdims(
    medical_metadata: Dict[str, Any],
    slot_name: str,
    remote_file_path: str,
    console: Console,
) -> Optional[List[float]]:
    """Parse the ``pixdims`` entry of a slot's medical metadata into floats.

    Args:
        medical_metadata: The slot's medical metadata. Metadata that is not a
            mapping (e.g. ``None``) is treated as having no pixdims.
        slot_name: Name of the slot, used only in the warning text.
        remote_file_path: Path of the remote file, used only in the warning text.
        console: Console that receives a warning when pixdims are missing or
            cannot be parsed.

    Returns:
        The pixdims converted to a list of floats, or None when they are
        missing, empty, or not an iterable of float-convertible values.
    """
    try:
        raw_pixdims = medical_metadata.get("pixdims")
    except AttributeError:
        # Metadata is not a mapping (e.g. None). _parse_plane_map tolerates
        # this shape, so report it as missing here too instead of crashing.
        raw_pixdims = None

    try:
        if not raw_pixdims:
            console.print(
                f"Missing pixdims for slot {slot_name} in file {remote_file_path}",
                style="warning",
            )
            return None
        if isinstance(raw_pixdims, (str, bytes)):
            # Iterating text yields single characters/bytes, so "123" would
            # silently parse as [1.0, 2.0, 3.0]; treat it as malformed instead.
            raise TypeError("pixdims must be a sequence of numbers, not text")
        return [float(dim) for dim in raw_pixdims]
    except (ValueError, TypeError):
        console.print(
            f"Invalid pixdims format for slot {slot_name} in file {remote_file_path}",
            style="warning",
        )
        return None
2514+
2515+
2516+
def _parse_affine(
    medical_metadata: Dict[str, Any],
    slot_name: str,
    remote_file_path: str,
    console: Console,
) -> Optional[np.ndarray]:
    """Parse the ``affine`` entry of a slot's medical metadata into an array.

    Args:
        medical_metadata: The slot's medical metadata. Metadata that is not a
            mapping (e.g. ``None``) is treated as having no affine.
        slot_name: Name of the slot, used only in the warning text.
        remote_file_path: Path of the remote file, used only in the warning text.
        console: Console that receives a warning when the affine cannot be
            converted.

    Returns:
        The affine as a ``float64`` numpy array, or None when it is absent,
        empty, or not convertible. An absent affine returns None without a
        warning; only a malformed one is reported. The array's shape is not
        checked here.
    """
    try:
        affine = medical_metadata.get("affine")
    except AttributeError:
        # Metadata is not a mapping (e.g. None). _parse_plane_map tolerates
        # this shape, so treat it as "no affine" here instead of crashing.
        return None

    try:
        if not affine:
            return None
        return np.array(affine, dtype=np.float64)
    except (ValueError, TypeError):
        console.print(
            f"Invalid affine matrix format for slot {slot_name} in file {remote_file_path}",
            style="warning",
        )
        return None
2544+
2545+
24542546
def _get_remote_medical_file_transform_requirements(
    remote_files_targeted_by_import: List[DatasetItem], console: Console
) -> Tuple[Dict[Path, Dict[str, Any]], Dict[Path, Dict[str, Tuple[List[float], str]]]]:
    """
    Collect per-slot transform data for the medical slots of the targeted remote files.

    For every slot that ``slot_is_medical`` accepts, the pixdims and the plane map
    entry are read from ``slot["metadata"]["medical"]``. The slot is recorded only
    when both values could be parsed. For medical slots that
    ``slot_is_handled_by_monai`` rejects, the affine matrix is also read and
    recorded when it could be parsed. Missing or malformed values are reported on
    ``console`` by the parsing helpers and the slot is skipped rather than
    aborting the import.

    Args:
        remote_files_targeted_by_import: List of remote files targeted by the import
        console: Console for logging warnings

    Returns:
        Tuple containing:
        - Dictionary mapping each remote file path to ``{slot_name: affine matrix}``
          for slots not handled by MONAI (legacy NifTI scaling)
        - Dictionary mapping each remote file path to
          ``{slot_name: (pixdims, primary_plane)}``
        Files with nothing to record are omitted from the respective dictionary.
    """
    legacy_remote_file_slot_affine_map: Dict[Path, Dict[str, Any]] = {}
    pixdims_and_primary_planes: Dict[
        Path, Dict[str, Tuple[List[float], str]]
    ] = {}

    for remote_file in remote_files_targeted_by_import:
        if not remote_file.slots:
            continue

        # Both per-file maps are keyed by slot name (a str), not by path.
        slot_pixdim_primary_plane_map: Dict[str, Tuple[List[float], str]] = {}
        slot_affine_map: Dict[str, Any] = {}
        remote_path = Path(remote_file.full_path)

        for slot in remote_file.slots:
            if not slot_is_medical(slot):
                continue

            slot_name = slot["slot_name"]
            medical_metadata = slot["metadata"]["medical"]

            primary_plane = _parse_plane_map(
                medical_metadata, slot_name, remote_file.full_path, console
            )
            pixdims = _parse_pixdims(
                medical_metadata, slot_name, remote_file.full_path, console
            )

            # Record the slot only when both values are present; a partial
            # pair is not stored.
            if primary_plane is not None and pixdims is not None:
                slot_pixdim_primary_plane_map[slot_name] = (pixdims, primary_plane)

            # Parse affine matrix for legacy NifTI files
            if not slot_is_handled_by_monai(slot):
                affine = _parse_affine(
                    medical_metadata, slot_name, remote_file.full_path, console
                )
                if affine is not None:
                    slot_affine_map[slot_name] = affine

        if slot_pixdim_primary_plane_map:
            pixdims_and_primary_planes[remote_path] = slot_pixdim_primary_plane_map
        if slot_affine_map:
            legacy_remote_file_slot_affine_map[remote_path] = slot_affine_map

    return legacy_remote_file_slot_affine_map, pixdims_and_primary_planes
25112605

25122606

25132607
def slot_is_medical(slot: Dict[str, Any]) -> bool:

0 commit comments

Comments
 (0)