Skip to content

Commit 6ed5014

Browse files
authored
VFS pushdown to exportation (#132)
* VFS pushdown to exportation and MinIO testing
* Misc
* Support source and dest configs ingestion (#134)
* Support source and dest configurations
* Adding backwards compatibility for the `config` arg overriding the source and dest configs
1 parent 839477c commit 6ed5014

File tree

7 files changed

+105
-30
lines changed

7 files changed

+105
-30
lines changed

tiledb/bioimg/converters/base.py

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,10 @@
5050
)
5151
from ..openslide import TileDBOpenSlide
5252
from ..version import version as PKG_VERSION
53-
from . import DATASET_TYPE, FMT_VERSION
53+
from . import DATASET_TYPE, DEFAULT_SCRATCH_SPACE, FMT_VERSION
5454
from .axes import Axes
5555
from .tiles import iter_tiles, num_tiles
5656

57-
DEFAULT_SCRATCH_SPACE = "/dev/shm"
58-
5957

6058
class ImageReader(ABC):
6159
@abstractmethod
@@ -64,8 +62,10 @@ def __init__(
6462
input_path: str,
6563
*,
6664
logger: Optional[logging.Logger],
67-
config: Optional[tiledb.Config] = None,
68-
ctx: Optional[tiledb.Ctx] = None,
65+
source_config: Optional[tiledb.Config] = None,
66+
source_ctx: Optional[tiledb.Ctx] = None,
67+
dest_config: Optional[tiledb.Config] = None,
68+
dest_ctx: Optional[tiledb.Config] = None,
6969
**kwargs: Any,
7070
):
7171
"""Initialize this ImageReader"""
@@ -76,6 +76,16 @@ def __enter__(self) -> ImageReader:
7676
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
7777
pass
7878

79+
@property
80+
@abstractmethod
81+
def source_ctx(self) -> tiledb.Ctx:
82+
"""The ctx of the source path of this image reader."""
83+
84+
@property
85+
@abstractmethod
86+
def dest_ctx(self) -> tiledb.Ctx:
87+
"""The ctx of the dest path of this image reader."""
88+
7989
@property
8090
@abstractmethod
8191
def logger(self) -> Optional[logging.Logger]:
@@ -309,7 +319,7 @@ def to_tiledb(
309319
exclude_metadata: bool = False,
310320
compressor: Optional[Union[Mapping[int, Any], Any]] = None,
311321
log: Optional[Union[bool, logging.Logger]] = None,
312-
reader_kwargs: Optional[Mapping[str, Any]] = None,
322+
reader_kwargs: Optional[MutableMapping[str, Any]] = None,
313323
pyramid_kwargs: Optional[Mapping[str, Any]] = None,
314324
) -> Type[ImageConverter]:
315325
"""
@@ -360,6 +370,15 @@ def to_tiledb(
360370
default_verbose = False
361371
logger = get_logger_wrapper(default_verbose)
362372

373+
# Backwards compatibility config v0.2.13
374+
if reader_kwargs:
375+
common_cfg = reader_kwargs.get("config", None)
376+
if common_cfg:
377+
# Overwrite the source and destination configs with the common config
378+
reader_kwargs["source_config"] = reader_kwargs[
379+
"dest_config"
380+
] = common_cfg
381+
363382
if isinstance(source, ImageReader):
364383
if cls._ImageReaderType != source.__class__:
365384
raise ValueError("Image reader should match converter on source format")
@@ -379,7 +398,7 @@ def to_tiledb(
379398
max_tiles.update(tiles)
380399
logger.debug(f"Updated max tiles:{max_tiles}")
381400

382-
rw_group = ReadWriteGroup(output_path)
401+
rw_group = ReadWriteGroup(output_path, ctx=reader.dest_ctx)
383402

384403
metadata = {}
385404
original_metadata = {}
@@ -498,7 +517,10 @@ def to_tiledb(
498517
fmt_version=FMT_VERSION,
499518
channels=json.dumps(reader.channels),
500519
levels=jsonpickle.encode(
501-
sorted(iter_levels_meta(rw_group.r_group), key=itemgetter("level")),
520+
sorted(
521+
iter_levels_meta(rw_group.r_group, ctx=reader.dest_ctx),
522+
key=itemgetter("level"),
523+
),
502524
unpicklable=False,
503525
),
504526
metadata=jsonpickle.encode(metadata, unpicklable=False),
@@ -579,9 +601,9 @@ def _convert_level_to_tiledb(
579601
# get or create TileDB array uri
580602
uri, created = rw_group.get_or_create(f"l_{level}.tdb", schema)
581603

582-
if created or not validate_ingestion(uri):
604+
if created or not validate_ingestion(uri, ctx=reader.dest_ctx):
583605
# write image and metadata to TileDB array
584-
with open_bioimg(uri, "w") as out_array:
606+
with open_bioimg(uri, "w", ctx=reader.dest_ctx) as out_array:
585607
out_array.meta.update(reader.level_metadata(level), level=level)
586608
inv_axes_mapper = axes_mapper.inverse
587609
if chunked:

tiledb/bioimg/converters/ome_tiff.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ def __init__(
3232
input_path: str,
3333
*,
3434
logger: Optional[logging.Logger] = None,
35-
config: Optional[Config] = None,
36-
ctx: Optional[Ctx] = None,
35+
source_config: Optional[Config] = None,
36+
source_ctx: Optional[Ctx] = None,
37+
dest_config: Optional[Config] = None,
38+
dest_ctx: Optional[Ctx] = None,
3739
extra_tags: Sequence[Union[str, int]] = (),
3840
):
3941
"""
@@ -47,9 +49,11 @@ def __init__(
4749

4850
# Use VFS for all paths, local or remote, when reading the input image
4951
self._input_path = input_path
50-
self._ctx = _get_ctx(ctx, config)
51-
self._cfg = self._ctx.config()
52-
self._vfs = VFS(config=self._cfg, ctx=self._ctx)
52+
self._source_ctx = _get_ctx(source_ctx, source_config)
53+
self._source_cfg = self._source_ctx.config()
54+
self._dest_ctx = _get_ctx(dest_ctx, dest_config)
55+
self._dest_cfg = self._dest_ctx.config()
56+
self._vfs = VFS(config=self._source_cfg, ctx=self._source_ctx)
5357
self._vfs_fh = self._vfs.open(input_path, mode="rb")
5458
self._tiff = tifffile.TiffFile(self._vfs_fh)
5559
# XXX ignore all but the first series
@@ -61,6 +65,14 @@ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
6165
self._tiff.close()
6266
self._vfs.close(file=self._vfs_fh)
6367

68+
@property
69+
def source_ctx(self) -> Ctx:
70+
return self._source_ctx
71+
72+
@property
73+
def dest_ctx(self) -> Ctx:
74+
return self._dest_ctx
75+
6476
@property
6577
def logger(self) -> Optional[logging.Logger]:
6678
return self._logger

tiledb/bioimg/converters/ome_zarr.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,24 +36,36 @@ def __init__(
3636
input_path: str,
3737
*,
3838
logger: Optional[logging.Logger] = None,
39-
config: Optional[Config] = None,
40-
ctx: Optional[Ctx] = None,
39+
source_config: Optional[Config] = None,
40+
source_ctx: Optional[Ctx] = None,
41+
dest_config: Optional[Config] = None,
42+
dest_ctx: Optional[Ctx] = None,
4143
):
4244
"""
4345
OME-Zarr image reader
4446
:param input_path: The path to the Zarr image
4547
"""
4648
self._logger = get_logger_wrapper(False) if not logger else logger
47-
self._ctx = _get_ctx(ctx, config)
48-
self._cfg = self._ctx.config()
49-
storage_options = translate_config_to_s3fs(self._cfg)
49+
self._source_ctx = _get_ctx(source_ctx, source_config)
50+
self._source_cfg = self._source_ctx.config()
51+
self._dest_ctx = _get_ctx(dest_ctx, dest_config)
52+
self._dest_cfg = self._dest_ctx.config()
53+
storage_options = translate_config_to_s3fs(self._source_cfg)
5054
input_fh = zarr.storage.FSStore(
5155
input_path, check=True, create=True, **storage_options
5256
)
5357
self._root_node = next(Reader(ZarrLocation(input_fh))())
5458
self._multiscales = cast(Multiscales, self._root_node.load(Multiscales))
5559
self._omero = cast(Optional[OMERO], self._root_node.load(OMERO))
5660

61+
@property
62+
def source_ctx(self) -> Ctx:
63+
return self._source_ctx
64+
65+
@property
66+
def dest_ctx(self) -> Ctx:
67+
return self._dest_ctx
68+
5769
@property
5870
def logger(self) -> Optional[logging.Logger]:
5971
return self._logger

tiledb/bioimg/converters/openslide.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,21 +43,25 @@ def __init__(
4343
input_path: str,
4444
*,
4545
logger: Optional[logging.Logger] = None,
46-
config: Optional[Config] = None,
47-
ctx: Optional[Ctx] = None,
46+
source_config: Optional[Config] = None,
47+
source_ctx: Optional[Ctx] = None,
48+
dest_config: Optional[Config] = None,
49+
dest_ctx: Optional[Ctx] = None,
4850
scratch_space: str = DEFAULT_SCRATCH_SPACE,
4951
):
5052
"""
5153
OpenSlide image reader
5254
:param input_path: The path to the OpenSlide image
5355
5456
"""
55-
self._ctx = _get_ctx(ctx, config)
56-
self._cfg = self._ctx.config()
57+
self._source_ctx = _get_ctx(source_ctx, source_config)
58+
self._source_cfg = self._source_ctx.config()
59+
self._dest_ctx = _get_ctx(dest_ctx, dest_config)
60+
self._dest_cfg = self._dest_ctx.config()
5761
self._logger = get_logger_wrapper(False) if not logger else logger
5862
if is_remote_protocol(input_path):
5963
resolved_path = cache_filepath(
60-
input_path, config, ctx, self._logger, scratch_space
64+
input_path, source_config, source_ctx, self._logger, scratch_space
6165
)
6266
else:
6367
resolved_path = input_path
@@ -66,6 +70,14 @@ def __init__(
6670
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
6771
self._osd.close()
6872

73+
@property
74+
def source_ctx(self) -> Ctx:
75+
return self._source_ctx
76+
77+
@property
78+
def dest_ctx(self) -> Ctx:
79+
return self._dest_ctx
80+
6981
@property
7082
def logger(self) -> Optional[logging.Logger]:
7183
return self._logger

tiledb/bioimg/helpers.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def get_or_create(self, name: str, schema: tiledb.ArraySchema) -> Tuple[str, boo
6161
else:
6262
uri = os.path.join(self._uri, name).replace("\\", "/")
6363

64-
if not tiledb.array_exists(uri):
64+
if not tiledb.array_exists(uri, ctx=self._ctx):
6565
tiledb.Array.create(uri, schema, ctx=self._ctx)
6666
create = True
6767
else:
@@ -90,7 +90,7 @@ def get_or_create(self, name: str, schema: tiledb.ArraySchema) -> Tuple[str, boo
9090
return uri, create
9191

9292

93-
def validate_ingestion(uri: str) -> bool:
93+
def validate_ingestion(uri: str, ctx: tiledb.Ctx = None) -> bool:
9494
"""
9595
This function validates that the array has been stored properly
9696
by checking the existence of array fragments and
@@ -103,12 +103,12 @@ def validate_ingestion(uri: str) -> bool:
103103
Returns boolean
104104
-------
105105
"""
106-
fragments_list_info = tiledb.array_fragments(uri)
106+
fragments_list_info = tiledb.array_fragments(uri, ctx=ctx)
107107
if not len(fragments_list_info):
108108
# If no fragments are present
109109
return False
110110
else:
111-
with tiledb.open(uri) as validation_array:
111+
with tiledb.open(uri, ctx=ctx) as validation_array:
112112
ned = fragments_list_info.nonempty_domain
113113
consolidated_ranges = merge_ned_ranges(ned)
114114
domains = [d.domain for d in validation_array.schema.domain]

tiledb/bioimg/openslide.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import tiledb
1717
from tiledb import Config, Ctx
18+
from tiledb.highlevel import _get_ctx
1819

1920
from . import ATTR_NAME
2021
from .converters.axes import Axes
@@ -43,7 +44,9 @@ def __init__(
4344
4445
:param uri: uri of a tiledb.Group containing the image
4546
"""
46-
self._group = tiledb.Group(uri, config=config, ctx=ctx)
47+
self._ctx = _get_ctx(ctx, config)
48+
self._cfg = self._ctx.config()
49+
self._group = tiledb.Group(uri, ctx=self._ctx)
4750
pixel_depth = self._group.meta.get("pixel_depth", "")
4851
pixel_depth = dict(json.loads(pixel_depth)) if pixel_depth else {}
4952
self._levels = sorted(

tiledb/bioimg/wrappers.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,18 @@ def from_bioimg(
4747
:param kwargs: keyword arguments for custom ingestion behaviour
4848
:return: The converter class that was used for the ingestion
4949
"""
50+
5051
logger = get_logger_wrapper(verbose)
52+
reader_kwargs = kwargs.get("reader_kwargs", {})
53+
54+
# Get the config for the source
55+
reader_kwargs["source_config"] = kwargs.pop("source_config", None)
56+
57+
# Get the config for the destination if it exists; otherwise fall back to the source config
58+
reader_kwargs["dest_config"] = kwargs.pop(
59+
"dest_config", reader_kwargs["source_config"]
60+
)
61+
5162
if converter is Converters.OMETIFF:
5263
if not _tiff_exc:
5364
logger.info("Converting OME-TIFF file")
@@ -57,6 +68,7 @@ def from_bioimg(
5768
log=logger,
5869
exclude_metadata=exclude_metadata,
5970
tile_scale=tile_scale,
71+
reader_kwargs=reader_kwargs,
6072
**kwargs,
6173
)
6274
else:
@@ -70,6 +82,7 @@ def from_bioimg(
7082
log=logger,
7183
exclude_metadata=exclude_metadata,
7284
tile_scale=tile_scale,
85+
reader_kwargs=reader_kwargs,
7386
**kwargs,
7487
)
7588
else:
@@ -83,6 +96,7 @@ def from_bioimg(
8396
log=logger,
8497
exclude_metadata=exclude_metadata,
8598
tile_scale=tile_scale,
99+
reader_kwargs=reader_kwargs,
86100
**kwargs,
87101
)
88102
else:

0 commit comments

Comments
 (0)