diff --git a/.gitignore b/.gitignore index d989b197..729a236b 100644 --- a/.gitignore +++ b/.gitignore @@ -60,4 +60,4 @@ rsconnect/ vignettes/data/*.h5ad /doc/ /Meta/ -/data/ +/data/ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index e2b55c38..ce303db3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,6 +49,7 @@ Suggests: knitr, processx, rhdf5 (>= 2.52.1), + Rarr, rmarkdown, S4Vectors, Seurat, diff --git a/NAMESPACE b/NAMESPACE index 0efc422a..bfcf0eb8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,10 +14,15 @@ S3method(r_to_py,AbstractAnnData) export(AnnData) export(AnnDataView) export(as_AnnData) +export(create_zarr) +export(create_zarr_group) export(generate_dataset) export(get_generator_types) +export(is_zarr_empty) export(read_h5ad) +export(read_zarr) export(write_h5ad) +export(write_zarr) importFrom(Matrix,as.matrix) importFrom(Matrix,sparseMatrix) importFrom(Matrix,t) @@ -35,3 +40,4 @@ importFrom(reticulate,r_to_py) importFrom(rlang,`%||%`) importFrom(rlang,caller_env) importFrom(stats,setNames) +importFrom(utils,tail) diff --git a/R/AbstractAnnData.R b/R/AbstractAnnData.R index 0a5f0aaa..bf912882 100644 --- a/R/AbstractAnnData.R +++ b/R/AbstractAnnData.R @@ -287,6 +287,28 @@ AbstractAnnData <- R6::R6Class( ) }, #' @description + #' Convert to an [`ZarrAnnData`] + #' + #' See [as_ZarrAnnData()] for more details on the conversion + #' + #' @param file See [as_ZarrAnnData()] + #' @param compression See [as_ZarrAnnData()] + #' @param mode See [as_ZarrAnnData()] + #' + #' @return An [`ZarrAnnData`] object + as_ZarrAnnData = function( + file, + compression = c("none", "gzip"), + mode = c("w-", "r", "r+", "a", "w", "x") + ) { + as_ZarrAnnData( + adata = self, + file = file, + compression = compression, + mode = mode + ) + }, + #' @description #' Write the `AnnData` object to an H5AD file #' #' See [write_h5ad()] for details @@ -302,6 +324,23 @@ AbstractAnnData <- R6::R6Class( mode = c("w-", "r", "r+", "a", "w", "x") ) { write_h5ad(object = 
self, path, compression = compression, mode = mode) + }, + #' @description + #' Write the `AnnData` object to a Zarr store + #' + #' See [write_zarr()] for details + #' + #' @param path See [write_zarr()] + #' @param compression See [write_zarr()] + #' @param mode See [write_zarr()] + #' + #' @return `path` invisibly + write_zarr = function( + path, + compression = c("none", "gzip"), + mode = c("w-", "r", "r+", "a", "w", "x") + ) { + write_zarr(object = self, path, compression = compression, mode = mode) + } ), private = list( diff --git a/R/Rarr_utils.R b/R/Rarr_utils.R new file mode 100644 index 00000000..aa7ce462 --- /dev/null +++ b/R/Rarr_utils.R @@ -0,0 +1,75 @@ +#' create_zarr_group +#' +#' create zarr groups +#' +#' @param store the location of (zarr) store +#' @param name name of the group +#' @param version zarr version; only `"v2"` is currently supported +#' @importFrom utils tail +#' @examples +#' store <- tempfile(fileext = ".zarr") +#' create_zarr(store) +#' create_zarr_group(store, "gp") +#' @export +#' @return `NULL` +create_zarr_group <- function(store, name, version = "v2") { + split.name <- strsplit(name, split = "\\/")[[1]] + if (length(split.name) > 1) { + split.name <- vapply( + seq_along(split.name), + function(x) paste(split.name[seq_len(x)], collapse = "/"), + FUN.VALUE = character(1) + ) + split.name <- rev(tail(split.name, 2)) + if (!dir.exists(file.path(store, split.name[2]))) { + create_zarr_group(store = store, name = split.name[2]) + } + } + dir.create(file.path(store, split.name[1]), showWarnings = FALSE) + switch( + version, + v2 = { + write( + "{\"zarr_format\":2}", + file = file.path(store, split.name[1], ".zgroup") + ) + }, + v3 = { + stop("Currently only zarr v2 is supported!") + }, + stop("only zarr v2 is supported. 
Use version = 'v2'") + ) +} + +#' create_zarr +#' +#' create zarr store +#' +#' @param store the location of zarr store +#' @param version zarr version +#' @examples +#' store <- tempfile(fileext = ".zarr") +#' create_zarr(store) +#' @export +#' @return `NULL` +create_zarr <- function(store, version = "v2") { + prefix <- basename(store) + dir <- dirname(store) # parent directory of the store + create_zarr_group(store = dir, name = prefix, version = version) +} + +#' is_zarr_empty +#' +#' check whether a zarr store is empty +#' +#' @param store the location of zarr store +#' @examples +#' store <- tempfile(fileext = ".zarr") +#' create_zarr(store) +#' is_zarr_empty(store) +#' @export +#' @return returns TRUE if zarr store is empty +is_zarr_empty <- function(store) { + files <- list.files(store, recursive = FALSE, full.names = FALSE) + all(files %in% c(".zarray", ".zattrs", ".zgroup")) +} diff --git a/R/ZarrAnnData.R b/R/ZarrAnnData.R new file mode 100644 index 00000000..b0076101 --- /dev/null +++ b/R/ZarrAnnData.R @@ -0,0 +1,497 @@ +#' @title ZarrAnnData +#' +#' @description +#' Implementation of a Zarr-backed `AnnData` object. This class provides an +#' interface to a Zarr file and minimal data is stored in memory until it is +#' requested by the user. It is primarily designed as an intermediate object +#' when reading/writing Zarr files but can be useful for accessing parts of +#' large files. +#' +#' See [AnnData-usage] for details on creating and using `AnnData` objects. +#' +#' @return A `ZarrAnnData` object +#' +#' @seealso [AnnData-usage] for details on creating and using `AnnData` objects +#' +#' @family AnnData classes +ZarrAnnData <- R6::R6Class( + "ZarrAnnData", # nolint + inherit = AbstractAnnData, + cloneable = FALSE, + private = list( + .zarrobj = NULL, + .close_on_finalize = FALSE, + .compression = NULL, + + .check_file_valid = function() { + if (!zarr_path_exists(private$.zarrobj, "/")) { + cli_abort( + paste( + "The Zarr file does not exist, or not a zarr file/store!" 
+ ) + ) + } + }, + + #' @description Close the Zarr file when the object is garbage collected + finalize = function() { + if (private$.close_on_finalize) { + self$close() + } + invisible(self) + } + ), + active = list( + #' @field X See [AnnData-usage] + X = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_X, status=done + read_zarr_element(private$.zarrobj, "X") |> + private$.add_matrix_dimnames("X") + } else { + # trackstatus: class=ZarrAnnData, feature=set_X, status=done + private$.validate_aligned_array( + value, + "X", + shape = c(self$n_obs(), self$n_vars()), + expected_rownames = self$obs_names, + expected_colnames = self$var_names + ) |> + write_zarr_element( + private$.zarrobj, + "X", + private$.compression + ) + } + }, + #' @field layers See [AnnData-usage] + layers = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_layers, status=done + read_zarr_element(private$.zarrobj, "layers") |> + private$.add_mapping_dimnames("layers") + } else { + # trackstatus: class=ZarrAnnData, feature=set_layers, status=done + private$.validate_aligned_mapping( + value, + "layers", + c(self$n_obs(), self$n_vars()), + expected_rownames = self$obs_names, + expected_colnames = self$var_names + ) |> + write_zarr_element( + private$.zarrobj, + "layers", + private$.compression + ) + } + }, + #' @field obsm See [AnnData-usage] + obsm = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_obsm, status=done + read_zarr_element(private$.zarrobj, "obsm") |> + private$.add_mapping_dimnames("obsm") + } else { + # trackstatus: class=ZarrAnnData, feature=set_obsm, status=done + private$.validate_aligned_mapping( + value, + "obsm", + c(self$n_obs()), + expected_rownames = self$obs_names, + strip_rownames = TRUE, + strip_colnames = FALSE + ) |> + write_zarr_element( + 
private$.zarrobj, + "obsm", + private$.compression + ) + } + }, + #' @field varm See [AnnData-usage] + varm = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_varm, status=done + read_zarr_element(private$.zarrobj, "varm") |> + private$.add_mapping_dimnames("varm") + } else { + # trackstatus: class=ZarrAnnData, feature=set_varm, status=done + private$.validate_aligned_mapping( + value, + "varm", + c(self$n_vars()), + expected_rownames = self$var_names, + strip_rownames = TRUE, + strip_colnames = FALSE + ) |> + write_zarr_element( + private$.zarrobj, + "varm", + private$.compression + ) + } + }, + #' @field obsp See [AnnData-usage] + obsp = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_obsp, status=done + read_zarr_element(private$.zarrobj, "obsp") |> + private$.add_mapping_dimnames("obsp") + } else { + # trackstatus: class=ZarrAnnData, feature=set_obsp, status=done + private$.validate_aligned_mapping( + value, + "obsp", + c(self$n_obs(), self$n_obs()), + expected_rownames = self$obs_names, + expected_colnames = self$obs_names + ) |> + write_zarr_element( + private$.zarrobj, + "obsp", + private$.compression + ) + } + }, + #' @field varp See [AnnData-usage] + varp = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_varp, status=done + read_zarr_element(private$.zarrobj, "varp") |> + private$.add_mapping_dimnames("varp") + } else { + # trackstatus: class=ZarrAnnData, feature=set_varp, status=done + private$.validate_aligned_mapping( + value, + "varp", + c(self$n_vars(), self$n_vars()), + expected_rownames = self$var_names, + expected_colnames = self$var_names + ) |> + write_zarr_element( + private$.zarrobj, + "varp", + private$.compression + ) + } + }, + #' @field obs See [AnnData-usage] + obs = function(value) { + private$.check_file_valid() + + if 
(missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_obs, status=done + read_zarr_element(private$.zarrobj, "obs") + } else { + # trackstatus: class=ZarrAnnData, feature=set_obs, status=done + private$.validate_obsvar_dataframe(value, "obs") |> + write_zarr_element( + private$.zarrobj, + "obs", + private$.compression + ) + } + }, + #' @field var See [AnnData-usage] + var = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_var, status=done + read_zarr_element(private$.zarrobj, "var") + } else { + # trackstatus: class=ZarrAnnData, feature=set_var, status=done + private$.validate_obsvar_dataframe(value, "var") |> + write_zarr_element( + private$.zarrobj, + "var", + private$.compression + ) + } + }, + #' @field obs_names See [AnnData-usage] + obs_names = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_obs_names, status=done + rownames(self$obs) + } else { + # trackstatus: class=ZarrAnnData, feature=set_obs_names, status=done + rownames(self$obs) <- value + } + }, + #' @field var_names See [AnnData-usage] + var_names = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_var_names, status=done + rownames(self$var) + } else { + # trackstatus: class=ZarrAnnData, feature=set_var_names, status=done + rownames(self$var) <- value + } + }, + #' @field uns See [AnnData-usage] + uns = function(value) { + private$.check_file_valid() + + if (missing(value)) { + # trackstatus: class=ZarrAnnData, feature=get_uns, status=done + read_zarr_element(private$.zarrobj, "uns") + } else { + # trackstatus: class=ZarrAnnData, feature=set_uns, status=done + private$.validate_named_list(value, "uns") |> + write_zarr_element( + private$.zarrobj, + "uns", + private$.compression + ) + } + } + ), + public = list( + #' @description + #' `ZarrAnnData` constructor + #' + #' 
@param file The file name (character) of the `.zarr` file. If this file + #' already exists, other arguments must be `NULL`. + #' @param X See the `X` slot in [AnnData-usage] + #' @param layers See the `layers` slot in [AnnData-usage] + #' @param obs See the `obs` slot in [AnnData-usage] + #' @param var See the `var` slot in [AnnData-usage] + #' @param obsm See the `obsm` slot in [AnnData-usage] + #' @param varm See the `varm` slot in [AnnData-usage] + #' @param obsp See the `obsp` slot in [AnnData-usage] + #' @param varp See the `varp` slot in [AnnData-usage] + #' @param uns See the `uns` slot in [AnnData-usage] + #' @param shape Shape tuple (e.g. `c(n_obs, n_vars)`). Can be provided if + #' both `X` or `obs` and `var` are not provided. + #' @param mode The mode to open the Zarr file. See [as_ZarrAnnData()] for + #' details + #' @param compression The compression algorithm to use. See + #' [as_ZarrAnnData()] for details + #' + #' @details + #' The constructor creates a new Zarr `AnnData` interface object. This can + #' be used either to connect to an existing `.zarr` file or to + #' create a new one. If any additional slot arguments are set an existing + #' file will be overwritten. 
+ initialize = function( + file, + X = NULL, + obs = NULL, + var = NULL, + layers = NULL, + obsm = NULL, + varm = NULL, + obsp = NULL, + varp = NULL, + uns = NULL, + shape = NULL, + mode = c("a", "r", "r+", "w", "w-", "x"), + compression = c("none", "gzip") + ) { + check_requires("ZarrAnnData", "Rarr", where = "Bioc") + + compression <- match.arg(compression) + mode <- match.arg(mode) + + private$.compression <- compression + + private$.close_on_finalize <- is.character(file) + + is_readonly <- FALSE + + if (is.character(file)) { + if (mode == "a") { + if (dir.exists(file)) { + mode <- "r+" + } else { + mode <- "w-" + } + } + + if (!dir.exists(file) && mode %in% c("r", "r+")) { + cli_abort( + paste( + "File {.file {file}} does not exist but mode is set to {.val {mode}}.", + "If you want to create a new file, use a different mode (e.g. 'w-').", + "See {.help read_zarr} or {.help write_zarr} for more information." + ), + call = rlang::caller_env() + ) + } + + if (dir.exists(file) && mode %in% c("w-", "x")) { + cli_abort( + paste( + "File {.file {file}} already exists but mode is set to {.val {mode}}.", + "If you want to overwrite the file, use a different mode (e.g. 'w').", + "See {.help read_zarr} or {.help write_zarr} for more information." + ), + call = rlang::caller_env() + ) + } + + if (mode %in% c("w", "w-", "x")) { + create_zarr(file) + } else if (mode == "r") { + is_readonly <- TRUE + } + } + + is_empty <- is_zarr_empty(file) + + if (!is_readonly) { + if (!is_empty) { + cli_warn( + paste( + "An non-empty file is opened in read/write mode.", + "Use with caution, as this can lead to data corruption." + ) + ) + } else { + shape <- get_shape(obs, var, X, shape) + obs <- get_initial_obs(obs, X, shape) + var <- get_initial_var(var, X, shape) + write_empty_zarr(file, obs, var, compression) + } + } + + # File is supposed to exist by now. 
Check if it is a valid Zarr file + attrs <- Rarr::read_zarr_attributes(file) + if (!all(c("encoding-type", "encoding-version") %in% names(attrs))) { + cli_abort(c( + "File {.file {file}} is not a valid Zarr file." + )) + } + + # Set the file path + private$.zarrobj <- file + + if (is_readonly) { + # if any of these variables are not NULL, throw an error + are_null <- vapply( + .anndata_slots, + function(x) is.null(get(x)), + logical(1) + ) + if (!all(are_null)) { + cli_abort( + paste0( + "Error trying to write data (", + paste(.anndata_slots[!are_null], collapse = ", "), + ") to an Zarr file opened in read-only mode." + ) + ) + } + } else { + for (slot in .anndata_slots) { + value <- get(slot) + if (!is.null(value)) { + self[[slot]] <- value + } + } + } + + self + }, + + # We don't close + #' @description Close the Zarr store/file + close = function() {}, + + #' @description See the `n_obs` field in [AnnData-usage] + n_obs = function() { + nrow(self$obs) + }, + + #' @description See the `n_vars` field in [AnnData-usage] + n_vars = function() { + nrow(self$var) + } + ) +) + +#' Convert an `AnnData` to a `ZarrAnnData` +#' +#' Convert another `AnnData` object to a [`ZarrAnnData`] object +#' +#' @param adata An `AnnData` object to be converted to [`ZarrAnnData`] +#' @param file The file name (character) of the `.zarr` file +#' @param compression The compression algorithm to use when writing the +#' Zarr file. Can be one of `"none"` or `"gzip"`. Defaults to +#' `"none"`. +#' @param mode The mode to open the Zarr file: +#' +#' * `a` creates a new file or opens an existing one for read/write +#' * `r` opens an existing file for reading +#' * `r+` opens an existing file for read/write +#' * `w` creates a file, truncating any existing ones +#' * `w-`/`x` are synonyms, creating a file and failing if it already exists +#' +#' @return A [`ZarrAnnData`] object with the same data as the input `AnnData` +#' object. 
+#' @keywords internal +#' +#' @family object converters +#' +# nolint start: object_name_linter +as_ZarrAnnData <- function( + # nolint end: object_name_linter + adata, + file, + compression = c("none", "gzip"), + mode = c("w-", "r", "r+", "a", "w", "x") +) { + if (!(inherits(adata, "AbstractAnnData"))) { + cli_abort( + "{.arg adata} must be a {.cls AbstractAnnData} but has class {.cls {class(adata)}}" + ) + } + + mode <- match.arg(mode) + ZarrAnnData$new( + file = file, + X = adata$X, + obs = adata$obs, + var = adata$var, + obsm = adata$obsm, + varm = adata$varm, + layers = adata$layers, + obsp = adata$obsp, + varp = adata$varp, + uns = adata$uns, + shape = adata$shape(), + mode = mode, + compression = compression + ) +} + +# nolint start: object_name_linter +cleanup_ZarrAnnData <- function(...) { + # nolint end: object_name_linter + args <- list(...) + + if ( + !is.null(args$file) && is.character(args$file) && file.exists(args$file) + ) { + cli::cli_alert("Removing file: {.file {args$file}}") + unlink(args$file, recursive = TRUE) # a Zarr store is a directory + } +} diff --git a/R/anndata_constructors.R b/R/anndata_constructors.R index 5b6f3692..ec440df3 100644 --- a/R/anndata_constructors.R +++ b/R/anndata_constructors.R @@ -5,6 +5,7 @@ anndata_constructors <- function() { list( "HDF5AnnData" = HDF5AnnData, "InMemoryAnnData" = InMemoryAnnData, + "ZarrAnnData" = ZarrAnnData, "ReticulateAnnData" = ReticulateAnnData ) } @@ -16,7 +17,12 @@ anndata_constructors <- function() { #' #' @noRd get_anndata_constructor <- function( - class = c("HDF5AnnData", "InMemoryAnnData", "ReticulateAnnData") + class = c( + "HDF5AnnData", + "InMemoryAnnData", + "ZarrAnnData", + "ReticulateAnnData" + ) ) { # TODO: also support directly passing the correct class? 
class <- match.arg(class) diff --git a/R/as_AnnData.R b/R/as_AnnData.R index bb956c35..c5ca3970 100644 --- a/R/as_AnnData.R +++ b/R/as_AnnData.R @@ -175,7 +175,12 @@ as_AnnData <- function( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = NULL, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c( + "InMemory", + "HDF5AnnData", + "ZarrAnnData", + "ReticulateAnnData" + ), ... ) { UseMethod("as_AnnData", x) @@ -195,7 +200,12 @@ as_AnnData.SingleCellExperiment <- function( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = TRUE, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c( + "InMemory", + "HDF5AnnData", + "ZarrAnnData", + "ReticulateAnnData" + ), ... ) { from_SingleCellExperiment( @@ -228,7 +238,12 @@ as_AnnData.Seurat <- function( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = NULL, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c( + "InMemory", + "HDF5AnnData", + "ZarrAnnData", + "ReticulateAnnData" + ), ... ) { from_Seurat( diff --git a/R/from_Seurat.R b/R/from_Seurat.R index a40d110b..e6fa29f5 100644 --- a/R/from_Seurat.R +++ b/R/from_Seurat.R @@ -30,7 +30,12 @@ from_Seurat <- function( obsp_mapping = TRUE, varp_mapping = TRUE, uns_mapping = TRUE, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c( + "InMemory", + "HDF5AnnData", + "ZarrAnnData", + "ReticulateAnnData" + ), ... 
) { check_requires("Converting Seurat to AnnData", c("SeuratObject", "Seurat")) diff --git a/R/from_SingleCellExperiment.R b/R/from_SingleCellExperiment.R index 88a5e6a2..abf1d988 100644 --- a/R/from_SingleCellExperiment.R +++ b/R/from_SingleCellExperiment.R @@ -28,7 +28,12 @@ from_SingleCellExperiment <- function( obsp_mapping = TRUE, varp_mapping = TRUE, uns_mapping = TRUE, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c( + "InMemory", + "HDF5AnnData", + "ZarrAnnData", + "ReticulateAnnData" + ), ... ) { check_requires( diff --git a/R/read_zarr.R b/R/read_zarr.R new file mode 100644 index 00000000..a3c911bb --- /dev/null +++ b/R/read_zarr.R @@ -0,0 +1,70 @@ +#' Read Zarr +#' +#' Read data from a Zarr store +#' +#' @param path Path to the Zarr store to read +#' @param as The type of object to return. One of: +#' +#' * `"InMemoryAnnData"`: Read the Zarr store into memory as an +#' [`InMemoryAnnData`] object +#' * `"ZarrAnnData"`: Read the Zarr store as an [`ZarrAnnData`] object +#' * `"SingleCellExperiment"`: Read the Zarr store as a +#' [`SingleCellExperiment::SingleCellExperiment`] object +#' * `"Seurat"`: Read the Zarr store as a +#' [`SeuratObject::Seurat`] object +#' @param mode The mode to open the Zarr file. +#' +#' * `a` creates a new file or opens an existing one for read/write. +#' * `r` opens an existing file for reading. +#' * `r+` opens an existing file for read/write. +#' * `w` creates a file, truncating any existing ones. +#' * `w-`/`x` are synonyms, creating a file and failing if it already exists. +#' @param ... 
#' Read Zarr
#'
#' Read data from a Zarr store
#'
#' @param path Path to the Zarr store to read
#' @param as The type of object to return. One of:
#'
#'   * `"InMemoryAnnData"`: Read the Zarr store into memory as an
#'     [`InMemoryAnnData`] object
#'   * `"ZarrAnnData"`: Read the Zarr store as a [`ZarrAnnData`] object
#'   * `"SingleCellExperiment"`: Read the Zarr store as a
#'     [`SingleCellExperiment::SingleCellExperiment`] object
#'   * `"Seurat"`: Read the Zarr store as a
#'     [`SeuratObject::Seurat`] object
#' @param mode The mode to open the Zarr file.
#'
#'   * `a` creates a new file or opens an existing one for read/write.
#'   * `r` opens an existing file for reading.
#'   * `r+` opens an existing file for read/write.
#'   * `w` creates a file, truncating any existing ones.
#'   * `w-`/`x` are synonyms, creating a file and failing if it already exists.
#' @param ... Extra arguments provided to the `as_*` conversion function for the
#'   object specified by `as`
#'
#' @return The object specified by `as`
#' @export
#'
#' @family AnnData creators
#'
#' @examples
#' zarr_dir <- system.file("extdata", "example.zarr.zip", package = "anndataR")
#' td <- tempdir(check = TRUE)
#' unzip(zarr_dir, exdir = td)
#' zarr_store <- file.path(td, "example.zarr")
#'
#' # Read the Zarr as a SingleCellExperiment object
#' if (requireNamespace("SingleCellExperiment", quietly = TRUE)) {
#'   sce <- read_zarr(zarr_store, as = "SingleCellExperiment")
#' }
#'
#' # Read the Zarr as a Seurat object
#' if (requireNamespace("SeuratObject", quietly = TRUE)) {
#'   seurat <- read_zarr(zarr_store, as = "Seurat")
#' }
read_zarr <- function(
  path,
  as = c("InMemoryAnnData", "ZarrAnnData", "SingleCellExperiment", "Seurat"),
  mode = c("r", "r+", "a", "w", "w-", "x"),
  ...
) {
  as <- match.arg(as)
  mode <- match.arg(mode)

  zarr_adata <- ZarrAnnData$new(path, mode = mode)

  # The Zarr-backed object is handed to the caller still open
  if (as == "ZarrAnnData") {
    return(zarr_adata)
  }

  # Close the store even when the conversion below fails
  on.exit(zarr_adata$close(), add = TRUE)

  switch(
    as,
    "SingleCellExperiment" = zarr_adata$as_SingleCellExperiment(...),
    "Seurat" = zarr_adata$as_Seurat(...),
    "InMemoryAnnData" = zarr_adata$as_InMemoryAnnData(...)
  )
}

#' Read Zarr encoding
#'
#' Read the encoding and version of an element in a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param stop_on_error Whether to raise an error (`TRUE`) or return `NULL`
#'   (`FALSE`) when the encoding attributes are missing
#'
#' @return A named list with names type and version, or `NULL`
#'
#' @noRd
read_zarr_encoding <- function(store, name, stop_on_error = TRUE) {
  attrs <- Rarr::read_zarr_attributes(file.path(store, name))

  if (!all(c("encoding-type", "encoding-version") %in% names(attrs))) {
    if (stop_on_error) {
      stop(
        "Encoding attributes not found for element '",
        name,
        "' "
      )
    } else {
      return(NULL)
    }
  }

  list(
    type = attrs[["encoding-type"]],
    version = attrs[["encoding-version"]]
  )
}

#' Read Zarr element
#'
#' Read an element from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param type The encoding type of the element to read
#' @param version The encoding version of the element to read
#' @param stop_on_error Whether to stop on error or generate a warning instead
#' @param ... Extra arguments passed to individual reading functions
#'
#' @details
#' When `type` is `NULL` the encoding is determined from the element using
#' `read_zarr_encoding()` and used to select the appropriate reading function.
#' An encoding type without a reading function is always an error, whatever
#' the value of `stop_on_error`.
#'
#' @return Value depending on the encoding, or `NULL` when reading failed and
#'   `stop_on_error` is `FALSE`
#'
#' @noRd
read_zarr_element <- function(
  store,
  name,
  type = NULL,
  version = NULL,
  stop_on_error = FALSE,
  ...
) {
  if (is.null(type)) {
    encoding_list <- read_zarr_encoding(
      store,
      name,
      stop_on_error = stop_on_error
    )
    if (is.null(encoding_list)) {
      if (stop_on_error) {
        stop("No encoding info found for element '", name, "'")
      } else {
        warning("No encoding found for element '", name, "'")
        return(NULL)
      }
    }
    type <- encoding_list$type
    version <- encoding_list$version
  }

  read_fun <- switch(
    type,
    "array" = read_zarr_dense_array,
    "rec-array" = read_zarr_rec_array,
    "csr_matrix" = read_zarr_csr_matrix,
    "csc_matrix" = read_zarr_csc_matrix,
    "dataframe" = read_zarr_data_frame,
    "dict" = read_zarr_mapping,
    "string" = read_zarr_string_scalar,
    "numeric-scalar" = read_zarr_numeric_scalar,
    "categorical" = read_zarr_categorical,
    "string-array" = read_zarr_string_array,
    "nullable-integer" = read_zarr_nullable_integer,
    "nullable-boolean" = read_zarr_nullable_boolean,
    stop(
      "No function for reading Zarr encoding '",
      type,
      "' for element '",
      name,
      "'"
    )
  )

  tryCatch(
    {
      read_fun(store = store, name = name, version = version, ...)
    },
    error = function(e) {
      # Named `msg` so that base::message() is not shadowed
      msg <- paste0(
        "Error reading element '",
        name,
        "' of type '",
        type,
        "':\n",
        conditionMessage(e)
      )
      if (stop_on_error) {
        stop(msg, call. = FALSE)
      } else {
        warning(msg, call. = FALSE)
        NULL
      }
    }
  )
}

#' Read a raw Zarr array
#'
#' Thin wrapper joining `store` and `name` before calling
#' `Rarr::read_zarr_array()`
#'
#' @param store Path to the Zarr store
#' @param name Name of the array within the Zarr store
#'
#' @return Whatever `Rarr::read_zarr_array()` returns for that path
#'
#' @noRd
read_zarr_array <- function(store, name) {
  Rarr::read_zarr_array(file.path(store, name))
}

#' Read Zarr dense array
#'
#' Read a dense array from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return an array; input without dimensions is given a single dimension
#'
#' @noRd
read_zarr_dense_array <- function(store, name, version = "0.2.0") {
  version <- match.arg(version)

  data <- read_zarr_array(store, name)

  # If the array is 1D, explicitly add a dimension
  if (is.null(dim(data))) {
    data <- as.vector(data)
    dim(data) <- length(data)
  }

  # Reverse {rhdf5} coercion to factors
  # NOTE(review): carried over from the HDF5 reader, presumably never
  # triggered by Rarr output - confirm before removing
  if (is.factor(data) && all(levels(data) %in% c("TRUE", "FALSE"))) {
    dims <- dim(data)
    data <- as.logical(data)
    dim(data) <- dims
  }

  data
}

#' Read Zarr CSR matrix
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return See `read_zarr_sparse_array()`
#'
#' @noRd
read_zarr_csr_matrix <- function(store, name, version) {
  read_zarr_sparse_array(
    store = store,
    name = name,
    version = version,
    type = "csr_matrix"
  )
}

#' Read Zarr CSC matrix
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return See `read_zarr_sparse_array()`
#'
#' @noRd
read_zarr_csc_matrix <- function(store, name, version) {
  read_zarr_sparse_array(
    store = store,
    name = name,
    version = version,
    type = "csc_matrix"
  )
}

#' Read Zarr sparse array
#'
#' Read a sparse array from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#' @param type Type of the sparse matrix, either "csr_matrix" or "csc_matrix"
#'
#' @return a sparse matrix built with `Matrix::sparseMatrix()`, row-compressed
#'   for "csr_matrix" and column-compressed for "csc_matrix"
#' @importFrom Matrix sparseMatrix
#'
#' @noRd
read_zarr_sparse_array <- function(
  store,
  name,
  version = "0.1.0",
  type = c("csr_matrix", "csc_matrix")
) {
  version <- match.arg(version)
  type <- match.arg(type)

  attrs <- Rarr::read_zarr_attributes(file.path(store, name))

  data <- as.vector(read_zarr_array(store, paste0(name, "/data")))
  indices <- as.vector(read_zarr_array(store, paste0(name, "/indices")))
  indptr <- as.vector(read_zarr_array(store, paste0(name, "/indptr")))
  shape <- as.vector(unlist(attrs[["shape"]], use.names = FALSE))

  # The stored indices are zero-based, hence `index1 = FALSE`
  switch(
    type,
    "csc_matrix" = Matrix::sparseMatrix(
      i = indices,
      p = indptr,
      x = data,
      dims = shape,
      repr = "C",
      index1 = FALSE
    ),
    "csr_matrix" = Matrix::sparseMatrix(
      j = indices,
      p = indptr,
      x = data,
      dims = shape,
      repr = "R",
      index1 = FALSE
    )
  )
}
#' Read Zarr recarray
#'
#' Read a recarray from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @details
#' A "record array" (recarray) is a Python NumPy array type that contains
#' "fields" that can be indexed using attributes (similar to columns in a
#' spreadsheet). See https://numpy.org/doc/stable/reference/generated/numpy.recarray.html
#' for details.
#'
#' They are used by **scanpy** to score marker gene testing results.
#'
#' Every sub-directory of the element is read as one field.
#'
#' @return a named list of vectors, one per field
#'
#' @noRd
read_zarr_rec_array <- function(store, name, version = "0.2.0") {
  version <- match.arg(version)

  # Each field is stored as its own array in a sub-directory
  field_names <- list.dirs(
    path = file.path(store, name),
    recursive = FALSE,
    full.names = FALSE
  )

  # Go through the shared read_zarr_array() wrapper like the other readers
  fields <- lapply(field_names, function(field) {
    as.vector(read_zarr_array(store, file.path(name, field)))
  })
  setNames(fields, field_names)
}

#' Read Zarr nullable boolean
#'
#' Read a nullable boolean from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return a logical vector, the result of `read_zarr_nullable()` coerced with
#'   `as.logical()`
#'
#' @noRd
read_zarr_nullable_boolean <- function(store, name, version = "0.1.0") {
  as.logical(read_zarr_nullable(store, name, version))
}

#' Read Zarr nullable integer
#'
#' Read a nullable integer from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return an integer vector, the result of `read_zarr_nullable()` coerced
#'   with `as.integer()`
#'
#' @noRd
read_zarr_nullable_integer <- function(store, name, version = "0.1.0") {
  as.integer(read_zarr_nullable(store, name, version))
}
#' Read Zarr nullable
#'
#' Read a nullable vector (boolean or integer) from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return the stored values with `NA` wherever the stored mask is set
#'
#' @noRd
read_zarr_nullable <- function(store, name, version = "0.1.0") {
  version <- match.arg(version)

  mask <- read_zarr_array(store, paste0(name, "/mask"))
  values <- read_zarr_array(store, paste0(name, "/values"))

  # Coerce the mask so that a 0/1 mask is never used as positions
  element <- values
  element[as.logical(mask)] <- NA

  element
}

#' Read Zarr string array
#'
#' Read a string array from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return a character array; the string `"NA"` is read back as `NA`
#'
#' @noRd
read_zarr_string_array <- function(store, name, version = "0.2.0") {
  version <- match.arg(version)
  data <- read_zarr_array(store, name)

  if (is.null(dim(data)) || length(dim(data)) == 1) {
    data <- as.vector(data)
    dim(data) <- length(data)
  }

  # convert "NA" to NA (as in rhdf5:::.h5postProcessDataset)
  data[data == "NA"] <- NA

  data
}

#' Build a factor from zero-based codes
#'
#' @param codes Zero-based category codes, `-1` marks a missing value
#' @param categories The category labels, in code order
#' @param ordered Whether the factor is ordered
#'
#' @return a factor whose levels are exactly `categories`
#'
#' @noRd
.zarr_codes_to_factor <- function(codes, categories, ordered = FALSE) {
  # Convert to 1-based indexing; the missing code -1 becomes 0
  codes <- as.vector(codes) + 1L
  codes[codes == 0] <- NA

  categories <- as.vector(categories)

  # Giving `levels` explicitly keeps categories that no value uses; relying
  # on the default levels fails as soon as one is unused or a value is missing
  factor(
    codes,
    levels = seq_along(categories),
    labels = categories,
    ordered = ordered
  )
}

#' Read Zarr categorical
#'
#' Read a categorical from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return a factor
#'
#' @noRd
read_zarr_categorical <- function(store, name, version = "0.2.0") {
  version <- match.arg(version)

  codes <- read_zarr_array(store, paste0(name, "/codes"))
  categories <- read_zarr_array(store, paste0(name, "/categories"))

  if (length(dim(codes)) != 1) {
    stop(
      "There is currently no support for multidimensional categorical arrays"
    )
  }

  attributes <- Rarr::read_zarr_attributes(file.path(store, name))
  ordered <- attributes[["ordered"]]
  if (is.null(ordered) || is.na(ordered)) {
    warning(
      "Unable to determine if categorical '",
      name,
      "' is ordered, assuming it isn't"
    )

    ordered <- FALSE
  }

  .zarr_codes_to_factor(codes, categories, ordered)
}

#' Read Zarr string scalar
#'
#' Read a string scalar from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return the stored value coerced with `as.character()`
#'
#' @noRd
read_zarr_string_scalar <- function(store, name, version = "0.2.0") {
  version <- match.arg(version)
  as.character(read_zarr_array(store, name))
}

#' Read Zarr numeric scalar
#'
#' Read a numeric scalar from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return the stored value as a plain vector
#'
#' @noRd
read_zarr_numeric_scalar <- function(store, name, version = "0.2.0") {
  version <- match.arg(version)

  # convert array to vector
  as.vector(read_zarr_array(store, name))
}

#' Read Zarr mapping
#'
#' Read a mapping from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#'
#' @return a named list with one item per sub-directory of the element
#'
#' @noRd
read_zarr_mapping <- function(store, name, version = "0.1.0") {
  version <- match.arg(version)

  columns <- list.dirs(
    path = file.path(store, name),
    recursive = FALSE,
    full.names = FALSE
  )

  # Omit Zarr metadata files from the list of columns.
  columns <- columns[!columns %in% c(".zgroup", ".zattrs", ".zarray")]

  read_zarr_collection(store, name, columns)
}

#' Read Zarr data frame
#'
#' Read a data frame from a Zarr store
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param version Encoding version of the element to read
#' @param include_index Currently unused
#'
#' @details
#' The index element named by the `_index` attribute is read and used as the
#' row names of the result. The columns listed in the `column-order`
#' attribute are read in that order.
#'
#' @return a data.frame
#'
#' @noRd
read_zarr_data_frame <- function(
  store,
  name,
  include_index = TRUE,
  version = "0.2.0"
) {
  version <- match.arg(version)

  attrs <- Rarr::read_zarr_attributes(file.path(store, name))
  index_name <- attrs[["_index"]]
  column_order <- attrs[["column-order"]]

  index <- read_zarr_element(store, file.path(name, index_name))
  data <- read_zarr_collection(store, name, column_order)

  as.data.frame(
    row.names = index,
    data,
    check.names = FALSE,
    fix.empty.names = FALSE
  )
}

#' Read multiple Zarr datatypes
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param item_names Vector of item names (in order)
#'
#' @return a named list
#'
#' @noRd
read_zarr_collection <- function(store, name, item_names) {
  # Attribute values may be supplied as a list; flatten them to plain strings
  item_names <- as.character(unlist(item_names, use.names = FALSE))

  items <- lapply(
    item_names,
    function(item_name) {
      new_name <- paste0(name, "/", item_name)
      encoding <- read_zarr_encoding(store, new_name)
      read_zarr_element(
        store = store,
        name = new_name,
        type = encoding$type,
        version = encoding$version
      )
    }
  )
  names(items) <- item_names
  items
}
#' Write Zarr
#'
#' Write a Zarr store
#'
#' @param object The object to write: an `AnnData` object (anything inheriting
#'   from `AbstractAnnData`) or an object that [as_AnnData()] can convert, such
#'   as a [`SingleCellExperiment::SingleCellExperiment`] or a
#'   [`SeuratObject::Seurat`] object
#' @param path Path of the file to write to
#' @param compression The compression algorithm to use when writing the Zarr
#'   file. Can be one of `"none"` or `"gzip"`. Defaults to `"none"`.
#' @param mode The mode to open the Zarr file.
#'
#'   * `a` creates a new file or opens an existing one for read/write
#'   * `r+` opens an existing file for read/write
#'   * `w` creates a file, truncating any existing ones
#'   * `w-`/`x` are synonyms creating a file and failing if it already exists
#' @param ... Additional arguments passed to [as_AnnData()]. Not used when
#'   `object` is already an `AnnData` object.
#'
#' @details
#'
#' ## Compression
#'
#' Compression is currently not supported for Boolean arrays, they will be
#' written uncompressed.
#'
#' ## `NULL` values
#'
#' For compatibility with changes in Python **anndata** 0.12.0, `NULL` values
#' in `uns` are written to Zarr files as a `NULL` dataset (instead of not being
#' written at all). To disable this behaviour, set
#' `options(anndataR.write_null = FALSE)`. This may be required to allow the
#' file to be read by older versions of Python **anndata**.
#'
#' @return `path` invisibly
#' @export
#'
#' @examples
#' adata <- AnnData(
#'   X = matrix(1:5, 3L, 5L),
#'   layers = list(
#'     A = matrix(5:1, 3L, 5L),
#'     B = matrix(letters[1:5], 3L, 5L)
#'   ),
#'   obs = data.frame(row.names = LETTERS[1:3], cell = 1:3),
#'   var = data.frame(row.names = letters[1:5], gene = 1:5)
#' )
#' zarr_store <- tempfile(fileext = ".zarr")
#' adata$write_zarr(zarr_store)
#'
#' # Write a SingleCellExperiment as an Zarr
#' if (requireNamespace("SingleCellExperiment", quietly = TRUE)) {
#'   ncells <- 100
#'   counts <- matrix(rpois(20000, 5), ncol = ncells)
#'   logcounts <- log2(counts + 1)
#'
#'   pca <- matrix(runif(ncells * 5), ncells)
#'   tsne <- matrix(rnorm(ncells * 2), ncells)
#'
#'   sce <- SingleCellExperiment::SingleCellExperiment(
#'     assays = list(counts = counts, logcounts = logcounts),
#'     reducedDims = list(PCA = pca, tSNE = tsne)
#'   )
#'
#'   adata <- as_AnnData(sce)
#'   zarr_store <- tempfile(fileext = ".zarr")
#'   adata$write_zarr(zarr_store)
#' }
#'
#' # Write a Seurat as a Zarr
#' if (requireNamespace("Seurat", quietly = TRUE)) {
#'   library(Seurat)
#'
#'   counts <- matrix(1:15, 5L, 3L)
#'   dimnames(counts) <- list(
#'     LETTERS[1:5],
#'     letters[1:3]
#'   )
#'   cell.metadata <- data.frame(
#'     row.names = letters[1:3],
#'     cell = 1:3
#'   )
#'   obj <- CreateSeuratObject(counts, meta.data = cell.metadata)
#'   gene.metadata <- data.frame(
#'     row.names = LETTERS[1:5],
#'     gene = 1:5
#'   )
#'   obj[["RNA"]] <- AddMetaData(GetAssay(obj), gene.metadata)
#'
#'   adata <- as_AnnData(obj)
#'   zarr_store <- tempfile(fileext = ".zarr")
#'   adata$write_zarr(zarr_store)
#' }
write_zarr <- function(
  object,
  path,
  compression = c("none", "gzip"),
  mode = c("w-", "r", "r+", "a", "w", "x"),
  ...
) {
  mode <- match.arg(mode)

  # Creating the Zarr-backed object is what writes the data to `path`
  adata <- if (inherits(object, "AbstractAnnData")) {
    object$as_ZarrAnnData(
      path,
      compression = compression,
      mode = mode
    )
  } else {
    as_AnnData(
      object,
      output_class = "ZarrAnnData",
      file = path,
      compression = compression,
      mode = mode,
      ...
    )
  }

  # The object itself is not needed once the store has been written
  rm(adata)
  gc()

  invisible(path)
}

#' Write Zarr element
#'
#' Write an element to a Zarr store
#'
#' @param value The value to write
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element. Can be
#'   one of `"none"` or `"gzip"`. Defaults to `"none"`.
#' @param stop_on_error Whether to stop on error or generate a warning instead
#' @param ... Additional arguments passed to writing functions
#'
#' @noRd
#'
#' @details
#' `write_zarr_element()` should always be used instead of any of the specific
#' writing functions as it contains additional boilerplate to make sure
#' elements are written correctly.
#'
#' A value of an unsupported class is always an error, whatever the value of
#' `stop_on_error`.
write_zarr_element <- function(
  value,
  store,
  name,
  compression = c("none", "gzip"),
  stop_on_error = FALSE,
  ...
) {
  compression <- match.arg(compression)

  # Objects can carry several classes; collapse them so that messages stay a
  # single string
  value_class <- paste(class(value), collapse = "/")

  write_fun <-
    if (inherits(value, "sparseMatrix")) {
      # Sparse matrices
      write_zarr_sparse_array
    } else if (is.factor(value)) {
      # Categoricals
      write_zarr_categorical
    } else if (is.list(value)) {
      # Lists and data frames
      if (is.data.frame(value)) {
        write_zarr_data_frame
      } else {
        write_zarr_mapping
      }
    } else if (is.character(value)) {
      # Character values
      if (length(value) == 1 && !is.matrix(value)) {
        write_zarr_string_scalar
      } else {
        write_zarr_string_array
      }
    } else if (is.numeric(value) || inherits(value, "denseMatrix")) {
      # Numeric values
      if (length(value) == 1 && !is.matrix(value)) {
        write_zarr_numeric_scalar
      } else if (is.integer(value) && any(is.na(value))) {
        write_zarr_nullable_integer
      } else {
        write_zarr_dense_array
      }
    } else if (is.logical(value)) {
      # Logical values
      if (any(is.na(value))) {
        write_zarr_nullable_boolean
      } else if (length(value) == 1) {
        # Single Booleans should be written as numeric scalars
        write_zarr_numeric_scalar
      } else {
        write_zarr_dense_array
      }
    } else {
      # Fail if unknown
      stop(
        "Writing '",
        value_class,
        "' objects to Zarr stores is not supported"
      )
    }

  tryCatch(
    {
      write_fun(
        value = value,
        store = store,
        name = name,
        compression = compression,
        ...
      )
    },
    error = function(e) {
      # Named `msg` so that base::message() is not shadowed
      msg <- paste0(
        "Could not write element '",
        name,
        "' of type '",
        value_class,
        "':\n",
        conditionMessage(e)
      )
      if (stop_on_error) {
        stop(msg, call. = FALSE)
      } else {
        warning(msg, call. = FALSE)
        NULL
      }
    }
  )
}

#' Write Zarr encoding
#'
#' Write Zarr encoding attributes to an element in a Zarr store
#'
#' @noRd
#'
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param encoding The encoding type to set
#' @param version The encoding version to set
write_zarr_encoding <- function(store, name, encoding, version) {
  Rarr::write_zarr_attributes(
    file.path(store, name),
    new.zattrs = list(`encoding-type` = encoding, `encoding-version` = version)
  )
}

#' Write Zarr dense array
#'
#' Write a dense array to a Zarr store
#'
#' @param value Value to write
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element,
#'   forwarded to `zarr_write_compressed()`
#' @param version Encoding version of the element to write
#' @param chunks Forwarded to `zarr_write_compressed()`
#'
#' @noRd
write_zarr_dense_array <- function(
  value,
  store,
  name,
  compression,
  version = "0.2.0",
  chunks = TRUE
) {
  version <- match.arg(version)

  # matrices of type 'dgeMatrix' can simply be converted to a matrix
  if (inherits(value, "denseMatrix")) {
    value <- as.matrix(value)
  }

  zarr_write_compressed(
    store,
    name,
    value,
    compression,
    chunks = chunks
  )

  # Write attributes
  write_zarr_encoding(store, name, "array", version)
}
#' Write Zarr sparse array
#'
#' Write a sparse array to a Zarr store
#'
#' @noRd
#'
#' @param value Value to write, an `RsparseMatrix` or a `CsparseMatrix`
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element,
#'   forwarded to `zarr_write_compressed()`
#' @param version Encoding version of the element to write
write_zarr_sparse_array <- function(
  value,
  store,
  name,
  compression,
  version = "0.1.0"
) {
  version <- match.arg(version)

  # check types
  stopifnot(inherits(value, "sparseMatrix"))

  # Row-compressed matrices keep their indices in `j`, column-compressed in `i`
  if (inherits(value, "RsparseMatrix")) {
    type <- "csr_matrix"
    indices_attr <- "j"
  } else if (inherits(value, "CsparseMatrix")) {
    type <- "csc_matrix"
    indices_attr <- "i"
  } else {
    # stop() joins its arguments without a separator, so each piece carries
    # its own spacing
    stop(
      "Unsupported matrix format in ",
      name,
      ". ",
      "Supported formats are RsparseMatrix and CsparseMatrix ",
      "(and objects that inherit from those)."
    )
  }

  # Write sparse matrix
  create_zarr_group(store, name)
  zarr_write_compressed(
    store,
    paste0(name, "/indices"),
    attr(value, indices_attr),
    compression
  )
  zarr_write_compressed(
    store,
    paste0(name, "/indptr"),
    value@p,
    compression
  )
  zarr_write_compressed(
    store,
    paste0(name, "/data"),
    value@x,
    compression
  )

  # Add encoding
  write_zarr_encoding(store, name, type, version)

  # Write shape attribute
  Rarr::write_zarr_attributes(
    file.path(store, name),
    new.zattrs = list(shape = dim(value))
  )
}
#' Write a nullable vector as a values/mask pair
#'
#' Shared implementation of the nullable writers: creates the group, writes
#' the values with missing entries replaced by `fill`, writes the missingness
#' mask and finally the encoding attributes.
#'
#' @noRd
#'
#' @param value Value to write
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element
#' @param fill Placeholder stored where `value` is missing
#' @param encoding The encoding type to record for the element
#' @param version Encoding version of the element to write
.write_zarr_nullable <- function(
  value,
  store,
  name,
  compression,
  fill,
  encoding,
  version
) {
  create_zarr_group(store, name)

  is_missing <- is.na(value)
  values <- value
  values[is_missing] <- fill

  zarr_write_compressed(store, paste0(name, "/values"), values, compression)
  zarr_write_compressed(store, paste0(name, "/mask"), is_missing, compression)

  # Write attributes
  write_zarr_encoding(store, name, encoding, version)
}

#' Write Zarr nullable boolean
#'
#' Write a nullable boolean to a Zarr store
#'
#' @noRd
#'
#' @param value Value to write
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element,
#'   forwarded to `zarr_write_compressed()`
#' @param version Encoding version of the element to write
write_zarr_nullable_boolean <- function(
  value,
  store,
  name,
  compression,
  version = "0.1.0"
) {
  # Missing entries are stored as FALSE and flagged in the mask
  .write_zarr_nullable(
    value = value,
    store = store,
    name = name,
    compression = compression,
    fill = FALSE,
    encoding = "nullable-boolean",
    version = version
  )
}

#' Write Zarr nullable integer
#'
#' Write a nullable integer to a Zarr store
#'
#' @noRd
#'
#' @param value Value to write
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element,
#'   forwarded to `zarr_write_compressed()`
#' @param version Encoding version of the element to write
write_zarr_nullable_integer <- function(
  value,
  store,
  name,
  compression,
  version = "0.1.0"
) {
  # Missing entries are stored as -1 and flagged in the mask
  .write_zarr_nullable(
    value = value,
    store = store,
    name = name,
    compression = compression,
    fill = -1L,
    encoding = "nullable-integer",
    version = version
  )
}
#' Write Zarr string array
#'
#' Write a string array to a Zarr store
#'
#' @noRd
#'
#' @param value Value to write
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element,
#'   translated by `.get_compressor()`
#' @param version Encoding version of the element to write
#'
#' @details
#' Missing values are stored as the string `"NA"`. The array is written as a
#' single chunk covering the whole array.
write_zarr_string_array <- function(
  value,
  store,
  name,
  compression,
  version = "0.2.0"
) {
  # Vectors have no dim attribute; give them one dimension of their length
  shape <- dim(value)
  if (is.null(shape)) {
    shape <- length(value)
  }

  # replace NA to "NA" (as in rhdf5:::.h5postProcessDataset)
  # to read as "NA" -> NA later after Rarr::read_zarr_array
  strings <- value
  strings[is.na(strings)] <- "NA"

  Rarr::write_zarr_array(
    array(data = strings, dim = shape),
    zarr_array_path = file.path(store, name),
    chunk_dim = shape,
    compressor = .get_compressor(compression)
  )

  write_zarr_encoding(store, name, "string-array", version)
}
#' Write Zarr categorical
#'
#' Write a categorical to a Zarr store
#'
#' @noRd
#'
#' @param value Value to write, a factor
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element,
#'   forwarded to the array writers
#' @param version Encoding version of the element to write
#'
#' @details
#' The levels are written to `categories`, the zero-based codes to `codes`
#' (missing values as `-1`) and whether the factor is ordered to the
#' `ordered` attribute.
write_zarr_categorical <- function(
  value,
  store,
  name,
  compression,
  version = "0.2.0"
) {
  create_zarr_group(store, name)

  # Zero-based codes, with -1 standing in for missing values
  zero_based <- as.integer(value) - 1L
  zero_based[is.na(zero_based)] <- -1L

  # write values to file
  write_zarr_string_array(
    levels(value),
    store,
    paste0(name, "/categories"),
    compression
  )
  write_zarr_dense_array(
    zero_based,
    store,
    paste0(name, "/codes"),
    compression
  )

  # Write encoding
  write_zarr_encoding(
    store = store,
    name = name,
    encoding = "categorical",
    version = version
  )

  # Write ordered attribute
  is_ordered <- is.ordered(value)
  Rarr::write_zarr_attributes(
    file.path(store, name),
    new.zattrs = list("ordered" = is_ordered)
  )
}

#' Write Zarr string scalar
#'
#' Write a string scalar to a Zarr store
#'
#' @noRd
#'
#' @param value Value to write
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element,
#'   translated by `.get_compressor()`
#' @param version Encoding version of the element to write
write_zarr_string_scalar <- function(
  value,
  store,
  name,
  compression,
  version = "0.2.0"
) {
  # The scalar is stored as a one-element array in a single chunk
  scalar <- array(data = value, dim = 1)
  target <- file.path(store, name)

  Rarr::write_zarr_array(
    scalar,
    zarr_array_path = target,
    chunk_dim = 1,
    compressor = .get_compressor(compression)
  )

  # Write attributes
  write_zarr_encoding(store, name, "string", version)
}
#' Write Zarr numeric scalar
#'
#' Write a numeric scalar to a Zarr store
#'
#' @noRd
#'
#' @param value Value to write
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the element,
#'   forwarded to `zarr_write_compressed()`
#' @param version Encoding version of the element to write
write_zarr_numeric_scalar <- function(
  value,
  store,
  name,
  compression,
  version = "0.2.0"
) {
  # Write scalar
  zarr_write_compressed(store, name, value, compression)

  # Write attributes
  write_zarr_encoding(store, name, "numeric-scalar", version)
}

#' Write Zarr mapping
#'
#' Write a mapping to a Zarr store
#'
#' @noRd
#'
#' @param value Value to write, a list
#' @param store Path to the Zarr store
#' @param name Name of the element within the Zarr store
#' @param compression The compression to use when writing the items,
#'   forwarded to `write_zarr_element()`
#' @param version Encoding version of the element to write
#'
#' @details
#' Every named item is written with `write_zarr_element()` under
#' `name/<item name>`. Items without a name cannot be addressed in the store;
#' they are skipped with a warning.
write_zarr_mapping <- function(
  value,
  store,
  name,
  compression,
  version = "0.1.0"
) {
  create_zarr_group(store, name)

  keys <- names(value)
  if (is.null(keys)) {
    keys <- rep("", length(value))
  }

  # Unnamed items used to be dropped silently (fully unnamed lists) or to
  # fail the whole mapping (partially named lists)
  unnamed <- is.na(keys) | !nzchar(keys)
  if (any(unnamed)) {
    warning(
      "Skipping ",
      sum(unnamed),
      " unnamed item(s) of mapping '",
      name,
      "', only named items can be written",
      call. = FALSE
    )
  }

  # Write mapping elements
  for (key in keys[!unnamed]) {
    write_zarr_element(
      value[[key]],
      store,
      paste0(name, "/", key),
      compression
    )
  }

  write_zarr_encoding(store, name, "dict", version)
}
+#' @param version Encoding version of the element to write +write_zarr_data_frame <- function( + value, + store, + name, + compression, + index = NULL, + version = "0.2.0" +) { + create_zarr_group(store, name) + write_zarr_encoding(store, name, "dataframe", version) + + if (is.null(index)) { + index_name <- "_index" + index_value <- rownames(value) + } else if (length(index) == nrow(value)) { + index_name <- "_index" + index_value <- index + } else if (length(index) == 1 && index %in% colnames(value)) { + index_name <- index + index_value <- value[[index_name]] + value[[index_name]] <- NULL + } else { + stop( + "index must be a vector with length `nrow(value)` or a single character ", + "string giving the name of a column in `value`" + ) + } + if (is.null(index_value)) { + index_value <- seq_len(nrow(value)) - 1L + } + + # Write data frame columns + for (col in colnames(value)) { + write_zarr_element( + value[[col]], + store, + paste0(name, "/", col), + compression + ) + } + + # Write index + write_zarr_element( + index_value, + store, + paste0(name, "/", index_name), + compression + ) + + # Record the name of the element that holds the index + Rarr::write_zarr_attributes( + zarr_path = file.path(store, name), + new.zattrs = list("_index" = index_name) + ) + + # Record the column order (index column excluded) + col_order <- colnames(value) + col_order <- col_order[col_order != index_name] + # If there are no columns other than the index we set column order to an + # empty numeric vector + if (length(col_order) == 0) { + col_order <- numeric() + } + + Rarr::write_zarr_attributes( + zarr_path = file.path(store, name), + new.zattrs = list(`column-order` = col_order) + ) +} + +#' Write empty Zarr +#' +#' Write a new empty Zarr store +#' +#' @noRd +#' +#' @param store Path to the Zarr store to write +#' @param obs Data frame with observations +#' @param var Data frame with variables +#' @param compression The compression to use when writing the element.
Can be +#' one of `"none"` or `"gzip"`. Defaults to `"none"`. +#' @param version The anndata on-disk format version to write +write_empty_zarr <- function( + store, + obs, + var, + compression, + version = "0.1.0" +) { + create_zarr(store = store) + write_zarr_encoding(store, "/", "anndata", version) + + write_zarr_element(obs[, integer(0)], store, "/obs", compression) + write_zarr_element(var[, integer(0)], store, "/var", compression) + + create_zarr_group(store, "layers") + write_zarr_encoding(store, "/layers", "dict", "0.1.0") + + create_zarr_group(store, "obsm") + write_zarr_encoding(store, "/obsm", "dict", "0.1.0") + + create_zarr_group(store, "obsp") + write_zarr_encoding(store, "/obsp", "dict", "0.1.0") + + create_zarr_group(store, "uns") + write_zarr_encoding(store, "/uns", "dict", "0.1.0") + + create_zarr_group(store, "varm") + write_zarr_encoding(store, "/varm", "dict", "0.1.0") + + create_zarr_group(store, "varp") + write_zarr_encoding(store, "/varp", "dict", "0.1.0") +} + +#' Zarr path exists +#' +#' Check that a path in Zarr exists +#' +#' @noRd +#' +#' @param store Path to a Zarr store +#' @param target_path The path within the store to test for +#' +#' @return `TRUE` if `target_path` holds Zarr metadata, `FALSE` otherwise +zarr_path_exists <- function(store, target_path) { + zarr <- file.path(store, target_path) + if (!dir.exists(zarr)) { + FALSE + } else { + list_files <- list.files( + path = zarr, + full.names = FALSE, + recursive = FALSE, + all.files = TRUE + ) + if (any(c(".zarray", ".zattrs", ".zgroup") %in% list_files)) { + TRUE + } else { + FALSE + } + } +} + +#' Zarr write compressed +#' +#' Write Zarr dataset with chosen compression (can be none) +#' +#' @noRd +#' +#' @param store Path to a Zarr store +#' @param name Name of the element within the Zarr store to write the array +#' to +#' @param value Value to write. Either a vector or an array; `dim(value)` is +#' used as the array shape when present. +#' @param compression The compression to use when writing the element.
Can be +#' one of `"none"` or `"gzip"`. Defaults to `"none"`. +#' +#' @return The value returned by `Rarr::write_zarr_array()` +zarr_write_compressed <- function( + store, + name, + value, + compression = c("none", "gzip"), + chunks = TRUE +) { + compression <- match.arg(compression) + if (!is.null(dim(value))) { + dims <- dim(value) + } else { + dims <- length(value) + } + + data <- array(data = value, dim = dims) + Rarr::write_zarr_array( + data, + zarr_array_path = file.path(store, name), + chunk_dim = dims, + compressor = .get_compressor(compression) + ) +} + +.get_compressor <- function(x) { + switch(x, "none" = NULL, "gzip" = Rarr::use_gzip(), stop("Unknown compression: ", x)) +} diff --git a/README.md b/README.md index 87ef97ee..5cb32d81 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,9 @@ the task you want to perform. `BiocManager::install("SingleCellExperiment")` - To convert to/from `Seurat` objects, install [SeuratObject](https://cran.r-project.org/package=SeuratObject): `install.packages("SeuratObject")` +- To read/write \*.zarr files, you need to install + [Rarr](https://www.bioconductor.org/packages/release/bioc/html/Rarr.html): + `BiocManager::install("Rarr")` Alternatively, you can install all suggested dependencies at once: diff --git a/inst/extdata/example.zarr.zip b/inst/extdata/example.zarr.zip new file mode 100644 index 00000000..8801bb09 Binary files /dev/null and b/inst/extdata/example.zarr.zip differ diff --git a/inst/scripts/example_h5ad.py b/inst/scripts/example_files.py similarity index 85% rename from inst/scripts/example_h5ad.py rename to inst/scripts/example_files.py index 9142caaf..29b9dc4e 100644 --- a/inst/scripts/example_h5ad.py +++ b/inst/scripts/example_files.py @@ -4,8 +4,13 @@ import numpy # numpy v2.2.6 import pandas # pandas v2.3.0 import scipy.sparse # scipy v1.14.1 +import zarr # zarr 2.14.2 -# This script uses Python to create an example H5AD file for testing +import os +import shutil +import zipfile + +# This script uses Python to create example H5AD and Zarr
files for testing # interoperability between languages. It is designed to be a small but # relatively complex file that tests reading of different types and data # structures. The standard scanpy workflow has also been applied to populate @@ -17,11 +22,17 @@ # changelog below and format the file using Python Black # (https://black.readthedocs.io/en/stable/). # -# Version: 0.2.0 -# Date: 2023-05-11 +# Version: 0.4.0 +# Date: 2025-11-24 # # CHANGELOG -# +# +# v0.4.0 (2025-11-24) +# - Add zarr example +# - Add requirements.yml +# v0.3.0 (2025-08-04) +# - Add adata.varp["test_varp"] to test reading of varp +# - Update package versions to latest stable versions # v0.3.0 (2025-08-04) # - Add adata.varp["test_varp"] to test reading of varp # - Update package versions to latest stable versions @@ -91,3 +102,12 @@ # Write the H5AD file adata.write_h5ad("inst/extdata/example.h5ad", compression="gzip") + +# Write Zarr +adata.write_zarr("inst/extdata/example2.zarr") +# os.chdir("inst/extdata/") +# zip = zipfile.ZipFile("example.zarr.zip", "w", zipfile.ZIP_DEFLATED) +# zip.write("example.zarr") +# shutil.rmtree("example.zarr") +# zip.close() + diff --git a/inst/scripts/requirements.yml b/inst/scripts/requirements.yml new file mode 100644 index 00000000..9f6dfc75 --- /dev/null +++ b/inst/scripts/requirements.yml @@ -0,0 +1,14 @@ +name: anndatar_example +channels: + - conda-forge + - defaults +dependencies: + - python=3.13.5 + - pip + - pip: + - anndata==0.11.4 + - scanpy==1.11.4 + - numpy==2.2.6 + - pandas==2.3.0 + - scipy==1.14.1 + - zarr==2.14.2 diff --git a/man/AbstractAnnData.Rd b/man/AbstractAnnData.Rd index da4553e8..931af073 100644 --- a/man/AbstractAnnData.Rd +++ b/man/AbstractAnnData.Rd @@ -20,7 +20,8 @@ Other AnnData classes: \code{\link{AnnDataView}}, \code{\link{HDF5AnnData}}, \code{\link{InMemoryAnnData}}, -\code{\link{ReticulateAnnData}} +\code{\link{ReticulateAnnData}}, +\code{\link{ZarrAnnData}} } \concept{AnnData classes} \section{Active bindings}{ @@ -70,7 
+71,9 @@ Other AnnData classes: \item \href{#method-AbstractAnnData-as_InMemoryAnnData}{\code{AbstractAnnData$as_InMemoryAnnData()}} \item \href{#method-AbstractAnnData-as_ReticulateAnnData}{\code{AbstractAnnData$as_ReticulateAnnData()}} \item \href{#method-AbstractAnnData-as_HDF5AnnData}{\code{AbstractAnnData$as_HDF5AnnData()}} +\item \href{#method-AbstractAnnData-as_ZarrAnnData}{\code{AbstractAnnData$as_ZarrAnnData()}} \item \href{#method-AbstractAnnData-write_h5ad}{\code{AbstractAnnData$write_h5ad()}} +\item \href{#method-AbstractAnnData-write_zarr}{\code{AbstractAnnData$write_zarr()}} \item \href{#method-AbstractAnnData-clone}{\code{AbstractAnnData$clone()}} } } @@ -352,6 +355,36 @@ An \code{\link{HDF5AnnData}} object } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-AbstractAnnData-as_ZarrAnnData}{}}} +\subsection{Method \code{as_ZarrAnnData()}}{ +Convert to an \code{\link{ZarrAnnData}} + +See \code{\link[=as_ZarrAnnData]{as_ZarrAnnData()}} for more details on the conversion +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{AbstractAnnData$as_ZarrAnnData( + file, + compression = c("none", "gzip"), + mode = c("w-", "r", "r+", "a", "w", "x") +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{file}}{See \code{\link[=as_ZarrAnnData]{as_ZarrAnnData()}}} + +\item{\code{compression}}{See \code{\link[=as_ZarrAnnData]{as_ZarrAnnData()}}} + +\item{\code{mode}}{See \code{\link[=as_ZarrAnnData]{as_ZarrAnnData()}}} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +An \code{\link{ZarrAnnData}} object +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-AbstractAnnData-write_h5ad}{}}} \subsection{Method \code{write_h5ad()}}{ @@ -382,6 +415,36 @@ See \code{\link[=write_h5ad]{write_h5ad()}} for details } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-AbstractAnnData-write_zarr}{}}} +\subsection{Method \code{write_zarr()}}{ +Write the \code{AnnData} object to a Zarr store + +See \code{\link[=write_zarr]{write_zarr()}} for details +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{AbstractAnnData$write_zarr( + path, + compression = c("none", "gzip"), + mode = c("w-", "r", "r+", "a", "w", "x") +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{path}}{See \code{\link[=write_zarr]{write_zarr()}}} + +\item{\code{compression}}{See \code{\link[=write_zarr]{write_zarr()}}} + +\item{\code{mode}}{See \code{\link[=write_zarr]{write_zarr()}}} +} +\if{html}{\out{
}} +} +\subsection{Returns}{ +\code{path} invisibly +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-AbstractAnnData-clone}{}}} \subsection{Method \code{clone()}}{ diff --git a/man/AnnData.Rd b/man/AnnData.Rd index f8074138..a1399c7b 100644 --- a/man/AnnData.Rd +++ b/man/AnnData.Rd @@ -66,6 +66,7 @@ adata Other AnnData creators: \code{\link{as_AnnData}()}, -\code{\link{read_h5ad}()} +\code{\link{read_h5ad}()}, +\code{\link{read_zarr}()} } \concept{AnnData creators} diff --git a/man/AnnDataView.Rd b/man/AnnDataView.Rd index b0a7131e..b5740cc4 100644 --- a/man/AnnDataView.Rd +++ b/man/AnnDataView.Rd @@ -34,7 +34,8 @@ Other AnnData classes: \code{\link{AbstractAnnData}}, \code{\link{HDF5AnnData}}, \code{\link{InMemoryAnnData}}, -\code{\link{ReticulateAnnData}} +\code{\link{ReticulateAnnData}}, +\code{\link{ZarrAnnData}} } \concept{AnnData classes} \section{Super class}{ @@ -83,6 +84,7 @@ Other AnnData classes:
  • anndataR::AbstractAnnData$as_ReticulateAnnData()
  • anndataR::AbstractAnnData$as_Seurat()
  • anndataR::AbstractAnnData$as_SingleCellExperiment()
  • +
  • anndataR::AbstractAnnData$as_ZarrAnnData()
  • anndataR::AbstractAnnData$layers_keys()
  • anndataR::AbstractAnnData$n_obs()
  • anndataR::AbstractAnnData$n_vars()
  • @@ -96,6 +98,7 @@ Other AnnData classes:
  • anndataR::AbstractAnnData$varm_keys()
  • anndataR::AbstractAnnData$varp_keys()
  • anndataR::AbstractAnnData$write_h5ad()
  • +
  • anndataR::AbstractAnnData$write_zarr()
  • }} diff --git a/man/HDF5AnnData.Rd b/man/HDF5AnnData.Rd index 9176a7b4..679841eb 100644 --- a/man/HDF5AnnData.Rd +++ b/man/HDF5AnnData.Rd @@ -22,7 +22,8 @@ Other AnnData classes: \code{\link{AbstractAnnData}}, \code{\link{AnnDataView}}, \code{\link{InMemoryAnnData}}, -\code{\link{ReticulateAnnData}} +\code{\link{ReticulateAnnData}}, +\code{\link{ZarrAnnData}} } \concept{AnnData classes} \section{Super class}{ @@ -72,6 +73,7 @@ Other AnnData classes:
  • anndataR::AbstractAnnData$as_ReticulateAnnData()
  • anndataR::AbstractAnnData$as_Seurat()
  • anndataR::AbstractAnnData$as_SingleCellExperiment()
  • +
  • anndataR::AbstractAnnData$as_ZarrAnnData()
  • anndataR::AbstractAnnData$layers_keys()
  • anndataR::AbstractAnnData$obs_keys()
  • anndataR::AbstractAnnData$obsm_keys()
  • @@ -83,6 +85,7 @@ Other AnnData classes:
  • anndataR::AbstractAnnData$varm_keys()
  • anndataR::AbstractAnnData$varp_keys()
  • anndataR::AbstractAnnData$write_h5ad()
  • +
  • anndataR::AbstractAnnData$write_zarr()
  • }} diff --git a/man/InMemoryAnnData.Rd b/man/InMemoryAnnData.Rd index d397cb6e..5be3f56b 100644 --- a/man/InMemoryAnnData.Rd +++ b/man/InMemoryAnnData.Rd @@ -41,7 +41,8 @@ Other AnnData classes: \code{\link{AbstractAnnData}}, \code{\link{AnnDataView}}, \code{\link{HDF5AnnData}}, -\code{\link{ReticulateAnnData}} +\code{\link{ReticulateAnnData}}, +\code{\link{ZarrAnnData}} } \concept{AnnData classes} \section{Super class}{ @@ -89,6 +90,7 @@ Other AnnData classes:
  • anndataR::AbstractAnnData$as_ReticulateAnnData()
  • anndataR::AbstractAnnData$as_Seurat()
  • anndataR::AbstractAnnData$as_SingleCellExperiment()
  • +
  • anndataR::AbstractAnnData$as_ZarrAnnData()
  • anndataR::AbstractAnnData$layers_keys()
  • anndataR::AbstractAnnData$n_obs()
  • anndataR::AbstractAnnData$n_vars()
  • @@ -102,6 +104,7 @@ Other AnnData classes:
  • anndataR::AbstractAnnData$varm_keys()
  • anndataR::AbstractAnnData$varp_keys()
  • anndataR::AbstractAnnData$write_h5ad()
  • +
  • anndataR::AbstractAnnData$write_zarr()
  • }} diff --git a/man/ReticulateAnnData.Rd b/man/ReticulateAnnData.Rd index 2b5ac2cc..8d4a3daa 100644 --- a/man/ReticulateAnnData.Rd +++ b/man/ReticulateAnnData.Rd @@ -22,7 +22,8 @@ Other AnnData classes: \code{\link{AbstractAnnData}}, \code{\link{AnnDataView}}, \code{\link{HDF5AnnData}}, -\code{\link{InMemoryAnnData}} +\code{\link{InMemoryAnnData}}, +\code{\link{ZarrAnnData}} } \concept{AnnData classes} \section{Super class}{ @@ -72,6 +73,7 @@ Other AnnData classes:
  • anndataR::AbstractAnnData$as_ReticulateAnnData()
  • anndataR::AbstractAnnData$as_Seurat()
  • anndataR::AbstractAnnData$as_SingleCellExperiment()
  • +
  • anndataR::AbstractAnnData$as_ZarrAnnData()
  • anndataR::AbstractAnnData$layers_keys()
  • anndataR::AbstractAnnData$obs_keys()
  • anndataR::AbstractAnnData$obsm_keys()
  • @@ -83,6 +85,7 @@ Other AnnData classes:
  • anndataR::AbstractAnnData$varm_keys()
  • anndataR::AbstractAnnData$varp_keys()
  • anndataR::AbstractAnnData$write_h5ad()
  • +
  • anndataR::AbstractAnnData$write_zarr()
  • }} diff --git a/man/ZarrAnnData.Rd b/man/ZarrAnnData.Rd new file mode 100644 index 00000000..bd4902b2 --- /dev/null +++ b/man/ZarrAnnData.Rd @@ -0,0 +1,191 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ZarrAnnData.R +\name{ZarrAnnData} +\alias{ZarrAnnData} +\title{ZarrAnnData} +\value{ +An \code{ZarrAnnData} object +} +\description{ +Implementation of an Zarr-backed \code{AnnData} object. This class provides an +interface to a Zarr file and minimal data is stored in memory until it is +requested by the user. It is primarily designed as an intermediate object +when reading/writing Zarr files but can be useful for accessing parts of +large files. + +See \link{AnnData-usage} for details on creating and using \code{AnnData} objects. +} +\seealso{ +\link{AnnData-usage} for details on creating and using \code{AnnData} objects + +Other AnnData classes: +\code{\link{AbstractAnnData}}, +\code{\link{AnnDataView}}, +\code{\link{HDF5AnnData}}, +\code{\link{InMemoryAnnData}}, +\code{\link{ReticulateAnnData}} +} +\concept{AnnData classes} +\section{Super class}{ +\code{\link[anndataR:AbstractAnnData]{anndataR::AbstractAnnData}} -> \code{ZarrAnnData} +} +\section{Active bindings}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{X}}{See \link{AnnData-usage}} + +\item{\code{layers}}{See \link{AnnData-usage}} + +\item{\code{obsm}}{See \link{AnnData-usage}} + +\item{\code{varm}}{See \link{AnnData-usage}} + +\item{\code{obsp}}{See \link{AnnData-usage}} + +\item{\code{varp}}{See \link{AnnData-usage}} + +\item{\code{obs}}{See \link{AnnData-usage}} + +\item{\code{var}}{See \link{AnnData-usage}} + +\item{\code{obs_names}}{See \link{AnnData-usage}} + +\item{\code{var_names}}{See \link{AnnData-usage}} + +\item{\code{uns}}{See \link{AnnData-usage}} +} +\if{html}{\out{
    }} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-ZarrAnnData-new}{\code{ZarrAnnData$new()}} +\item \href{#method-ZarrAnnData-close}{\code{ZarrAnnData$close()}} +\item \href{#method-ZarrAnnData-n_obs}{\code{ZarrAnnData$n_obs()}} +\item \href{#method-ZarrAnnData-n_vars}{\code{ZarrAnnData$n_vars()}} +} +} +\if{html}{\out{ +
    Inherited methods + +
    +}} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZarrAnnData-new}{}}} +\subsection{Method \code{new()}}{ +Close the Zarr file when the object is garbage collected + + +\code{ZarrAnnData} constructor +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{ZarrAnnData$new( + file, + X = NULL, + obs = NULL, + var = NULL, + layers = NULL, + obsm = NULL, + varm = NULL, + obsp = NULL, + varp = NULL, + uns = NULL, + shape = NULL, + mode = c("a", "r", "r+", "w", "w-", "x"), + compression = c("none", "gzip") +)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{file}}{The file name (character) of the \code{.zarr} file. If this file +already exists, other arguments must be \code{NULL}.} + +\item{\code{X}}{See the \code{X} slot in \link{AnnData-usage}} + +\item{\code{obs}}{See the \code{obs} slot in \link{AnnData-usage}} + +\item{\code{var}}{See the \code{var} slot in \link{AnnData-usage}} + +\item{\code{layers}}{See the \code{layers} slot in \link{AnnData-usage}} + +\item{\code{obsm}}{See the \code{obsm} slot in \link{AnnData-usage}} + +\item{\code{varm}}{See the \code{varm} slot in \link{AnnData-usage}} + +\item{\code{obsp}}{See the \code{obsp} slot in \link{AnnData-usage}} + +\item{\code{varp}}{See the \code{varp} slot in \link{AnnData-usage}} + +\item{\code{uns}}{See the \code{uns} slot in \link{AnnData-usage}} + +\item{\code{shape}}{Shape tuple (e.g. \code{c(n_obs, n_vars)}). Can be provided if +both \code{X} or \code{obs} and \code{var} are not provided.} + +\item{\code{mode}}{The mode to open the Zarr file. See \code{\link[=as_ZarrAnnData]{as_ZarrAnnData()}} for +details} + +\item{\code{compression}}{The compression algorithm to use. See +\code{\link[=as_ZarrAnnData]{as_ZarrAnnData()}} for details} +} +\if{html}{\out{
}} +} +\subsection{Details}{ +The constructor creates a new Zarr \code{AnnData} interface object. This can +be used either to connect to an existing \code{.zarr} file or to +create a new one. If any additional slot arguments are set an existing +file will be overwritten. +} + +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZarrAnnData-close}{}}} +\subsection{Method \code{close()}}{ +Close the Zarr store/file +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{ZarrAnnData$close()}\if{html}{\out{
    }} +} + +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZarrAnnData-n_obs}{}}} +\subsection{Method \code{n_obs()}}{ +See the \code{n_obs} field in \link{AnnData-usage} +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{ZarrAnnData$n_obs()}\if{html}{\out{
    }} +} + +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-ZarrAnnData-n_vars}{}}} +\subsection{Method \code{n_vars()}}{ +See the \code{n_vars} field in \link{AnnData-usage} +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{ZarrAnnData$n_vars()}\if{html}{\out{
    }} +} + +} +} diff --git a/man/anndataR-package.Rd b/man/anndataR-package.Rd index e14408b8..c7230981 100644 --- a/man/anndataR-package.Rd +++ b/man/anndataR-package.Rd @@ -35,7 +35,7 @@ Other contributors: \item Isaac Virshup (\href{https://orcid.org/0000-0002-1710-8945}{ORCID}) (ivirshup) [contributor] \item Brian Schilder \email{brian_schilder@alumni.brown.edu} (\href{https://orcid.org/0000-0001-5949-2191}{ORCID}) (bschilder) [contributor] \item Chananchida Sang-aram (\href{https://orcid.org/0000-0002-0922-0822}{ORCID}) (csangara) [contributor] - \item Data Intuitive \email{info@data-intuitive.com} [funder, copyright holder] + \item Data Intuitive \email{info@data-intuitive.com} [funder] \item Chan Zuckerberg Initiative [funder] \item scverse consortium [sponsor] } diff --git a/man/as_AnnData.Rd b/man/as_AnnData.Rd index 2d71f80f..2d5901d8 100644 --- a/man/as_AnnData.Rd +++ b/man/as_AnnData.Rd @@ -18,7 +18,7 @@ as_AnnData( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = NULL, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemory", "HDF5AnnData", "ZarrAnnData", "ReticulateAnnData"), ... ) @@ -34,7 +34,7 @@ as_AnnData( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = TRUE, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemory", "HDF5AnnData", "ZarrAnnData", "ReticulateAnnData"), ... ) @@ -50,7 +50,7 @@ as_AnnData( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = NULL, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemory", "HDF5AnnData", "ZarrAnnData", "ReticulateAnnData"), ... 
) } @@ -231,7 +231,8 @@ as_AnnData(sce) \seealso{ Other AnnData creators: \code{\link{AnnData}()}, -\code{\link{read_h5ad}()} +\code{\link{read_h5ad}()}, +\code{\link{read_zarr}()} Other object converters: \code{\link{as_HDF5AnnData}()}, @@ -239,6 +240,7 @@ Other object converters: \code{\link{as_ReticulateAnnData}()}, \code{\link{as_Seurat}()}, \code{\link{as_SingleCellExperiment}()}, +\code{\link{as_ZarrAnnData}()}, \code{\link{reticulate-helpers}} } \concept{AnnData creators} diff --git a/man/as_HDF5AnnData.Rd b/man/as_HDF5AnnData.Rd index 2cced825..d64fd45a 100644 --- a/man/as_HDF5AnnData.Rd +++ b/man/as_HDF5AnnData.Rd @@ -43,6 +43,7 @@ Other object converters: \code{\link{as_ReticulateAnnData}()}, \code{\link{as_Seurat}()}, \code{\link{as_SingleCellExperiment}()}, +\code{\link{as_ZarrAnnData}()}, \code{\link{reticulate-helpers}} } \concept{object converters} diff --git a/man/as_InMemoryAnnData.Rd b/man/as_InMemoryAnnData.Rd index c1e0108a..d803276e 100644 --- a/man/as_InMemoryAnnData.Rd +++ b/man/as_InMemoryAnnData.Rd @@ -35,6 +35,7 @@ Other object converters: \code{\link{as_ReticulateAnnData}()}, \code{\link{as_Seurat}()}, \code{\link{as_SingleCellExperiment}()}, +\code{\link{as_ZarrAnnData}()}, \code{\link{reticulate-helpers}} } \concept{object converters} diff --git a/man/as_ReticulateAnnData.Rd b/man/as_ReticulateAnnData.Rd index 79a75ada..d62660c0 100644 --- a/man/as_ReticulateAnnData.Rd +++ b/man/as_ReticulateAnnData.Rd @@ -41,6 +41,7 @@ Other object converters: \code{\link{as_InMemoryAnnData}()}, \code{\link{as_Seurat}()}, \code{\link{as_SingleCellExperiment}()}, +\code{\link{as_ZarrAnnData}()}, \code{\link{reticulate-helpers}} } \concept{object converters} diff --git a/man/as_Seurat.Rd b/man/as_Seurat.Rd index 9da4e682..dfafff57 100644 --- a/man/as_Seurat.Rd +++ b/man/as_Seurat.Rd @@ -159,6 +159,7 @@ Other object converters: \code{\link{as_InMemoryAnnData}()}, \code{\link{as_ReticulateAnnData}()}, \code{\link{as_SingleCellExperiment}()}, 
+\code{\link{as_ZarrAnnData}()}, \code{\link{reticulate-helpers}} } \concept{object converters} diff --git a/man/as_SingleCellExperiment.Rd b/man/as_SingleCellExperiment.Rd index 6058f00e..c6b4d4fc 100644 --- a/man/as_SingleCellExperiment.Rd +++ b/man/as_SingleCellExperiment.Rd @@ -165,6 +165,7 @@ Other object converters: \code{\link{as_InMemoryAnnData}()}, \code{\link{as_ReticulateAnnData}()}, \code{\link{as_Seurat}()}, +\code{\link{as_ZarrAnnData}()}, \code{\link{reticulate-helpers}} } \concept{object converters} diff --git a/man/as_ZarrAnnData.Rd b/man/as_ZarrAnnData.Rd new file mode 100644 index 00000000..841cd29e --- /dev/null +++ b/man/as_ZarrAnnData.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ZarrAnnData.R +\name{as_ZarrAnnData} +\alias{as_ZarrAnnData} +\title{Convert an \code{AnnData} to an \code{ZarrAnnData}} +\usage{ +as_ZarrAnnData( + adata, + file, + compression = c("none", "gzip"), + mode = c("w-", "r", "r+", "a", "w", "x") +) +} +\arguments{ +\item{adata}{An \code{AnnData} object to be converted to \code{\link{ZarrAnnData}}} + +\item{file}{The file name (character) of the \code{.zarr} file} + +\item{compression}{The compression algorithm to use when writing the +Zarr file. Can be one of \code{"none"} or \code{"gzip"}. Defaults to +\code{"none"}.} + +\item{mode}{The mode to open the Zarr file: +\itemize{ +\item \code{a} creates a new file or opens an existing one for read/write +\item \code{r} opens an existing file for reading +\item \verb{r+} opens an existing file for read/write +\item \code{w} creates a file, truncating any existing ones +\item \verb{w-}/\code{x} are synonyms, creating a file and failing if it already exists +}} +} +\value{ +An \code{\link{ZarrAnnData}} object with the same data as the input \code{AnnData} +object.
+} +\description{ +Convert another \code{AnnData} object to an \code{\link{ZarrAnnData}} object +} +\seealso{ +Other object converters: +\code{\link{as_AnnData}()}, +\code{\link{as_HDF5AnnData}()}, +\code{\link{as_InMemoryAnnData}()}, +\code{\link{as_ReticulateAnnData}()}, +\code{\link{as_Seurat}()}, +\code{\link{as_SingleCellExperiment}()}, +\code{\link{reticulate-helpers}} +} +\concept{object converters} +\keyword{internal} diff --git a/man/create_zarr.Rd b/man/create_zarr.Rd new file mode 100644 index 00000000..f4a16f5f --- /dev/null +++ b/man/create_zarr.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Rarr_utils.R +\name{create_zarr} +\alias{create_zarr} +\title{create_zarr} +\usage{ +create_zarr(store, version = "v2") +} +\arguments{ +\item{store}{the location of zarr store} + +\item{version}{zarr version} +} +\value{ +\code{NULL} +} +\description{ +create zarr store +} +\examples{ +store <- tempfile(fileext = ".zarr") +create_zarr(store) +} diff --git a/man/create_zarr_group.Rd b/man/create_zarr_group.Rd new file mode 100644 index 00000000..4afc722a --- /dev/null +++ b/man/create_zarr_group.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Rarr_utils.R +\name{create_zarr_group} +\alias{create_zarr_group} +\title{create_zarr_group} +\usage{ +create_zarr_group(store, name, version = "v2") +} +\arguments{ +\item{store}{the location of (zarr) store} + +\item{name}{name of the group} + +\item{version}{zarr version} +} +\value{ +\code{NULL} +} +\description{ +create zarr groups +} +\examples{ +store <- tempfile(fileext = ".zarr") +create_zarr(store) +create_zarr_group(store, "gp") +} diff --git a/man/is_zarr_empty.Rd b/man/is_zarr_empty.Rd new file mode 100644 index 00000000..8c31fc0e --- /dev/null +++ b/man/is_zarr_empty.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Rarr_utils.R +\name{is_zarr_empty} 
+\alias{is_zarr_empty} +\title{is_zarr_empty} +\usage{ +is_zarr_empty(store) +} +\arguments{ +\item{store}{the location of zarr store} +} +\value{ +returns TRUE if zarr store is not empty +} +\description{ +check whether a zarr store is empty +} +\examples{ +store <- tempfile(fileext = ".zarr") +create_zarr(store) +is_zarr_empty(store) +} diff --git a/man/read_h5ad.Rd b/man/read_h5ad.Rd index fe9bd5bd..b6aaed24 100644 --- a/man/read_h5ad.Rd +++ b/man/read_h5ad.Rd @@ -59,6 +59,7 @@ if (requireNamespace("SeuratObject", quietly = TRUE)) { \seealso{ Other AnnData creators: \code{\link{AnnData}()}, -\code{\link{as_AnnData}()} +\code{\link{as_AnnData}()}, +\code{\link{read_zarr}()} } \concept{AnnData creators} diff --git a/man/read_zarr.Rd b/man/read_zarr.Rd new file mode 100644 index 00000000..602da2c3 --- /dev/null +++ b/man/read_zarr.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_zarr.R +\name{read_zarr} +\alias{read_zarr} +\title{Read Zarr} +\usage{ +read_zarr( + path, + as = c("InMemoryAnnData", "ZarrAnnData", "SingleCellExperiment", "Seurat"), + mode = c("r", "r+", "a", "w", "w-", "x"), + ... +) +} +\arguments{ +\item{path}{Path to the Zarr store to read} + +\item{as}{The type of object to return. One of: +\itemize{ +\item \code{"InMemoryAnnData"}: Read the Zarr store into memory as an +\code{\link{InMemoryAnnData}} object +\item \code{"ZarrAnnData"}: Read the Zarr store as an \code{\link{ZarrAnnData}} object +\item \code{"SingleCellExperiment"}: Read the Zarr store as a +\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment::SingleCellExperiment}} object +\item \code{"Seurat"}: Read the Zarr store as a +\code{\link[SeuratObject:Seurat-class]{SeuratObject::Seurat}} object +}} + +\item{mode}{The mode to open the Zarr file. +\itemize{ +\item \code{a} creates a new file or opens an existing one for read/write. +\item \code{r} opens an existing file for reading.
+\item \verb{r+} opens an existing file for read/write. +\item \code{w} creates a file, truncating any existing ones. +\item \verb{w-}/\code{x} are synonyms, creating a file and failing if it already exists. +}} + +\item{...}{Extra arguments provided to the \verb{as_*} conversion function for the +object specified by \code{as}} +} +\value{ +The object specified by \code{as} +} +\description{ +Read data from a Zarr store +} +\examples{ +zarr_dir <- system.file("extdata", "example.zarr.zip", package = "anndataR") +td <- tempdir(check = TRUE) +unzip(zarr_dir, exdir = td) +zarr_store <- file.path(td, "example.zarr") + +# Read the Zarr as a SingleCellExperiment object +if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { + sce <- read_zarr(zarr_store, as = "SingleCellExperiment") +} + +# Read the Zarr as a Seurat object +if (requireNamespace("SeuratObject", quietly = TRUE)) { + seurat <- read_zarr(zarr_store, as = "Seurat") +} +} +\seealso{ +Other AnnData creators: +\code{\link{AnnData}()}, +\code{\link{as_AnnData}()}, +\code{\link{read_h5ad}()} +} +\concept{AnnData creators} diff --git a/man/reticulate-helpers.Rd b/man/reticulate-helpers.Rd index f0759403..17c5cd95 100644 --- a/man/reticulate-helpers.Rd +++ b/man/reticulate-helpers.Rd @@ -71,6 +71,7 @@ Other object converters: \code{\link{as_InMemoryAnnData}()}, \code{\link{as_ReticulateAnnData}()}, \code{\link{as_Seurat}()}, -\code{\link{as_SingleCellExperiment}()} +\code{\link{as_SingleCellExperiment}()}, +\code{\link{as_ZarrAnnData}()} } \concept{object converters} diff --git a/man/write_zarr.Rd b/man/write_zarr.Rd new file mode 100644 index 00000000..90e58f2d --- /dev/null +++ b/man/write_zarr.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_zarr.R +\name{write_zarr} +\alias{write_zarr} +\title{Write Zarr} +\usage{ +write_zarr( + object, + path, + compression = c("none", "gzip"), + mode = c("w-", "r", "r+", "a", "w", "x"), + ... 
+) +} +\arguments{ +\item{object}{The object to write, either a +\code{\link[SingleCellExperiment:SingleCellExperiment]{SingleCellExperiment::SingleCellExperiment}} or a +\code{\link[SeuratObject:Seurat-class]{SeuratObject::Seurat}} object} + +\item{path}{Path of the file to write to} + +\item{compression}{The compression algorithm to use when writing the Zarr +file. Can be one of \code{"none"} or \code{"gzip"}. Defaults to \code{"none"}.} + +\item{mode}{The mode to open the Zarr file. +\itemize{ +\item \code{a} creates a new file or opens an existing one for read/write +\item \verb{r+} opens an existing file for read/write +\item \code{w} creates a file, truncating any existing ones +\item \verb{w-}/\code{x} are synonyms creating a file and failing if it already exists +}} + +\item{...}{Additional arguments passed to \code{\link[=as_AnnData]{as_AnnData()}}} +} +\value{ +\code{path} invisibly +} +\description{ +Write a Zarr file +} +\details{ +\subsection{Compression}{ + +Compression is currently not supported for Boolean arrays, they will be +written uncompressed. +} + +\subsection{\code{NULL} values}{ + +For compatibility with changes in Python \strong{anndata} 0.12.0, \code{NULL} values +in \code{uns} are written to Zarr files as a \code{NULL} dataset (instead of not being +written at all). To disable this behaviour, set +\code{options(anndataR.write_null = FALSE)}. This may be required to allow the file +to be read by older versions of Python \strong{anndata}.
+} +} +\examples{ +adata <- AnnData( + X = matrix(1:5, 3L, 5L), + layers = list( + A = matrix(5:1, 3L, 5L), + B = matrix(letters[1:5], 3L, 5L) + ), + obs = data.frame(row.names = LETTERS[1:3], cell = 1:3), + var = data.frame(row.names = letters[1:5], gene = 1:5) +) +zarr_store <- tempfile(fileext = ".zarr") +adata$write_zarr(zarr_store) + +# Write a SingleCellExperiment as a Zarr +if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { + ncells <- 100 + counts <- matrix(rpois(20000, 5), ncol = ncells) + logcounts <- log2(counts + 1) + + pca <- matrix(runif(ncells * 5), ncells) + tsne <- matrix(rnorm(ncells * 2), ncells) + + sce <- SingleCellExperiment::SingleCellExperiment( + assays = list(counts = counts, logcounts = logcounts), + reducedDims = list(PCA = pca, tSNE = tsne) + ) + + adata <- as_AnnData(sce) + zarr_store <- tempfile(fileext = ".zarr") + adata$write_zarr(zarr_store) +} + +# Write a Seurat as a Zarr +if (requireNamespace("Seurat", quietly = TRUE)) { + library(Seurat) + + counts <- matrix(1:15, 5L, 3L) + dimnames(counts) <- list( + LETTERS[1:5], + letters[1:3] + ) + cell.metadata <- data.frame( + row.names = letters[1:3], + cell = 1:3 + ) + obj <- CreateSeuratObject(counts, meta.data = cell.metadata) + gene.metadata <- data.frame( + row.names = LETTERS[1:5], + gene = 1:5 + ) + obj[["RNA"]] <- AddMetaData(GetAssay(obj), gene.metadata) + + adata <- as_AnnData(obj) + zarr_store <- tempfile(fileext = ".zarr") + adata$write_zarr(zarr_store) +} +} diff --git a/tests/testthat/test-Zarr-read.R b/tests/testthat/test-Zarr-read.R new file mode 100644 index 00000000..6559269c --- /dev/null +++ b/tests/testthat/test-Zarr-read.R @@ -0,0 +1,141 @@ +skip_if_not_installed("Rarr") + +# zarr file +zarr_dir <- system.file("extdata", "example.zarr.zip", package = "anndataR") +td <- tempdir(check = TRUE) +unzip(zarr_dir, exdir = td) +store <- file.path(td, "example.zarr") + +test_that("reading encoding works", { + encoding <- read_zarr_encoding(store, "obs") + 
expect_equal(names(encoding), c("type", "version")) +}) + +test_that("reading dense matrices works", { + mat <- read_zarr_dense_array(store, "layers/dense_counts") + expect_true(is.matrix(mat)) + expect_type(mat, "integer") + expect_equal(dim(mat), c(50, 100)) + + mat <- read_zarr_dense_array(store, "layers/dense_X") + expect_true(is.matrix(mat)) + expect_type(mat, "double") + expect_equal(dim(mat), c(50, 100)) +}) + +test_that("reading sparse matrices works", { + mat <- read_zarr_sparse_array(store, "layers/csc_counts", type = "csc") + expect_s4_class(mat, "dgCMatrix") + expect_equal(dim(mat), c(50, 100)) + + mat <- read_zarr_sparse_array(store, "layers/counts", type = "csr") + expect_s4_class(mat, "dgRMatrix") + expect_equal(dim(mat), c(50, 100)) +}) + +# TODO: doesn't work anymore ? +test_that("reading recarrays works", { + skip("Skipping recarray test, not implemented in Rarr yet") + array_list <- read_zarr_rec_array( + store, + "uns/rank_genes_groups/logfoldchanges" + ) + expect_true(is.list(array_list)) + expect_equal(names(array_list), c("0", "1", "2", "3", "4", "5")) + for (array in array_list) { + expect_true(is.vector(array)) + expect_type(array, "double") + expect_equal(length(array), 100) + } +}) + +test_that("reading 1D numeric arrays works", { + array_1d <- read_zarr_dense_array(store, "obs/Int") + expect_equal(array_1d, array(0L:49L)) + + array_1d <- read_zarr_dense_array(store, "obs/Float") + expect_equal(array_1d, array(rep(42.42, 50))) +}) + +test_that("reading 1D sparse numeric arrays works", { + array_1d <- read_zarr_sparse_array(store, "uns/Sparse1D", type = "csc") + expect_s4_class(array_1d, "dgCMatrix") + expect_equal(dim(array_1d), c(1, 6)) +}) + +test_that("reading 1D nullable arrays works", { + array_1d <- read_zarr_nullable_integer(store, "obs/IntNA") + expect_vector(array_1d, ptype = integer(), size = 50) + expect_true(any(is.na(array_1d))) + + array_1d <- read_zarr_dense_array(store, "obs/FloatNA") + expected <- array(rep(42.42, 50)) + 
expected[1] <- NA + expect_equal(array_1d, expected) + + array_1d <- read_zarr_nullable_boolean(store, "obs/BoolNA") + expect_vector(array_1d, ptype = logical(), size = 50) + expect_true(any(is.na(array_1d))) +}) + +test_that("reading string scalars works", { + scalar <- read_zarr_string_scalar(store, "uns/StringScalar") + expect_equal(scalar, "A string") +}) + +test_that("reading numeric scalars works", { + scalar <- read_zarr_numeric_scalar(store, "uns/IntScalar") + expect_equal(scalar, 1) +}) + +test_that("reading string arrays works", { + array <- read_zarr_string_array(store, "uns/String") + expect_equal(array, array(paste0("String ", 0L:9L))) + + array <- read_zarr_string_array(store, "uns/String2D") + expect_true(is.matrix(array)) + expect_type(array, "character") + expect_equal(dim(array), c(5, 10)) +}) + +# TODO: can we get ordering info from attrs ? +test_that("reading mappings works", { + mapping <- read_zarr_mapping(store, "uns") + expect_type(mapping, "list") + expect_type(names(mapping), "character") +}) + +test_that("reading dataframes works", { + df <- read_zarr_data_frame(store, "obs", include_index = TRUE) + expect_s3_class(df, "data.frame") + expect_equal( + colnames(df), + c( + "Float", + "FloatNA", + "Int", + "IntNA", + "Bool", + "BoolNA", + "n_genes_by_counts", + "log1p_n_genes_by_counts", + "total_counts", + "log1p_total_counts", + "leiden" + ) + ) +}) + +test_that("reading Zarr as SingleCellExperiment works", { + skip_if_not_installed("SingleCellExperiment") + + sce <- read_zarr(store, as = "SingleCellExperiment") + expect_s4_class(sce, "SingleCellExperiment") +}) + +test_that("reading Zarr as Seurat works", { + skip_if_not_installed("SeuratObject") + + seurat <- suppressWarnings(read_zarr(store, as = "Seurat")) + expect_s4_class(seurat, "Seurat") +}) diff --git a/tests/testthat/test-Zarr-write.R b/tests/testthat/test-Zarr-write.R new file mode 100644 index 00000000..40473d42 --- /dev/null +++ b/tests/testthat/test-Zarr-write.R @@ -0,0 
+1,253 @@ +skip_if_not_installed("Rarr") + +store <- tempfile(fileext = ".zarr") +if (dir.exists(store)) { + unlink(store, recursive = TRUE) +} + +create_zarr(store = store) + +test_that("Writing Zarr dense arrays works", { + array <- matrix(rnorm(20), nrow = 5, ncol = 4) + + expect_silent(write_zarr_element( + array, + store, + "dense_array", + compression = "none" + )) + expect_true(zarr_path_exists(store, "dense_array")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "dense_array")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "array") +}) + +test_that("Writing Zarr dense 3D arrays works", { + value <- array(rnorm(60), dim = c(5, 4, 3)) + + expect_silent( + write_zarr_element( + value, + store, + "dense_3d_array" + ) + ) + expect_true(zarr_path_exists(store, "dense_3d_array")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "dense_3d_array")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "array") +}) + +test_that("Writing Zarr sparse arrays works", { + array <- matrix(rnorm(20), nrow = 5, ncol = 4) + + csc_array <- as(array, "CsparseMatrix") + expect_silent(write_zarr_element( + csc_array, + store, + "csc_array", + compression = "none" + )) + expect_true(zarr_path_exists(store, "csc_array")) + expect_true(zarr_path_exists(store, "csc_array/data")) + expect_true(zarr_path_exists(store, "csc_array/indices")) + expect_true(zarr_path_exists(store, "csc_array/indptr")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "csc_array")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "csc_matrix") + + csr_array <- as(array, "RsparseMatrix") + expect_silent(write_zarr_element( + csr_array, + store, + "csr_array", + compression = "none" + )) + expect_true(zarr_path_exists(store, "csr_array")) + expect_true(zarr_path_exists(store, 
"csr_array/data")) + expect_true(zarr_path_exists(store, "csr_array/indices")) + expect_true(zarr_path_exists(store, "csr_array/indptr")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "csr_array")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "csr_matrix") +}) + +test_that("Writing dgeMatrix", { + value <- matrix(rnorm(20), nrow = 5, ncol = 4) |> + as("dMatrix") |> + as("generalMatrix") |> + as("unpackedMatrix") + + expect_silent( + write_zarr_element(value, store, "dgematrix") + ) + expect_true(zarr_path_exists(store, "dgematrix")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "dgematrix")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "array") +}) + +test_that("Writing Zarr nullable booleans works", { + nullable <- c(TRUE, TRUE, FALSE, FALSE, FALSE) + nullable[5] <- NA + + expect_silent(write_zarr_element(nullable, store, "nullable_bool")) + expect_true(zarr_path_exists(store, "nullable_bool")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "nullable_bool")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "nullable-boolean") +}) + +test_that("Writing Zarr nullable integers works", { + nullable <- as.integer(1:5) + nullable[5] <- NA + + expect_silent(write_zarr_element(nullable, store, "nullable_int")) + expect_true(zarr_path_exists(store, "nullable_int")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "nullable_int")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "nullable-integer") +}) + +test_that("Writing Zarr string arrays works", { + string <- LETTERS[1:5] + + write_zarr_element(string, store, "string_array") + expect_true(zarr_path_exists(store, "string_array")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "string_array")) + 
expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "string-array") + + string2d <- matrix(LETTERS[1:20], nrow = 5, ncol = 4) + + expect_silent(write_zarr_element(string2d, store, "string_array2D")) + expect_true(zarr_path_exists(store, "string_array2D")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "string_array2D")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "string-array") +}) + +test_that("Writing Zarr categoricals works", { + categorical <- factor(LETTERS[1:5]) + + expect_no_error(write_zarr_element(categorical, store, "categorical")) + expect_true(zarr_path_exists(store, "categorical")) + expect_true(zarr_path_exists(store, "categorical/categories")) + expect_true(zarr_path_exists(store, "categorical/codes")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "categorical")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "categorical") +}) + +test_that("Writing Zarr string scalars works", { + string <- "A" + + expect_silent(write_zarr_element(string, store, "string_scalar")) + expect_true(zarr_path_exists(store, "string_scalar")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "string_scalar")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "string") +}) + +test_that("Writing Zarr numeric scalars works", { + number <- 1.0 + + expect_silent(write_zarr_element(number, store, "numeric_scalar")) + expect_true(zarr_path_exists(store, "numeric_scalar")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "numeric_scalar")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "numeric-scalar") +}) + +test_that("Writing Zarr mappings works", { + mapping <- list( + array = matrix(rnorm(20), nrow = 5, ncol 
= 4), + sparse = as(matrix(rnorm(20), nrow = 5, ncol = 4), "CsparseMatrix"), + string = LETTERS[1:5], + numeric = rnorm(5), + scalar = 2 + ) + + expect_silent(write_zarr_element( + mapping, + store, + "mapping", + compression = "none" + )) + expect_true(zarr_path_exists(store, "mapping")) + expect_true(zarr_path_exists(store, "mapping/array")) + expect_true(zarr_path_exists(store, "mapping/sparse")) + expect_true(zarr_path_exists(store, "mapping/sparse/data")) + expect_true(zarr_path_exists(store, "mapping/sparse/indices")) + expect_true(zarr_path_exists(store, "mapping/sparse/indptr")) + expect_true(zarr_path_exists(store, "mapping/string")) + expect_true(zarr_path_exists(store, "mapping/numeric")) + expect_true(zarr_path_exists(store, "mapping/scalar")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "mapping")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "dict") +}) + +test_that("Writing Zarr data frames works", { + df <- data.frame( + Letters = letters[1:5], + Numbers = 1:5 + ) + + expect_silent(write_zarr_element(df, store, "dataframe")) + expect_true(zarr_path_exists(store, "dataframe")) + expect_true(zarr_path_exists(store, "dataframe/Letters")) + expect_true(zarr_path_exists(store, "dataframe/Numbers")) + expect_true(zarr_path_exists(store, "dataframe/_index")) + attrs <- Rarr::read_zarr_attributes(file.path(store, "dataframe")) + expect_true(all(c("encoding-type", "encoding-version") %in% names(attrs))) + expect_equal(attrs[["encoding-type"]], "dataframe") + expect_true(all(c("_index", "column-order") %in% names(attrs))) + expect_equal(attrs[["_index"]], "_index") + expect_identical( + as.character(attrs[["column-order"]]), + c("Letters", "Numbers") + ) +}) + +test_that("writing Zarr from SingleCellExperiment works", { + skip_if_not_installed("SingleCellExperiment") + store <- tempfile(fileext = ".zarr") + sce <- generate_dataset(format = "SingleCellExperiment") + 
write_zarr(sce, store) + expect_true(dir.exists(store)) +}) + +test_that("writing Zarr from Seurat works", { + skip_if_not_installed("SeuratObject") + store <- tempfile(fileext = ".zarr") + sce <- generate_dataset(format = "Seurat") + write_zarr(sce, store) + expect_true(dir.exists(store)) +}) + +dir_size <- function(path) { + files <- list.files(path, recursive = TRUE, full.names = TRUE) + sum(file.info(files)$size, na.rm = TRUE) +} + +test_that("writing gzip compressed files works for Zarr", { + dummy <- generate_dataset(100, 200) + non_random_X <- matrix(5, 100, 200) # nolint + + adata <- AnnData( + X = non_random_X, + obs = dummy$obs, + var = dummy$var + ) + + store_none <- tempfile(fileext = ".zarr") + store_gzip <- tempfile(fileext = ".zarr") + + write_zarr(adata, store_none, compression = "none") + write_zarr(adata, store_gzip, compression = "gzip") + + expect_true(dir_size(store_none) > dir_size(store_gzip)) +}) + +# TODO: add other zipping schemes ? diff --git a/tests/testthat/test-ZarrAnnData.R b/tests/testthat/test-ZarrAnnData.R new file mode 100644 index 00000000..7d41d81b --- /dev/null +++ b/tests/testthat/test-ZarrAnnData.R @@ -0,0 +1,302 @@ +file <- system.file("extdata", "example.zarr.zip", package = "anndataR") +td <- tempdir(check = TRUE) +unzip(file, exdir = td) +store <- file.path(td, "example.zarr") + +test_that("opening Zarr works", { + adata <- ZarrAnnData$new(store, mode = "r") + expect_true(inherits(adata, "ZarrAnnData")) +}) + +adata <- ZarrAnnData$new(store, mode = "r") + +# GETTERS ---------------------------------------------------------------- +# trackstatus: class=ZarrAnnData, feature=test_get_X, status=done +test_that("reading X works", { + X <- adata$X + expect_s4_class(X, "dgRMatrix") + expect_equal(dim(X), c(50, 100)) +}) + +# trackstatus: class=ZarrAnnData, feature=test_get_layers, status=done +test_that("reading layers works", { + layers <- adata$layers + expect_true(is.list(layers), "list") + expect_equal( + names(layers), + 
c("counts", "csc_counts", "dense_X", "dense_counts") + ) +}) + +test_that("reading obsm works", { + obsm <- adata$obsm + expect_true(is.list(obsm), "list") + expect_equal( + names(obsm), + c("X_pca", "X_umap") + ) +}) + +test_that("reading varm works", { + varm <- adata$varm + expect_true(is.list(varm), "list") + expect_equal( + names(varm), + c("PCs") + ) +}) + +# trackstatus: class=ZarrAnnData, feature=test_get_obsp, status=done +test_that("reading obsp works", { + obsp <- adata$obsp + expect_true(is.list(obsp), "list") + expect_equal( + names(obsp), + c("connectivities", "distances") + ) +}) + +# trackstatus: class=ZarrAnnData, feature=test_get_varp, status=done +test_that("reading varp works", { + varp <- adata$varp + expect_true(is.list(varp), "list") + expect_equal( + names(varp), + c("test_varp") + ) +}) + +# trackstatus: class=ZarrAnnData, feature=test_get_obs, status=done +test_that("reading obs works", { + obs <- adata$obs + expect_s3_class(obs, "data.frame") + expect_equal( + colnames(obs), + c( + "Float", + "FloatNA", + "Int", + "IntNA", + "Bool", + "BoolNA", + "n_genes_by_counts", + "log1p_n_genes_by_counts", + "total_counts", + "log1p_total_counts", + "leiden" + ) + ) +}) + +# trackstatus: class=ZarrAnnData, feature=test_get_var, status=done +test_that("reading var works", { + var <- adata$var + expect_s3_class(var, "data.frame") + expect_equal( + colnames(var), + c( + "String", + "n_cells_by_counts", + "mean_counts", + "log1p_mean_counts", + "pct_dropout_by_counts", + "total_counts", + "log1p_total_counts", + "highly_variable", + "means", + "dispersions", + "dispersions_norm" + ) + ) +}) + +# trackstatus: class=ZarrAnnData, feature=test_get_obs_names, status=done +test_that("reading obs names works", { + obs_names <- adata$obs_names + expect_vector(obs_names, ptype = character(), size = 50) +}) + +# trackstatus: class=ZarrAnnData, feature=test_get_var_names, status=done +test_that("reading var names works", { + var_names <- adata$var_names + 
expect_vector(var_names, ptype = character(), size = 100) +}) + +# SETTERS ---------------------------------------------------------------- +test_that("creating empty Zarr works", { + skip("for now, empty zarr dataframes cannot be written.") + empty_store <- tempfile(fileext = ".zarr") + expect_silent(ZarrAnnData$new(empty_store)) + unlink(empty_store, recursive = TRUE) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_X, status=done +test_that("writing X works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + + X <- matrix(rnorm(10 * 20), nrow = 10, ncol = 20) + expect_silent(zarr$X <- X) + unlink(store, recursive = TRUE) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_layers, status=done +test_that("writing layers works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + + X <- matrix(rnorm(10 * 20), nrow = 10, ncol = 20) + expect_silent(zarr$layers <- list(layer1 = X, layer2 = X)) + unlink(store, recursive = TRUE) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_obs, status=done +test_that("writing obs works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + + obs <- data.frame( + Letters = LETTERS[1:10], + Numbers = 1:10, + row.names = paste0("Row", 1:10) + ) + zarr$obs <- obs + expect_identical(zarr$obs_names, paste0("Row", 1:10)) + unlink(store, recursive = TRUE) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_var, status=done +test_that("writing var works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var 
<- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + + var <- data.frame( + Letters = LETTERS[1:20], + Numbers = 1:20, + row.names = paste0("Row", 1:20) + ) + zarr$var <- var + expect_identical(zarr$var_names, paste0("Row", 1:20)) + unlink(store, recursive = TRUE) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_obs_names, status=done +test_that("writing obs names works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + + zarr$obs_names <- LETTERS[1:10] + expect_identical(zarr$obs_names, LETTERS[1:10]) + unlink(store, recursive = TRUE) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_var_names, status=done +test_that("writing var names works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + + zarr$var_names <- LETTERS[1:20] + expect_identical(zarr$var_names, LETTERS[1:20]) + unlink(store, recursive = TRUE) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_obsm, status=done +test_that("writing obsm works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + obsm_x <- matrix(rnorm(10 * 5), nrow = 10, ncol = 5) + zarr$obsm <- list(X = obsm_x) + # obsm should now have rownames added on-the-fly + expected_obsm_x <- obsm_x + rownames(expected_obsm_x) <- zarr$obs_names + expect_identical(zarr$obsm$X, expected_obsm_x) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_varm, status=done +test_that("writing varm works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names 
= 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + + varm_x <- matrix(rnorm(20 * 5), nrow = 20, ncol = 5) + zarr$varm <- list(PCs = varm_x) + # varm should now have rownames added on-the-fly + expected_varm_x <- varm_x + rownames(expected_varm_x) <- zarr$var_names + expect_identical(zarr$varm$PCs, expected_varm_x) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_obsp, status=done +test_that("writing obsp works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + + obsp_x <- matrix(rnorm(10 * 10), nrow = 10, ncol = 10) + zarr$obsp <- list(connectivities = obsp_x) + # obsp should now have dimnames added on-the-fly + expected_obsp_x <- obsp_x + dimnames(expected_obsp_x) <- list(zarr$obs_names, zarr$obs_names) + expect_identical(zarr$obsp$connectivities, expected_obsp_x) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_varp, status=done +test_that("writing varp works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + varp_x <- matrix(rnorm(20 * 20), nrow = 20, ncol = 20) + zarr$varp <- list(connectivities = varp_x) + # varp should now have dimnames added on-the-fly + expected_varp_x <- varp_x + dimnames(expected_varp_x) <- list(zarr$var_names, zarr$var_names) + expect_identical(zarr$varp$connectivities, expected_varp_x) +}) + +# trackstatus: class=ZarrAnnData, feature=test_set_uns, status=done +test_that("writing uns works", { + store <- tempfile(fileext = ".zarr") + create_zarr(store = store) + obs <- data.frame(row.names = 1:10) + var <- data.frame(row.names = 1:20) + zarr <- ZarrAnnData$new(store, obs = obs, var = var) + zarr$uns <- list( + foo = "bar", + baz = c(1, 2, 3), + nested = list( + nested_foo = "nested_bar", + nested_baz = 
c(4L, 5L, 6L) + ) + ) + expect_identical(zarr$uns$foo, "bar") + expect_equal(zarr$uns$baz, c(1, 2, 3), ignore_attr = TRUE) + expect_identical(zarr$uns$nested$nested_foo, "nested_bar") + expect_equal(zarr$uns$nested$nested_baz, c(4L, 5L, 6L), ignore_attr = TRUE) +}) diff --git a/tests/testthat/test-h5ad-zarr.R b/tests/testthat/test-h5ad-zarr.R new file mode 100644 index 00000000..b848051e --- /dev/null +++ b/tests/testthat/test-h5ad-zarr.R @@ -0,0 +1,125 @@ +skip_if_not_installed("rhdf5") +skip_if_not_installed("Rarr") + +# h5ad file +filename <- system.file("extdata", "example.h5ad", package = "anndataR") +file <- rhdf5::H5Fopen(filename, flags = "H5F_ACC_RDONLY", native = FALSE) + +# zarr file +zarr_dir <- system.file("extdata", "example.zarr.zip", package = "anndataR") +td <- tempdir(check = TRUE) +unzip(zarr_dir, exdir = td) +store <- file.path(td, "example.zarr") + +test_that("reading dense matrices is same for h5ad and zarr", { + mat_h5ad <- read_h5ad_dense_array(file, "layers/dense_counts") + mat_zarr <- read_zarr_dense_array(store, "layers/dense_counts") + expect_equal(mat_h5ad, mat_zarr) + + mat_h5ad <- read_h5ad_dense_array(file, "layers/dense_X") + mat_zarr <- read_zarr_dense_array(store, "layers/dense_X") + expect_equal(mat_h5ad, mat_zarr) +}) + +test_that("reading sparse matrices is same for h5ad and zarr", { + mat_h5ad <- read_h5ad_sparse_array(file, "layers/csc_counts", type = "csc") + mat_zarr <- read_zarr_sparse_array(store, "layers/csc_counts", type = "csc") + expect_equal(mat_h5ad, mat_zarr) + + mat_h5ad <- read_h5ad_sparse_array(file, "layers/counts", type = "csr") + mat_zarr <- read_zarr_sparse_array(store, "layers/counts", type = "csr") + expect_equal(mat_h5ad, mat_zarr) +}) + +test_that("reading recarrays works", { + skip("read_zarr_rec_array is not implemented yet") + array_list <- read_zarr_rec_array( + file, + "uns/rank_genes_groups/logfoldchanges" + ) + expect_true(is.list(array_list)) + expect_equal(names(array_list), c("0", "1", "2", 
"3", "4", "5")) + for (array in array_list) { + expect_true(is.array(array)) + expect_type(array, "double") + expect_equal(dim(array), 100) + } +}) + +test_that("reading 1D numeric arrays is same for h5ad and zarr", { + array_1d_h5ad <- read_h5ad_dense_array(file, "obs/Int") + array_1d_zarr <- read_zarr_dense_array(store, "obs/Int") + expect_equal(array_1d_h5ad, array_1d_zarr) + + array_1d_h5ad <- read_h5ad_dense_array(file, "obs/Float") + array_1d_zarr <- read_zarr_dense_array(store, "obs/Float") + expect_equal(array_1d_h5ad, array_1d_zarr) +}) + +test_that("reading 1D sparse numeric arrays is same for h5ad and zarr", { + array_1d_h5ad <- read_h5ad_sparse_array(file, "uns/Sparse1D", type = "csc") + array_1d_zarr <- read_zarr_sparse_array(store, "uns/Sparse1D", type = "csc") + expect_equal(array_1d_h5ad, array_1d_zarr) +}) + +test_that("reading 1D nullable arrays is same for h5ad and zarr", { + array_1d_h5ad <- read_h5ad_nullable_integer(file, "obs/IntNA") + array_1d_zarr <- read_zarr_nullable_integer(store, "obs/IntNA") + expect_equal(array_1d_h5ad, array_1d_zarr) + + array_1d_h5ad <- read_h5ad_dense_array(file, "obs/FloatNA") + array_1d_zarr <- read_zarr_dense_array(store, "obs/FloatNA") + expect_equal(array_1d_h5ad, array_1d_zarr) + + # TODO: check this test, zarr Bools are stored as dense array hence no mask is given + array_1d_h5ad <- read_h5ad_nullable_boolean(file, "obs/Bool") + array_1d_zarr <- read_zarr_nullable_boolean(store, "obs/Bool") + expect_equal(array_1d_h5ad, array_1d_zarr) + + array_1d_h5ad <- read_h5ad_nullable_boolean(file, "obs/BoolNA") + array_1d_zarr <- read_zarr_nullable_boolean(store, "obs/BoolNA") + expect_equal(array_1d_h5ad, array_1d_zarr) +}) + +test_that("reading string scalars is same for h5ad and zarr", { + scalar_h5ad <- read_h5ad_string_scalar(file, "uns/StringScalar") + scalar_zarr <- read_zarr_string_scalar(store, "uns/StringScalar") + expect_equal(scalar_h5ad, scalar_zarr) +}) + +test_that("reading numeric scalars is same for 
h5ad and zarr", { + scalar_h5ad <- read_h5ad_numeric_scalar(file, "uns/IntScalar") + scalar_zarr <- read_zarr_numeric_scalar(store, "uns/IntScalar") + expect_equal(scalar_h5ad, scalar_zarr) +}) + +test_that("reading string arrays is same for h5ad and zarr", { + array_h5ad <- read_h5ad_string_array(file, "uns/String") + array_zarr <- read_zarr_string_array(store, "uns/String") + expect_equal(array_h5ad, array_zarr) + + array_h5ad <- read_h5ad_string_array(file, "uns/String2D") + array_zarr <- read_zarr_string_array(store, "uns/String2D") + expect_equal(array_h5ad, array_zarr) +}) + +test_that("reading mappings is same for h5ad and zarr", { + skip("for now, example.zarr and example.h5ad are not identical!") + mapping_h5ad <- read_h5ad_mapping(file, "uns") + mapping_zarr <- read_zarr_mapping(store, "uns") + expect_equal(mapping_h5ad, mapping_zarr) +}) + +test_that("reading dataframes works", { + df_h5ad <- read_h5ad_data_frame(file, "obs") + df_zarr <- read_zarr_data_frame(store, "obs", include_index = TRUE) + expect_equal(df_h5ad, df_zarr) +}) + +test_that("reading H5AD as SingleCellExperiment is same for h5ad and zarr", { + skip("for now, example.zarr and example.h5ad are not identical!") + skip_if_not_installed("SingleCellExperiment") + sce_h5ad <- read_h5ad(file, as = "SingleCellExperiment") + sce_zarr <- read_zarr(store, as = "SingleCellExperiment") + expect_equal(sce_h5ad, sce_zarr) +}) diff --git a/vignettes/articles/development_status.Rmd b/vignettes/articles/development_status.Rmd index d60d9870..8e86fd87 100644 --- a/vignettes/articles/development_status.Rmd +++ b/vignettes/articles/development_status.Rmd @@ -67,7 +67,7 @@ status_lines_proc <- status_lines |> # combine with missing fields status_lines_required <- crossing( - class = c("InMemoryAnnData", "HDF5AnnData", "Seurat", "SingleCellExperiment"), + class = c("InMemoryAnnData", "HDF5AnnData", "ZarrAnnData", "Seurat", "SingleCellExperiment"), prefix = c("get_", "test_get_", "set_", "test_set_"), slot = 
c( "X", diff --git a/vignettes/articles/software_design.Rmd b/vignettes/articles/software_design.Rmd index 1492ab9c..6e0d319d 100644 --- a/vignettes/articles/software_design.Rmd +++ b/vignettes/articles/software_design.Rmd @@ -50,7 +50,7 @@ The different `AnnData` classes provide a consistent interface but store and acc It is primarily designed as an intermediate object when reading/writing H5AD files but can be useful for accessing parts of large files. - The `ReticulateAnnData` access data stored in an `AnnData` object in a concurrent Python session. This comes with the overhead and complexity of using `r CRANpkg("reticulate")` but is sometimes useful to access functionality that has not yet been implemented in `r Biocpkg("anndataR")`. -- The planned `ZarrAnnData` will provide an interface to an `AnnData` Zarr store, similar to `HDF5AnnData`. +- The `ZarrAnnData` provides an interface to a Zarr store and, similar to `HDF5AnnData`, minimal data is stored in memory until it is requested by the user. It is primarily designed as an intermediate object when reading/writing Zarr stores but can be useful for accessing parts of large files. - An `AnnDataView` is returned when subsetting an `AnnData` object and provides access to a subset of the data in the referenced object. Some functionality (such as setting slots) requires converting to one of the full classes.