diff --git a/R/preprocessing.R b/R/preprocessing.R
index 159889081..ee35cb086 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -642,6 +642,166 @@ Load10X_Spatial <- function (
   return(object)
 }
 
+#' Add 10X Cell Types to a Seurat Object
+#'
+#' Reads cell type annotations from a CSV file and adds them to the metadata
+#' of a Seurat object. If the cell type file does not exist, the original
+#' Seurat object is returned unchanged.
+#'
+#' @param data.dir A string specifying the directory containing the
+#' "cell_types" folder with the "cell_types.csv" file.
+#' @param object A Seurat object to which the cell type annotations will be
+#' added.
+#'
+#' @return A Seurat object with updated metadata including cell type
+#' annotations if the file is found.
+#'
+#' @details
+#' The function looks for "cell_types.csv" in the "cell_types" subdirectory of
+#' \code{data.dir}. The CSV file must contain a "barcode" column matching the
+#' cell barcodes of the Seurat object; every other column is added to the
+#' object's metadata. Annotations are aligned to the existing cell order, cells
+#' without an annotation receive \code{NA}, and only the first row is used when
+#' a barcode is listed more than once. Incoming columns whose names clash with
+#' existing metadata columns are renamed with \code{make.unique}.
+#'
+#' @importFrom utils read.csv
+#'
+#' @examples
+#' \dontrun{
+#' # Specify the data directory containing the "cell_types" folder
+#' data.dir <- "/path/to/data"
+#'
+#' # Create a Seurat object (example)
+#' seurat_obj <- CreateSeuratObject(counts = some_counts_matrix)
+#'
+#' # Add cell type annotations to the Seurat object
+#' seurat_obj <- Add_10X_CellTypes(data.dir, seurat_obj)
+#' }
+#'
+Add_10X_CellTypes <- function(data.dir, object) {
+  cell_types_path <- file.path(data.dir, "cell_types", "cell_types.csv")
+  if (!file.exists(cell_types_path)) {
+    return(object)
+  }
+  cell.types <- read.csv(cell_types_path, stringsAsFactors = FALSE)
+  if (!"barcode" %in% colnames(cell.types)) {
+    stop("No 'barcode' column found in '", cell_types_path, "'", call. = FALSE)
+  }
+  meta.data <- object@meta.data
+  new.cols <- setdiff(colnames(cell.types), "barcode")
+  # `merge()` sorts its result by the key, which would detach metadata rows from
+  # their cells; `match()` keeps one row per cell in the original order.
+  idx <- match(rownames(meta.data), cell.types[["barcode"]])
+  annotations <- cell.types[idx, new.cols, drop = FALSE]
+  # Never overwrite an existing metadata column: de-duplicate clashing names.
+  all.names <- make.unique(c(colnames(meta.data), new.cols))
+  colnames(annotations) <- all.names[ncol(meta.data) + seq_along(new.cols)]
+  if (length(new.cols) > 0L) {
+    object@meta.data[colnames(annotations)] <- annotations
+  }
+  return(object)
+}
+
+#' Load a 10x Genomics Single Cell Experiment into a \code{Seurat} object
+#'
+#' @inheritParams Read10X
+#' @inheritParams SeuratObject::CreateSeuratObject
+#' @note If multiome 10x data the assay param will not be used. The names of
+#' each assay contained in the matrix are used.
+#' @param data.dir Directory containing the H5 file specified by \code{filename}
+#' @param filename Name of H5 file containing the feature barcode matrix. If no
+#' file with exactly this name exists, it is used as a regular expression that
+#' must match exactly one file in \code{data.dir}.
+#' @param to.upper Converts all feature names to upper case. This can provide an
+#' approximate conversion of mouse to human gene names which can be useful in an
+#' explorative analysis. For cross-species comparisons, orthologous genes should
+#' be identified across species and used instead.
+#' @param ... Arguments passed to \code{\link{Read10X_h5}}
+#'
+#' @return A \code{Seurat} object
+#'
+#' @export
+#' @concept preprocessing
+#'
+#' @examples
+#' \dontrun{
+#' data_dir <- 'path/to/data/directory'
+#' list.files(data_dir) # Should show filtered_feature_bc_matrix.h5
+#' Load10X(data.dir = data_dir)
+#' }
+#'
+Load10X <- function(data.dir, filename = "filtered_feature_bc_matrix.h5",
+                    assay = "RNA", to.upper = FALSE, ...) {
+  if (length(data.dir) > 1) {
+    stop("`data.dir` expects a single directory path but received multiple values.")
+  }
+  if (!file.exists(data.dir)) {
+    stop("No such file or directory: '", data.dir, "'")
+  }
+
+  # Prefer the exact file name; only fall back to pattern matching when it is
+  # absent, and insist on a single hit so the path is always a scalar.
+  counts.path <- file.path(data.dir, filename)
+  if (!file.exists(counts.path)) {
+    hits <- list.files(data.dir, pattern = filename, full.names = TRUE)
+    if (length(hits) == 0L) {
+      stop("File not found: '", counts.path, "'")
+    }
+    if (length(hits) > 1L) {
+      stop(
+        "Multiple files match '", filename, "' in '", data.dir, "': ",
+        paste(basename(hits), collapse = ", ")
+      )
+    }
+    counts.path <- hits
+  }
+
+  counts <- Read10X_h5(counts.path, ...)
+
+  # Upper-casing of feature names is opt-in via `to.upper`.
+  if (isTRUE(to.upper)) {
+    if (is.list(counts)) {
+      counts <- lapply(counts, function(mat) {
+        rownames(mat) <- toupper(rownames(mat))
+        mat
+      })
+    } else {
+      rownames(counts) <- toupper(rownames(counts))
+    }
+  }
+
+  if (!is.list(counts)) {
+    object <- CreateSeuratObject(counts, assay = assay)
+    if (Assays(object) %in% c("Gene Expression", "RNA")) {
+      object <- Add_10X_CellTypes(data.dir, object)
+    }
+    return(object)
+  }
+
+  # One object per modality; annotations go to Gene Expression / RNA assays only.
+  seurat.list <- lapply(names(counts), function(name) {
+    modality <- CreateSeuratObject(
+      counts = counts[[name]],
+      assay = name,
+      project = name
+    )
+    if (Assays(modality) %in% c("Gene Expression", "RNA")) {
+      modality <- Add_10X_CellTypes(data.dir, modality)
+    }
+    modality
+  })
+
+  # NOTE(review): merging per-modality objects prefixes barcodes with the
+  # modality name, so cells are not shared across assays -- confirm intended.
+  merged.object <- merge(
+    x = seurat.list[[1]],
+    y = seurat.list[-1],
+    add.cell.ids = names(counts),
+    merge.data = FALSE
+  )
+  return(merged.object)
+}
 
 
 #' Read10x Probe Metadata
@@ -1263,8 +1423,8 @@ Read10X_Image <- function(
     image = image
   )
 
-  # As of v5.1.0 `Radius.VisiumV1` no longer returns the value of the 
-  # `spot.radius` slot and instead 
calculates the value on the fly, but we + # As of v5.1.0 `Radius.VisiumV1` no longer returns the value of the + # `spot.radius` slot and instead calculates the value on the fly, but we # can populate the static slot in case it's depended on. visium.v1@spot.radius <- Radius(visium.v1) @@ -3520,7 +3653,7 @@ SampleUMI <- function( #' replaces the \code{NormalizeData} → \code{FindVariableFeatures} → #' \code{ScaleData} workflow by fitting a regularized negative binomial model #' per gene and returning: -#' +#' #' - A new assay (default name “SCT”), in which: #' - \code{counts}: depth‐corrected UMI counts (as if each cell had uniform #' sequencing depth; controlled by \code{do.correct.umi}). @@ -3531,13 +3664,13 @@ SampleUMI <- function( #' #' When multiple \code{counts} layers exist (e.g. after \code{split()}), #' each layer is modeled independently. A consensus variable‐feature set is -#' then defined by ranking features by how often they’re called “variable” +#' then defined by ranking features by how often they’re called “variable” #' across different layers (ties broken by median rank). -#' +#' #' By default, \code{sctransform::vst} will drop features expressed in fewer #' than five cells. In the multi-layer case, this can lead to consenus #' variable-features being excluded from the output's \code{scale.data} when -#' a feature is "variable" across many layers but sparsely expressed in at +#' a feature is "variable" across many layers but sparsely expressed in at #' least one. #' #' @param object A Seurat object or UMI count matrix. @@ -3593,11 +3726,11 @@ SampleUMI <- function( #' @seealso \code{\link[sctransform]{vst}}, #' \code{\link[sctransform]{get_residuals}}, #' \code{\link[sctransform]{correct_counts}} -#' +#' #' @rdname SCTransform #' @concept preprocessing #' @export -#' +#' SCTransform.default <- function( object, cell.attr, @@ -4557,7 +4690,7 @@ FindSpatiallyVariableFeatures.Seurat <- function( verbose = verbose, ... 
) - + object <- LogSeuratCommand(object) return(object)