Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 142 additions & 9 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,139 @@ Load10X_Spatial <- function (

return(object)
}
#' Add 10X Cell Types to a Seurat Object
#'
#' This function reads cell type annotations from a CSV file and adds them to the metadata of a Seurat object.
#' If the cell type file does not exist, the original Seurat object is returned unchanged.
#'
#' @param data.dir A string specifying the directory containing the "cell_types" folder with the "cell_types.csv" file.
#' @param object A Seurat object to which the cell type annotations will be added.
#'
#' @return A Seurat object with updated metadata including cell type annotations if the file is found.
#'
#' @details
#' The function searches for a CSV file named "cell_types.csv" in the "cell_types" subdirectory within `data.dir`.
#' The CSV file should contain at least a "barcode" column that matches the cell barcodes in the Seurat object.
#' Additional columns in the CSV file will be merged into the Seurat object's metadata.
#'
#' @importFrom utils read.csv
#' @importFrom tibble rownames_to_column column_to_rownames
#'
#' @examples
#' \dontrun{
#' # Specify the data directory containing the "cell_types" folder
#' data.dir <- "/path/to/data"
#'
#' # Create a Seurat object (example)
#' seurat_obj <- CreateSeuratObject(counts = some_counts_matrix)
#'
#' # Add cell type annotations to the Seurat object
#' seurat_obj <- Add_10X_CellTypes(data.dir, seurat_obj)
#' }

Add_10X_CellTypes <- function(data.dir, object) {
  cell_types_path <- file.path(data.dir, "cell_types", "cell_types.csv")

  # Annotations are optional: without the file the object is returned as-is.
  if (!file.exists(cell_types_path)) {
    return(object)
  }

  cell.types <- read.csv(cell_types_path)
  if (!"barcode" %in% colnames(cell.types)) {
    stop(
      "'", cell_types_path, "' must contain a 'barcode' column",
      call. = FALSE
    )
  }

  # Keep only the first annotation row per barcode so the join below can never
  # duplicate a cell (duplicated row names would be rejected later on).
  cell.types <- cell.types[!duplicated(cell.types$barcode), , drop = FALSE]

  meta.data <- object@meta.data
  cells <- rownames(meta.data)
  meta_data_with_barcodes <- data.frame(
    barcode = cells,
    meta.data,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )

  # Left join: every cell is kept, cells without an annotation get NA.
  merged_meta_data <- merge(
    x = meta_data_with_barcodes,
    y = cell.types,
    by = "barcode",
    all.x = TRUE,
    sort = FALSE
  )

  # merge() does not guarantee the row order of `x` (unmatched rows are
  # appended last), so restore the original cell order explicitly; the
  # metadata rows must line up with the cells of the object.
  merged_meta_data <- merged_meta_data[
    match(cells, merged_meta_data$barcode), ,
    drop = FALSE
  ]
  rownames(merged_meta_data) <- cells
  merged_meta_data$barcode <- NULL

  object@meta.data <- merged_meta_data
  object
}

#' Load a 10x Genomics Single Cell Experiment into a \code{Seurat} object
#'
#' @inheritParams Read10X
#' @inheritParams SeuratObject::CreateSeuratObject
#' @note For multiome 10x data the \code{assay} parameter is not used; each assay is named after the corresponding entry of the matrix list.
#' @param data.dir Directory containing the H5 file specified by \code{filename}
#' @param filename Name of H5 file containing the feature barcode matrix
#' @param to.upper Converts all feature names to upper case. This can provide an
#' approximate conversion of mouse to human gene names which can be useful in an
#' explorative analysis. For cross-species comparisons, orthologous genes should
#' be identified across species and used instead.
#' @param ... Arguments passed to \code{\link{Read10X_h5}}
#'
#' @return A \code{Seurat} object
#'
#'
#' @export
#' @concept preprocessing
#'
#' @examples
#' \dontrun{
#' data_dir <- 'path/to/data/directory'
#' list.files(data_dir) # Should show filtered_feature_bc_matrix.h5
#' Load10X(data.dir = data_dir)
#' }
#'
Load10X <- function(data.dir, filename = "filtered_feature_bc_matrix.h5",
                    assay = "RNA", to.upper = FALSE, ...) {

  if (length(data.dir) > 1) {
    stop("`data.dir` expects a single directory path but received multiple values.")
  }
  if (!file.exists(data.dir)) {
    stop("No such file or directory: '", data.dir, "'")
  }

  # Resolve the H5 file. A file named exactly `filename` wins; otherwise
  # `filename` is used as a pattern for list.files(), and it must identify
  # exactly one file so that the path below is always a single string.
  counts.path <- file.path(data.dir, filename)
  if (!file.exists(counts.path)) {
    matches <- list.files(
      data.dir,
      pattern = filename,
      full.names = FALSE,
      recursive = FALSE
    )
    if (length(matches) == 0) {
      stop("File not found: '", counts.path, "'")
    }
    if (length(matches) > 1) {
      stop(
        "Multiple files in '", data.dir, "' match '", filename, "': ",
        paste(matches, collapse = ", ")
      )
    }
    counts.path <- file.path(data.dir, matches)
  }

  counts <- Read10X_h5(counts.path, ...)
  # Read10X_h5 may hand back a list of matrices (one per modality) instead of
  # a single matrix; both shapes are handled below.
  multimodal <- is.list(counts)

  # Only rewrite feature names when explicitly requested.
  if (isTRUE(to.upper)) {
    if (multimodal) {
      counts <- lapply(counts, function(mat) {
        rownames(mat) <- toupper(rownames(mat))
        mat
      })
    } else {
      rownames(counts) <- toupper(rownames(counts))
    }
  }

  # Cell type annotations are only attached to gene expression assays.
  rna.assays <- c("Gene Expression", "RNA")

  if (!multimodal) {
    object <- CreateSeuratObject(counts, assay = assay)
    if (any(Assays(object) %in% rna.assays)) {
      object <- Add_10X_CellTypes(data.dir, object)
    }
    return(object)
  }

  # One Seurat object per modality, named after its entry in `counts`.
  seurat.list <- lapply(names(counts), function(name) {
    modality <- CreateSeuratObject(
      counts = counts[[name]],
      assay = name,
      project = name
    )
    if (any(Assays(modality) %in% rna.assays)) {
      modality <- Add_10X_CellTypes(data.dir, modality)
    }
    modality
  })

  merge(
    x = seurat.list[[1]],
    y = seurat.list[-1],
    add.cell.ids = names(counts),
    merge.data = FALSE
  )
}


#' Read10x Probe Metadata
Expand Down Expand Up @@ -1263,8 +1396,8 @@ Read10X_Image <- function(
image = image
)

# As of v5.1.0 `Radius.VisiumV1` no longer returns the value of the
# `spot.radius` slot and instead calculates the value on the fly, but we
# As of v5.1.0 `Radius.VisiumV1` no longer returns the value of the
# `spot.radius` slot and instead calculates the value on the fly, but we
# can populate the static slot in case it's depended on.
[email protected] <- Radius(visium.v1)

Expand Down Expand Up @@ -3520,7 +3653,7 @@ SampleUMI <- function(
#' replaces the \code{NormalizeData} → \code{FindVariableFeatures} →
#' \code{ScaleData} workflow by fitting a regularized negative binomial model
#' per gene and returning:
#'
#'
#' - A new assay (default name “SCT”), in which:
#' - \code{counts}: depth‐corrected UMI counts (as if each cell had uniform
#' sequencing depth; controlled by \code{do.correct.umi}).
Expand All @@ -3531,13 +3664,13 @@ SampleUMI <- function(
#'
#' When multiple \code{counts} layers exist (e.g. after \code{split()}),
#' each layer is modeled independently. A consensus variable‐feature set is
#' then defined by ranking features by how often they’re called “variable”
#' then defined by ranking features by how often they’re called “variable”
#' across different layers (ties broken by median rank).
#'
#'
#' By default, \code{sctransform::vst} will drop features expressed in fewer
#' than five cells. In the multi-layer case, this can lead to consensus
#' variable-features being excluded from the output's \code{scale.data} when
#' a feature is "variable" across many layers but sparsely expressed in at
#' a feature is "variable" across many layers but sparsely expressed in at
#' least one.
#'
#' @param object A Seurat object or UMI count matrix.
Expand Down Expand Up @@ -3593,11 +3726,11 @@ SampleUMI <- function(
#' @seealso \code{\link[sctransform]{vst}},
#' \code{\link[sctransform]{get_residuals}},
#' \code{\link[sctransform]{correct_counts}}
#'
#'
#' @rdname SCTransform
#' @concept preprocessing
#' @export
#'
#'
SCTransform.default <- function(
object,
cell.attr,
Expand Down Expand Up @@ -4557,7 +4690,7 @@ FindSpatiallyVariableFeatures.Seurat <- function(
verbose = verbose,
...
)

object <- LogSeuratCommand(object)

return(object)
Expand Down