Skip to content
Open
18 changes: 18 additions & 0 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -3120,6 +3120,15 @@ RelativeCounts <- function(data, scale.factor = 1, verbose = TRUE) {
if (verbose) {
cat("Performing relative-counts-normalization\n", file = stderr())
}

# Per-column totals of `data`, computed once and shared by the optional
# median scale factor and by the normalization below.
col.totals <- Matrix::colSums(data)

# The string "median" requests the median of the per-column totals as the
# scale factor.
if (is.character(scale.factor) && scale.factor == "median") {
  if (verbose) {
    cat("Calculating median scale factor\n", file = stderr())
  }
  scale.factor <- median(col.totals)
}

norm.data <- data
# NOTE(review): assumes `data` is column-compressed (dgCMatrix-style), so that
# diff(@p) is the number of stored entries per column and rep.int() lines each
# column's total up with that column's entries in @x -- confirm against the
# coercion done earlier in this function.
norm.data@x <- norm.data@x / rep.int(col.totals, diff(norm.data@p)) *
  scale.factor
return(norm.data)
Expand Down Expand Up @@ -4336,6 +4345,15 @@ LogNormalize.V3Matrix <- function(
if (verbose) {
cat("Performing log-normalization\n", file = stderr())
}

# A scale.factor given as the string "median" is replaced, before
# normalization, by the median of the per-column totals of `data`.
median.requested <- is.character(x = scale.factor) && scale.factor == "median"
if (median.requested) {
  if (verbose) {
    cat("Calculating median scale factor\n", file = stderr())
  }
  column.totals <- Matrix::colSums(x = data)
  scale.factor <- median(x = column.totals)
}

norm.data <- LogNorm(data, scale_factor = scale.factor, display_progress = verbose)
colnames(x = norm.data) <- colnames(x = data)
rownames(x = norm.data) <- rownames(x = data)
Expand Down
50 changes: 50 additions & 0 deletions R/preprocessing5.R
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,20 @@ LogNormalize.default <- function(
# Resolve scale.factor = "median" BEFORE the progress bar is created: a
# style-3 text progress bar is drawn on stderr as soon as it is constructed,
# so a status message printed afterwards would land on the bar's line.
if (is.character(scale.factor) && scale.factor == "median") {
  # Same verbosity test as the progress bar below, so a non-logical `verbose`
  # is handled identically in both places.
  if (isTRUE(x = verbose)) {
    cat("Calculating median scale factor\n", file = stderr())
  }
  # Totals along the axis that is being normalized. The loop that follows
  # walks `ncells` items and reads item i as data[i, ] when margin == 1L, so
  # for margin 1L the rows are the per-cell axis (row totals); any other
  # margin uses the column totals.
  cell.totals <- if (margin == 1L) {
    rowSums(data)
  } else {
    colSums(data)
  }
  scale.factor <- median(cell.totals)
}

if (isTRUE(x = verbose)) {
  pb <- txtProgressBar(file = stderr(), style = 3)
}

for (i in seq_len(length.out = ncells)) {
x <- if (margin == 1L) {
data[i, ]
Expand Down Expand Up @@ -288,6 +302,15 @@ LogNormalize.IterableMatrix <- function(
verbose = TRUE,
...
) {

# Per-column totals, computed a single time: they feed both the optional
# median scale factor and the normalization, so `data` is not summed twice.
col.totals <- colSums(data)

# The string "median" requests the median of the per-column totals as the
# scale factor.
if (is.character(scale.factor) && scale.factor == "median") {
  if (verbose) {
    cat("Calculating median scale factor\n", file = stderr())
  }
  scale.factor <- median(col.totals)
}

# Transpose, divide by the per-column totals, then transpose back
data <- BPCells::t(BPCells::t(data) / col.totals)
# Log normalization
data <- log1p(data * scale.factor)
Expand Down Expand Up @@ -860,6 +883,33 @@ DISP <- function(
p <- p + 1L
}
np <- length(x = p) - 1L

# When scale.factor is the string "median", replace it with the median of the
# per-column sums of the stored values. If verbose is TRUE, a status message
# and a dedicated progress bar accompany this pass.
if (is.character(scale.factor) && scale.factor == "median") {
  show.progress <- isTRUE(x = verbose)
  if (show.progress) {
    cat("Calculating median scale factor\n", file = stderr())
    pb_median <- txtProgressBar(style = 3L, file = stderr())
  }
  # Read the value slot once rather than once per column
  values <- slot(object = data, name = entryname)
  col_sums <- numeric(length = np)
  for (i in seq_len(length.out = np)) {
    # Column i owns the entries p[i] .. p[i + 1] - 1 (p is 1-based here).
    # The index is built with seq_len() so that a column with no stored
    # entries gives an empty index and a sum of 0; seq.int(from, to) would
    # instead count down from p[i] to p[i] - 1 and pull in entries that
    # belong to other columns.
    idx <- p[i] - 1L + seq_len(length.out = p[i + 1L] - p[i])
    col_sums[i] <- sum(values[idx])
    if (show.progress) {
      setTxtProgressBar(pb = pb_median, value = i / np)
    }
  }
  if (show.progress) {
    close(con = pb_median)
  }
  scale.factor <- median(x = col_sums)
}

if (isTRUE(x = verbose)) {
pb <- txtProgressBar(style = 3L, file = stderr())
}
Expand Down
53 changes: 53 additions & 0 deletions tests/testthat/test_preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,21 @@ test_that("Relative count normalization returns expected values", {
expect_equal(rc.counts[2, 1], 14285.71, tolerance = 1e-6)
})

# Dense copy of the test counts, used by the LogNormalize.V3Matrix and
# RelativeCounts median tests
denseMatrix <- as.matrix(pbmc.test)
test_that("LogNormalize.V3Matrix computes median scale factor correctly", {
  # Passing the explicitly computed median must give the same result as
  # passing the string "median"
  median.total <- median(colSums(denseMatrix))
  norm.explicit <- LogNormalize.V3Matrix(
    data = denseMatrix,
    scale.factor = median.total,
    margin = 2L,
    verbose = FALSE
  )
  norm.keyword <- LogNormalize.V3Matrix(
    data = denseMatrix,
    scale.factor = "median",
    margin = 2L,
    verbose = FALSE
  )
  expect_equal(
    as.matrix(norm.explicit),
    as.matrix(norm.keyword),
    tolerance = 1e-6
  )
})

test_that("RelativeCounts computes median scale factor correctly", {
  # Passing the explicitly computed median must give the same result as
  # passing the string "median"
  median.total <- median(colSums(denseMatrix))
  rc.explicit <- RelativeCounts(
    data = denseMatrix,
    scale.factor = median.total,
    verbose = FALSE
  )
  rc.keyword <- RelativeCounts(
    data = denseMatrix,
    scale.factor = "median",
    verbose = FALSE
  )
  expect_equal(
    as.matrix(rc.explicit),
    as.matrix(rc.keyword),
    tolerance = 1e-6
  )
})

# Tests for v5 NormalizeData
# --------------------------------------------------------------------------------
context("v5 NormalizeData")
Expand Down Expand Up @@ -175,6 +190,44 @@ test_that("LogNormalize normalizes properly for BPCells", {
)
})

test_that("LogNormalize.IterableMatrix computes median scale factor correctly", {
  skip_on_cran()
  library(Matrix)
  skip_if_not_installed("BPCells")
  library(BPCells)
  # Build the BPCells matrix from the transposed counts, then transpose back
  counts.transposed <- t(object[['RNA']]$counts)
  mat_bpcells <- t(as(counts.transposed, "IterableMatrix"))
  # Passing the explicitly computed median must give the same result as
  # passing the string "median"
  median.total <- median(colSums(mat_bpcells))
  norm.explicit <- LogNormalize.IterableMatrix(
    data = mat_bpcells,
    scale.factor = median.total,
    margin = 2L,
    verbose = FALSE
  )
  norm.keyword <- LogNormalize.IterableMatrix(
    data = mat_bpcells,
    scale.factor = "median",
    margin = 2L,
    verbose = FALSE
  )
  expect_equal(
    as.matrix(norm.explicit),
    as.matrix(norm.keyword),
    tolerance = 1e-6
  )
})

# Dense matrix for testing LogNormalize.default with scale.factor = "median"
denseMatrix <- as.matrix(pbmc.test)
test_that("LogNormalize.default computes median scale factor correctly for both margin values", {
  # margin = 1L: normalization runs over the rows of denseMatrix
  row.median <- median(rowSums(denseMatrix))
  rows.explicit <- LogNormalize.default(
    data = denseMatrix,
    scale.factor = row.median,
    margin = 1L,
    verbose = FALSE
  )
  rows.keyword <- LogNormalize.default(
    data = denseMatrix,
    scale.factor = "median",
    margin = 1L,
    verbose = FALSE
  )

  # margin = 2L: normalization runs over the columns of denseMatrix
  col.median <- median(colSums(denseMatrix))
  cols.explicit <- LogNormalize.default(
    data = denseMatrix,
    scale.factor = col.median,
    margin = 2L,
    verbose = FALSE
  )
  cols.keyword <- LogNormalize.default(
    data = denseMatrix,
    scale.factor = "median",
    margin = 2L,
    verbose = FALSE
  )

  expect_equal(
    as.matrix(rows.explicit),
    as.matrix(rows.keyword),
    tolerance = 1e-6
  )
  expect_equal(
    as.matrix(cols.explicit),
    as.matrix(cols.keyword),
    tolerance = 1e-6
  )
})

# Sparse matrix for testing that .SparseNormalize resolves
# scale.factor = "median" correctly
theSparseMatrix <- as.sparse(denseMatrix)
# The description names the function actually exercised here; it previously
# repeated the title of the LogNormalize.default test, which made failures of
# the two tests indistinguishable in the test report.
test_that(".SparseNormalize computes median scale factor correctly", {
  expectedMedian <- median(colSums(theSparseMatrix))

  resultFromExpectedMedian <- .SparseNormalize(
    data = theSparseMatrix,
    scale.factor = expectedMedian,
    verbose = FALSE
  )
  resultsFromScaleFactorSetToMedian <- .SparseNormalize(
    data = theSparseMatrix,
    scale.factor = "median",
    verbose = FALSE
  )

  expect_equal(
    resultFromExpectedMedian,
    resultsFromScaleFactorSetToMedian,
    tolerance = 1e-6
  )
})


# Tests for ScaleData
# --------------------------------------------------------------------------------
context("ScaleData")
Expand Down