Skip to content

Commit 3fe7ffd

Browse files
committed
first round of fixes
1 parent d785cf9 commit 3fe7ffd

File tree

11 files changed

+341
-116
lines changed

11 files changed

+341
-116
lines changed

.github/workflows/check-bioc.yml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,8 @@ jobs:
164164
remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = TRUE, upgrade = TRUE)
165165
166166
## Manually install required packages
167-
# BiocManager::install("SummarizedExperiment", ask = FALSE, update = FALSE)
168-
# BiocManager::install("RforMassSpectrometry/MsBackendMetaboLights")
169-
# BiocManager::install("msdata")
167+
BiocManager::install("RforMassSpectrometry/MsBackendMetaboLights")
168+
BiocManager::install("msdata")
170169
171170
# BiocManager::install(c("devtools", "usethis", "vdiffr"), dependencies = TRUE, ask = FALSE, update = FALSE)
172171
## For running the checks
@@ -252,4 +251,4 @@ jobs:
252251
uses: actions/upload-artifact@master
253252
with:
254253
name: ${{ runner.os }}-biocversion-devel-r-devel-results
255-
path: check
254+
path: check

NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ exportMethods(backendInitialize)
4545
exportMethods(backendMerge)
4646
exportMethods(backendParallelFactor)
4747
exportMethods(chromData)
48+
exportMethods(chromExtract)
4849
exportMethods(chromIndex)
4950
exportMethods(chromVariables)
5051
exportMethods(collisionEnergy)
@@ -53,6 +54,7 @@ exportMethods(extractByIndex)
5354
exportMethods(factorize)
5455
exportMethods(filterChromData)
5556
exportMethods(filterPeaksData)
57+
exportMethods(imputePeaksData)
5658
exportMethods(intensity)
5759
exportMethods(isEmpty)
5860
exportMethods(isReadOnly)
@@ -108,6 +110,9 @@ importFrom(methods,existsMethod)
108110
importFrom(methods,is)
109111
importFrom(methods,new)
110112
importFrom(methods,validObject)
113+
importFrom(stats,approx)
114+
importFrom(stats,filter)
115+
importFrom(stats,loess)
111116
importFrom(utils,capture.output)
112117
importFrom(utils,head)
113118
importMethodsFrom(ProtGenerics,"collisionEnergy<-")

R/AllGenerics.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ setGeneric("chromData", function(object, ...) standardGeneric("chromData"))
55
setGeneric("chromData<-", function(object, value) {
66
standardGeneric("chromData<-")
77
})
8+
setGeneric("chromExtract", function(object,peak_table, by, ...) standardGeneric("chromExtract"))
89
setGeneric("chromIndex", function(object, ...) standardGeneric("chromIndex"))
910
setGeneric("chromIndex<-", function(object, value) {
1011
standardGeneric("chromIndex<-")
@@ -47,6 +48,8 @@ setGeneric("productMzMin", function(object, ...) {
4748
setGeneric("productMzMin<-", function(object, value) {
4849
standardGeneric("productMzMin<-")
4950
})
51+
setGeneric("imputePeaksData", function(object, ...)
52+
standardGeneric("imputePeaksData"))
5053
#' @rdname hidden_aliases
5154
setGeneric("reset", function(object, ...) {
5255
standardGeneric("reset")

R/ChromBackend.R

Lines changed: 121 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@
119119
#' @param BPPARAM Parallel setup configuration. See [BiocParallel::bpparam()]
120120
#' for more information.
121121
#'
122+
#' @param by A character vector specifying the column names in both
123+
#' `peak_table` and the `chromData` of `object` that uniquely identify
124+
#' chromatograms. The combination of these columns must be unique in the
125+
#' `chromData`. Can also be of length 1
126+
#'
122127
#' @param columns For `chromData()` accessor: optional `character` with column
123128
#' names (chromatogram variables) that should be included in the
124129
#' returned `data.frame`. By default, all columns are returned.
@@ -148,6 +153,15 @@
148153
#'
149154
#' @param object Object extending `ChromBackend`.
150155
#'
156+
#' @param peak_table A data frame containing the following minimum columns:
157+
#' - "rtMin": Minimum retention time for each peak. Cannot be NA
158+
#' - "rtMax": Maximum retention time for each peak. Cannot be NA
159+
#'
160+
#' Additionally, the `peak_table` must include columns that uniquely identify
161+
#' chromatograms in the `object`. Common choices are "chromIndex" and/or
162+
#' "dataOrigin". These columns must also be present in the `chromData` of the
163+
#' `object`.
164+
#'
151165
#' @param ranges For `filterChromData()` : a `numeric`
152166
#' vector of paired values (upper and lower boundary) that define the
153167
#' ranges to filter the `object`. These paired values need to be in the
@@ -273,6 +287,9 @@
273287
#' available in `object`. Variables listed by this function are expected to
274288
#' be returned (if requested) by the `chromData()` function.
275289
#'
290+
#' - `chromExtract()`: return A new `Chrombackend` object containing separated
291+
#' peaks as individual chromatograms.
292+
#'
276293
#' - `collisionEnergy()`, `collisionEnergy<-`: gets or sets the collision
277294
#' energy for the precursor (for SRM data). `collisionEnergy()` returns a
278295
#' `numeric` of length equal to the number of chromatograms in `object`.
@@ -473,11 +490,11 @@
473490
NULL
474491

475492
setClass("ChromBackend",
476-
contains = "VIRTUAL",
477-
slots = c(
478-
version = "character"
479-
),
480-
prototype = prototype(readonly = FALSE, version = "0.1")
493+
contains = "VIRTUAL",
494+
slots = c(
495+
version = "character"
496+
),
497+
prototype = prototype(readonly = FALSE, version = "0.1")
481498
)
482499

483500
#' @importMethodsFrom S4Vectors $ $<-
@@ -601,8 +618,8 @@ setReplaceMethod("[[", "ChromBackend", function(x, i, j, ..., value) {
601618
#'
602619
#' @export
603620
setMethod("backendBpparam",
604-
signature = "ChromBackend",
605-
function(object, BPPARAM = bpparam()) BPPARAM
621+
signature = "ChromBackend",
622+
function(object, BPPARAM = bpparam()) BPPARAM
606623
)
607624

608625
#' @exportMethod backendInitialize
@@ -613,11 +630,11 @@ setMethod("backendBpparam",
613630
#'
614631
#' @rdname ChromBackend
615632
setMethod("backendInitialize",
616-
signature = "ChromBackend",
617-
definition = function(object, ...) {
618-
validObject(object)
619-
object
620-
}
633+
signature = "ChromBackend",
634+
definition = function(object, ...) {
635+
validObject(object)
636+
object
637+
}
621638
)
622639

623640
#' @rdname ChromBackend
@@ -1024,28 +1041,28 @@ setMethod("split", "ChromBackend", function(x, f, drop = FALSE, ...) {
10241041
setMethod(
10251042
"filterChromData", "ChromBackend",
10261043
function(object, variables = character(),
1027-
ranges = numeric(), match = c("any", "all"),
1028-
keep = TRUE) {
1044+
ranges = numeric(), match = c("any", "all"),
1045+
keep = TRUE) {
10291046
if (!length(variables) || !length(ranges))
10301047
return(object)
10311048
if (!is.numeric(ranges))
10321049
stop("filterChromData only support filtering for numerical ",
1033-
"'variables'")
1050+
"'variables'")
10341051
match <- match.arg(match)
10351052
if (is.character(variables)) {
10361053
if (!all(variables %in% chromVariables(object)))
10371054
stop("One or more values passed with parameter ",
1038-
"'variables' are not available as chromatogram ",
1039-
"variables in object. Use the 'chromVariables()' ",
1040-
"function to list possible values." )
1055+
"'variables' are not available as chromatogram ",
1056+
"variables in object. Use the 'chromVariables()' ",
1057+
"function to list possible values." )
10411058
} else
10421059
stop("The 'variables' parameter needs to be of type ",
1043-
"'character'." )
1060+
"'character'." )
10441061
if (length(variables) != length(ranges) / 2)
10451062
stop("Length of 'ranges' needs to be twice the length of ",
1046-
"the parameter 'variables' and define the lower ",
1047-
"and upper bound for values of each chromatogram ",
1048-
"variable defined with parameter 'variables'." )
1063+
"the parameter 'variables' and define the lower ",
1064+
"and upper bound for values of each chromatogram ",
1065+
"variable defined with parameter 'variables'." )
10491066
query <- chromData(object, columns = variables)
10501067
idx <- .filter_ranges(query, ranges, match)
10511068
if (keep) return(object[idx])
@@ -1063,41 +1080,37 @@ setMethod(
10631080
#' @description
10641081
#' Filter the peak data based on the provided ranges for the given variables.
10651082
#'
1066-
#' @note This function replaces the peaksData() of the input object. Therefore
1067-
#' backend with `readOnly == TRUE` (i.e. ChromBackendmzR) will need to have a
1068-
#' carefully implemented `peaksData(object) <-` method.
1069-
#'
10701083
#' @export
10711084
setMethod(
10721085
"filterPeaksData", "ChromBackend",
10731086
function(object, variables = character(),
1074-
ranges = numeric(), match = c("any", "all"),
1075-
keep = TRUE) {
1087+
ranges = numeric(), match = c("any", "all"),
1088+
keep = TRUE) {
10761089
if (!length(ranges) || !length(variables))
10771090
return(object)
10781091
if (!is.numeric(ranges))
10791092
stop( "filterPeaksData only support filtering for ",
1080-
"numerical peak variables")
1093+
"numerical peak variables")
10811094
match <- match.arg(match)
10821095
if (is.character(variables)) {
10831096
if (!all(variables %in% peaksVariables(object)))
10841097
stop("One or more values passed with parameter ",
1085-
"'variables' are not available as peaks ",
1086-
"variables in object. Use the 'peaksVariables()' ",
1087-
"function to list possible values.")
1098+
"'variables' are not available as peaks ",
1099+
"variables in object. Use the 'peaksVariables()' ",
1100+
"function to list possible values.")
10881101
} else
10891102
stop("The 'variables' parameter needs to be of type ",
1090-
"'character'.")
1103+
"'character'.")
10911104
if (length(variables) != length(ranges) / 2)
10921105
stop("Length of 'ranges' needs to be twice the length of ",
1093-
"the parameter 'variables' and define the lower and ",
1094-
"upper bound for values of each peak variable ",
1095-
"defined with parameter 'variables'.")
1106+
"the parameter 'variables' and define the lower and ",
1107+
"upper bound for values of each peak variable ",
1108+
"defined with parameter 'variables'.")
10961109
if (keep) sel_fun <- function(z, idx) z[idx, , drop = FALSE]
10971110
else sel_fun <- function(z, idx) {
1098-
if (!length(idx)) return(z)
1099-
else return(z[-idx, , drop = FALSE])
1100-
}
1111+
if (!length(idx)) return(z)
1112+
else return(z[-idx, , drop = FALSE])
1113+
}
11011114
peaksData(object) <- lapply(peaksData(object), function(pd) {
11021115
sel_fun(pd, .filter_ranges(
11031116
pd[, variables, drop = FALSE],
@@ -1113,5 +1126,73 @@ setMethod(
11131126
#' @exportMethod supportsSetBackend
11141127
#'
11151128
#' @rdname ChromBackend
1116-
#' @export
11171129
setMethod("supportsSetBackend", "ChromBackend", function(object, ...) FALSE)
1130+
1131+
#' @rdname ChromBackend
1132+
#' @exportMethod imputePeaksData
1133+
#' @param method character: Imputation method ("linear", "spline",
1134+
#' "gaussian", "loess")
1135+
#' @param span `numeric`, for the loess method: Smoothing parameter (only used
1136+
#' if method == "loess")
1137+
#' @param sd `numeric`, for the gaussian method: Standard deviation for
1138+
#' Gaussian kernel (only for method == "gaussian")
1139+
#' @param window `integer`, for the gaussian method: Half-width of Gaussian
1140+
#' kernel window (e.g., 2 gives window size 5)
1141+
#' @importFrom stats approx filter loess
1142+
setMethod("imputePeaksData", signature(object = "ChromBackend"),
1143+
function(object,
1144+
method = c("linear", "spline", "gaussian", "loess"),
1145+
span = 0.3,
1146+
sd = 1,
1147+
window = 2,
1148+
...) {
1149+
method <- match.arg(method)
1150+
impute <- function(x) {
1151+
if (all(is.na(x))) return(x)
1152+
1153+
na_idx <- which(is.na(x))
1154+
if (length(na_idx) == 0) return(x)
1155+
1156+
not_na_idx <- which(!is.na(x))
1157+
x_out <- seq_along(x)
1158+
1159+
x[na_idx] <- switch(method,
1160+
linear = approx(not_na_idx, x[not_na_idx],
1161+
xout = na_idx, rule = 2)$y,
1162+
1163+
spline = spline(not_na_idx, x[not_na_idx],
1164+
xout = na_idx, method = "natural")$y,
1165+
gaussian = {
1166+
# Create symmetric Gaussian kernel
1167+
kernel_range <- -window:window
1168+
w <- dnorm(kernel_range, mean = 0, sd = sd)
1169+
w <- w / sum(w)
1170+
1171+
# Fill missing with linear approx to allow smoothing
1172+
x_filled <- x
1173+
x_filled[is.na(x_filled)] <- approx(not_na_idx, x[not_na_idx],
1174+
xout = which(is.na(x_filled)),
1175+
rule = 2)$y
1176+
smoothed <- filter(x_filled, filter = w, sides = 2, circular = FALSE)
1177+
smoothed[na_idx]
1178+
},
1179+
1180+
loess = {
1181+
fit <- loess(x[not_na_idx] ~ not_na_idx, span = span)
1182+
predict(fit, newdata = na_idx)
1183+
}
1184+
)
1185+
x
1186+
}
1187+
object$intensity <- lapply(object$intensity, impute)
1188+
object
1189+
})
1190+
1191+
#' @rdname ChromBackend
1192+
#' @exportMethod chromExtract
1193+
#'
1194+
setMethod("chromExtract", "ChromBackend",
1195+
function(object, by = character(), drop = FALSE) {
1196+
stop("Not implemented for ", class(object), ".")
1197+
}
1198+
)

R/ChromBackendMemory.R

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,54 @@ setReplaceMethod("$", "ChromBackendMemory", function(x, name, value) {
297297
}
298298
x
299299
})
300+
301+
#' @rdname hidden_aliases
302+
setMethod("chromExtract", "ChromBackendMemory", function(object, peak_table, by) {
303+
if (!all(c("rtMin", "rtMax") %in% colnames(peak_table))) {
304+
stop('The peak_table must contain the columns: "rtMin" and "rtMax".')
305+
}
306+
307+
if (any(is.na(peak_table$rtMin)) || any(is.na(peak_table$rtMax))) {
308+
stop("The 'rtMin' and 'rtMax' columns in peak_table cannot contain NA values.")
309+
}
310+
311+
cd <- chromData(object)
312+
313+
if (!all(by %in% colnames(cd))) {
314+
stop("All 'by' columns must be present in the chromData of the object.")
315+
}
316+
317+
# Check for uniqueness of the combination of 'by' columns in chromData
318+
if (nrow(cd) != nrow(unique(cd[by]))) {
319+
stop("The combination of 'by' columns must be unique in the chromData.")
320+
}
321+
322+
pdata <- vector("list", nrow(peak_table))
323+
new_cdata <- data.frame()
324+
325+
for (i in seq_len(nrow(peak_table))) {
326+
p <- peak_table[i, ]
327+
328+
# Create a logical vector to match rows based on 'by' columns
329+
match_idx <- rep(TRUE, nrow(cd))
330+
for (col in by) {
331+
match_idx <- match_idx & (cd[[col]] == p[[col]])
332+
}
333+
334+
idx <- which(match_idx)
335+
336+
if (length(idx) == 0) next
337+
338+
# Update peaksData
339+
rt <- rtime(object)[[idx]]
340+
inrt <- rt >= p$rtMin & rt <= p$rtMax
341+
pdata[[i]] <- peaksData(object)[[idx]][inrt, , drop = FALSE]
342+
343+
# Update chromData
344+
new_cdata <- rbind(new_cdata, cd[idx, ])
345+
}
346+
347+
backendInitialize(new("ChromBackendMemory"),
348+
chromData = new_cdata, peaksData = pdata)
349+
350+
})

0 commit comments

Comments
 (0)