|
| 1 | +#' check_input_data |
| 2 | +#' |
| 3 | +#' saftey check for function in harp |
| 4 | +#' This function checks the input data for correctness, ensuring that the matrices for the reference profile (`C`), |
| 5 | +#' the bulk expression profile (`Y`), and the scaled expression matrix (`X_sc`) fullfill required structural |
| 6 | +#' and naming standards. It also verifies the presence of mutual genes between `Y` and `X_sc`, rearranges the data |
| 7 | +#' to align them properly, and confirms that sample and gene names are consistent across all matrices. |
| 8 | +#' @param C matrix representing the reference profile, with row names as cell types and column names as samples. |
| 9 | +#' @param Y matrix representing the bulk expression profile, with row names as genes and column names as samples. |
| 10 | +#' @param X_sc matrix representing the reference profiles of cell types, with row names as genes and column names as cell types. |
| 11 | +#' |
| 12 | +#' @return list containing the validated and reordered matrices |
| 13 | +#' @noRd |
| 14 | +check_input_data <- function(C, Y, X_sc) { |
| 15 | + message("Starting validation of the input data...") |
| 16 | + |
| 17 | + # Check if C is a matrix |
| 18 | + if (!is.matrix(C)) { |
| 19 | + stop("In check_input: C (cell counts) is not a matrix") |
| 20 | + } |
| 21 | + |
| 22 | + # Check if Y is a matrix |
| 23 | + if (!is.matrix(Y)) { |
| 24 | + stop("In check_input: Y (bulk expression) is not a matrix") |
| 25 | + } |
| 26 | + # Check if Y is a matrix |
| 27 | + if (!is.matrix(X_sc)) { |
| 28 | + stop("In check_input: X_sc (cell reference profile) is not a matrix") |
| 29 | + } |
| 30 | + |
| 31 | + # Check if column names of Y are in column names of C |
| 32 | + if (!all(colnames(Y) %in% colnames(C)) || !all(colnames(C) %in% colnames(Y))) { |
| 33 | + stop("In check_input: sample names in Y (bulk expression) and C (cell counts) must match exactly") |
| 34 | + } |
| 35 | + |
| 36 | + # Check if rownames of C match colnames of X_sc |
| 37 | + if (!all(rownames(C) %in% colnames(X_sc)) || !all(colnames(X_sc) %in% rownames(C))) { |
| 38 | + stop("In check_input: cell type names in C (cell counts) and X_sc (cell reference profile) must match exactly") |
| 39 | + } |
| 40 | + # Check if rownames of Y are in rownames of X_sc |
| 41 | + if (!any(all(rownames(Y) %in% rownames(X_sc)))) { |
| 42 | + message("Genes in cell reference profile and bulk expression profile do not completely match; mutual genes will be selected.") |
| 43 | + } |
| 44 | + |
| 45 | + # Select mutual genes |
| 46 | + gene <- intersect(rownames(X_sc), rownames(Y)) |
| 47 | + |
| 48 | + # Stop if no mutual genes are found |
| 49 | + if (length(gene) == 0) { |
| 50 | + stop("In check_input: No mutual genes found between reference profile and bulk expression profile") |
| 51 | + } |
| 52 | + |
| 53 | + # Produce a warning if mutual genes are fewer than 500 |
| 54 | + if (length(gene) < 500) { |
| 55 | + warning("In check_input: Fewer than 500 mutual genes found between reference profile and bulk profile") |
| 56 | + } |
| 57 | + |
| 58 | + # reorder samples in Y to match the order in c |
| 59 | + Y <- Y[gene, colnames(C)] |
| 60 | + # reorder cell types of X_sc to match the order in C |
| 61 | + X_sc <- X_sc[gene, rownames(C)] |
| 62 | + message("Input data validation complete.") |
| 63 | + |
| 64 | + list(C = C, Y = Y, X_sc = X_sc) |
| 65 | +} |
| 66 | + |
| 67 | +#' test_number |
| 68 | +#' saftey check for function in harp |
| 69 | +#' |
| 70 | +#' @param value value to be tested for number properties |
| 71 | +#' @param validation.source vector of length 2: [1] the calling function's name, [2] the parameter name being tested |
| 72 | +#' @param min minimum |
| 73 | +#' @param max maximum |
| 74 | +#' @param integer_only logical, if TRUE checks for integer, if FALSE allows any non-negative number |
| 75 | +#' |
| 76 | +#' @return TRUE, if no error is detected, stops with error otherwise |
| 77 | +#' @noRd |
| 78 | +test_number <- function(value, |
| 79 | + validation.source, |
| 80 | + min, |
| 81 | + max, |
| 82 | + integer_only = TRUE) { |
| 83 | + error.message <- paste0("In ", validation.source[1], ": ", "'", validation.source[2], "'") |
| 84 | + |
| 85 | + if (!is.numeric(value) || length(value) != 1) { |
| 86 | + error.message <- paste0(error.message, " is not a single numeric value") |
| 87 | + stop(error.message, call. = FALSE) |
| 88 | + } |
| 89 | + |
| 90 | + # for non-integer case, ensure value is >= 0 |
| 91 | + if (!integer_only && value < 0) { |
| 92 | + error.message <- paste0(error.message, " must be non-negative") |
| 93 | + stop(error.message, call. = FALSE) |
| 94 | + } |
| 95 | + # check for integer if required |
| 96 | + if (integer_only && round(value) != value) { |
| 97 | + error.message <- paste0(error.message, " is not an integer") |
| 98 | + stop(error.message, call. = FALSE) |
| 99 | + } |
| 100 | + |
| 101 | + if (value < min) { |
| 102 | + error.message <- paste0(error.message, " is below minimal value") |
| 103 | + stop(error.message, call. = FALSE) |
| 104 | + } |
| 105 | + |
| 106 | + if (value > max) { |
| 107 | + error.message <- paste0(error.message, " is above maximal value") |
| 108 | + stop(error.message, call. = FALSE) |
| 109 | + } |
| 110 | + |
| 111 | + return(TRUE) |
| 112 | +} |
| 113 | + |
| 114 | +#' validate lambda sequence |
| 115 | +#' |
| 116 | +#' saftey check for function in harp |
| 117 | +#' @param lambda_seq single non-negative number or sequence of non-negative numbers (at least two values recommended) |
| 118 | +#' @param caller_function_name name of the calling function for error messages |
| 119 | +#' @param allow_single logical; if TRUE, lambda_seq can be a single non-negative number. |
| 120 | +#' |
| 121 | +#' |
| 122 | +#' @return TRUE if validation passes, stops with error otherwise |
| 123 | +#' @noRd |
| 124 | +validate_lambda_seq <- function(lambda_seq, caller_function_name, allow_single = TRUE) { |
| 125 | + # check if caller_function_name is provided |
| 126 | + if (missing(caller_function_name)) { |
| 127 | + stop("caller_function_name must be provided") |
| 128 | + } |
| 129 | + |
| 130 | + # check if input is numeric vector |
| 131 | + if (!is.numeric(lambda_seq)) { |
| 132 | + stop(paste0("In ", caller_function_name, ": lambda_seq must be numeric")) |
| 133 | + } |
| 134 | + |
| 135 | + # for single value |
| 136 | + if (length(lambda_seq) == 1) { |
| 137 | + if (!allow_single) { |
| 138 | + stop(paste0("In ", caller_function_name, ": lambda_seq cannot be a single value when allow_single is FALSE")) |
| 139 | + } |
| 140 | + test_number( |
| 141 | + value = lambda_seq, |
| 142 | + validation.source = c(caller_function_name, "lambda_seq"), |
| 143 | + min = 0, |
| 144 | + max = Inf, |
| 145 | + integer_only = FALSE # allow any non-negative number |
| 146 | + ) |
| 147 | + } |
| 148 | + # for sequence |
| 149 | + else { |
| 150 | + # check if any values are negative |
| 151 | + if (any(lambda_seq < 0)) { |
| 152 | + stop(paste0("In ", caller_function_name, ": all values in lambda_seq must be non-negative")) |
| 153 | + } |
| 154 | + |
| 155 | + # check if sequence has at least two values |
| 156 | + if (length(lambda_seq) < 2) { |
| 157 | + stop(paste0("In ", caller_function_name, ": lambda_seq must contain at least two values (more values recommended)")) |
| 158 | + } |
| 159 | + } |
| 160 | + |
| 161 | + return(TRUE) |
| 162 | +} |
| 163 | + |
| 164 | +#' check_logical |
| 165 | +#' |
| 166 | +#' saftey check for functions in harp |
| 167 | +#' @param value value to be tested for number properties |
| 168 | +#' @param validation.source vector of length 2: [1] the calling function's name, [2] the parameter name being tested |
| 169 | +#' |
| 170 | +#' @return TRUE, or it throws an error |
| 171 | +#' @noRd |
| 172 | +check_logical <- function(value, |
| 173 | + validation.source) { |
| 174 | + error.message <- paste0("In ", validation.source[1], ": ", "'", validation.source[2], "'") |
| 175 | + |
| 176 | + if (any(!is.logical(value)) || length(value) != 1) { |
| 177 | + error.message <- paste0(error.message, " must be a single value, either 'TRUE' or 'FALSE' ") |
| 178 | + stop(error.message, call. = FALSE) |
| 179 | + } |
| 180 | + return(TRUE) |
| 181 | +} |
0 commit comments