diff --git a/RPackageSource/DESCRIPTION b/RPackageSource/DESCRIPTION index 76e6268c..12c772c2 100644 --- a/RPackageSource/DESCRIPTION +++ b/RPackageSource/DESCRIPTION @@ -13,12 +13,13 @@ Description: This R package support all cohort-specific analyses of the COMETS reflects the number of major revision and 4th level for bug fixes. Depends: R (>= 3.5.0) Imports: readxl, rio, dplyr, plyr, plotly, tidyr, heatmaply, stringr, - data.table, caret, subselect, broom, psych, MASS, ppcor, + data.table, caret, broom, psych, MASS, ppcor, survival Suggests: Hmisc, knitr, testthat, rmarkdown, RaMP VignetteBuilder: knitr License: GPL-3 -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 +Encoding: UTF-8 NeedsCompilation: no URL: http://comets-analytics.org/ BugReports: https://github.com/CBIIT/nci-webtools-comets-analytics/issues diff --git a/RPackageSource/NAMESPACE b/RPackageSource/NAMESPACE index 6723cbec..15796c43 100644 --- a/RPackageSource/NAMESPACE +++ b/RPackageSource/NAMESPACE @@ -48,7 +48,6 @@ importFrom(stats,pt) importFrom(stats,qnorm) importFrom(stats,relevel) importFrom(stringr,str_locate) -importFrom(subselect,trim.matrix) importFrom(survival,Surv) importFrom(survival,clogit) importFrom(survival,coxph) diff --git a/RPackageSource/R/02_getModelData.R b/RPackageSource/R/02_getModelData.R index 5aa879f1..fb6a12b0 100644 --- a/RPackageSource/R/02_getModelData.R +++ b/RPackageSource/R/02_getModelData.R @@ -64,7 +64,7 @@ getModelData <- function(readData, offvar = NULL, timevar = NULL, groupvar = NULL, - where = NULL, + where = NULL, exposurerefs = NULL) { rowvars <- outcomes @@ -283,6 +283,10 @@ getModelData <- function(readData, if (!is.null(acovs)) covlist <- c(covlist, acovs) if (!is.null(scovs)) covlist <- c(covlist, scovs) covlist <- c(covlist, wgtcov, offcov, timecov, groupcov) + + # Store all variables before subsetting + all_vars <- colnames(readData$subjdata) + varMap <- NULL if (length(wgtvar) || length(offvar)) { varMap <- c(wgtvar, offvar) @@ -360,7 +364,9 @@ getModelData <- function(readData, allvsall = allvsall, varMap = varMap, options = options, - exposurerefs = exposurerefs + exposurerefs = exposurerefs, + all_vars = all_vars, + all_data = readData$subjdata ) ret[[runModel.getWarningsListName()]] <- rem.obj diff --git a/RPackageSource/R/03_runModel.R b/RPackageSource/R/03_runModel.R index 0d4f5247..82b07245 100644 --- a/RPackageSource/R/03_runModel.R +++ b/RPackageSource/R/03_runModel.R @@ -33,10 +33,10 @@ #' obj <- runModel(modeldata,exmetabdata, cohortLabel="DPP") #' @export -runModel <- function(modeldata, metabdata, cohortLabel="", op=NULL, writeTofile=FALSE) { +runModel <- function(modeldata, metabdata, cohortLabel="", op=NULL, writeTofile=FALSE, all_vars=FALSE) { ret <- try(myrunModel(modeldata, metabdata, cohortLabel=cohortLabel, - op=op, writeTofile=writeTofile), silent=FALSE) + op=op, writeTofile=writeTofile, all_vars=all_vars), silent=FALSE) if (("try-error" %in% class(ret)) || !isValidReturnObj(ret)) { lab <- modeldata[["modlabel", exact=TRUE]] ret <- getResListFromError(ret, lab) @@ -44,7 +44,7 @@ runModel <- function(modeldata, metabdata, cohortLabel="", op=NULL, writeTofile= ret } -myrunModel <- function(modeldata, metabdata, cohortLabel="", op=NULL, writeTofile=FALSE) { +myrunModel <- function(modeldata, metabdata, cohortLabel="", op=NULL, writeTofile=FALSE, all_vars=FALSE) { ptm <- base::proc.time() # start processing time @@ -66,8 +66,8 @@ myrunModel <- function(modeldata, metabdata, cohortLabel="", op=NULL, writeTofil ret <- runModel.start(modeldata, metabdata, op) ret <- runModel.checkRetlist(ret, op) - ret <- runModel.addMetabCols(ret, metabdata, op) - ret <- runModel.getTable1(ret, modeldata, op) + ret <- runModel.addMetabCols(ret, metabdata, op) + ret <- runModel.getTable1(ret, modeldata, op, all_vars) ret <- runModel.getInfoDF(ret, modeldata, metabdata, op) ret <- runModel.dupMetabHarmIds(ret, metabdata, op) @@ -87,7 +87,7 @@ myrunModel <- function(modeldata, metabdata, cohortLabel="", op=NULL, writeTofil runModel.dupMetabHarmIds <- function(ret, metabdata, op) { ids <- metabdata[[dupMetabHarmIds(), exact=TRUE]] - if (!length(ids)) return(ret) + if (!length(na.omit(ids))) return(ret) idstr <- paste0(ids, collapse=", ") nm <- runModel.getWarningsListName() obj <- ret[[nm, exact=TRUE]] diff --git a/RPackageSource/R/imports.R b/RPackageSource/R/imports.R index c4154b45..761a30b5 100644 --- a/RPackageSource/R/imports.R +++ b/RPackageSource/R/imports.R @@ -16,7 +16,6 @@ #' @importFrom broom glance tidy #' @importFrom stats cor #' @importFrom MASS ginv -#' @importFrom subselect trim.matrix #' @importFrom survival coxph Surv clogit strata #' @importFrom stringr str_locate #' @importFrom utils sessionInfo untar unzip read.table diff --git a/RPackageSource/R/internalFunctions.r b/RPackageSource/R/internalFunctions.r index 40f47828..b56615ee 100644 --- a/RPackageSource/R/internalFunctions.r +++ b/RPackageSource/R/internalFunctions.r @@ -759,3 +759,37 @@ addMetabInfo <- function(corrlong, modeldata, metabdata) { } # END: addMetabInfo + + + + +# --------------------------------------------------------------------------- +# trim.matrix function (from subselect)-------------------------------------- +# --------------------------------------------------------------------------- +#' This function removes variables that cause the input design matrix to be ill-conditionned. The function was copied direclty from the package as a quickfix: https://cran.r-project.org/src/contrib/Archive/subselect/subselect_0.15.5.tar.gz +#' @keywords internal +#' @param mat correlation matrix +#' @param totalval totalval + +trim.matrix <- function(mat,tolval=10*.Machine$double.eps) +{ + p <- dim(mat)[2] + matindices <- 1:p + mat.eig <- eigen(mat,symmetric=TRUE) + discard <- rep(FALSE,p) + newmat <- mat + newmatindices <- matindices + while(mat.eig$values[p]/mat.eig$values[1] < tolval) + { + int <- as.numeric(newmatindices[order(abs(mat.eig$vectors[,p]),decreasing=TRUE)[1]]) + discard[int] <- TRUE + newmat <- mat[!discard,!discard] + newmatindices <- matindices[!discard] + p <- p-1 + mat.eig <- eigen(newmat,symmetric=TRUE) + } + size <- dim(newmat)[2] + output <- list(newmat,as.numeric(matindices[discard]),colnames(mat)[discard],size) + names(output) <- c("trimmedmat","numbers.discarded","names.discarded","size") + output +} diff --git a/RPackageSource/R/options.R b/RPackageSource/R/options.R index 7ae9506a..dd2145a5 100644 --- a/RPackageSource/R/options.R +++ b/RPackageSource/R/options.R @@ -76,7 +76,7 @@ #' are removed by computing the correlation matrix #' \code{cor_matrix = cor(design_matrix, method=check.cor.method)}. \cr #' 4. If \code{check.illCond} is TRUE, then the function -#' \code{subselect::trim.matrix} is applied +#' \code{trim.matrix} is applied #' to the correlation matrix to determine if additional #' columns will be removed. \cr \cr #' If \code{check.design} is TRUE, then steps 2-4 are repeated each time an exposure diff --git a/RPackageSource/R/runModel_checks.R b/RPackageSource/R/runModel_checks.R index a113eccc..227f61e0 100644 --- a/RPackageSource/R/runModel_checks.R +++ b/RPackageSource/R/runModel_checks.R @@ -110,7 +110,7 @@ runModel.checkDesignMatCols <- function(dmat, op, rem.obj=NULL, varMap=NULL, # check for ill conditioned square matrix for cor if ((ncol(dmat) > 2) && (op$check.illCond)) { if (is.null(corMat)) corMat <- stats::cor(dmat[, -1, drop=FALSE], method=method) - rem <- subselect::trim.matrix(corMat) + rem <- trim.matrix(corMat) rem <- rem$names.discarded if (length(rem)) { rem.obj <- runModel.addRemVars(rem.obj, rem, varSet, "ill conditioned", diff --git a/RPackageSource/R/table1.R b/RPackageSource/R/table1.R index d7ee1217..8c556f28 100644 --- a/RPackageSource/R/table1.R +++ b/RPackageSource/R/table1.R @@ -1,7 +1,6 @@ -runModel.getTable1 <- function(ret, modeldata, op) { - +runModel.getTable1 <- function(ret, modeldata, op, all_vars) { if (!is.list(ret)) return(ret) - tab1 <- try(getTable1(modeldata, op), silent=TRUE) + tab1 <- try(getTable1(modeldata, op, all_vars), silent=TRUE) if (!("try-error" %in% class(tab1))) { ret[[getTable1DfName()]] <- tab1 } @@ -25,11 +24,20 @@ getTable1ColNames <- function() { ov0=ov0, ov1=ov1) } -getTable1 <- function(modeldata, op) { - +getTable1 <- function(modeldata, op, all_vars) { modeldata <- table1_getVars(modeldata) - vars <- c("rcovs", "ccovs", "acovs", - "timecov", "groupcov", "wgtcov", "offcov") + if (all_vars) { + modeldata$all_vars <- modeldata$all_vars[!grepl("\\.\\.\\.", modeldata$all_vars)] + vars <- c( + "rcovs", "all_vars", "acovs", + "timecov", "groupcov", "wgtcov", "offcov" + ) + } else { + vars <- c( + "rcovs", "ccovs", "acovs", + "timecov", "groupcov", "wgtcov", "offcov" + ) + } nms <- c("outcome", "exposure", "adjustment", "time", "group", "weight", "offset") yv <- modeldata[["rcovs", exact=TRUE]] @@ -89,14 +97,18 @@ getTable1 <- function(modeldata, op) { row <- 0 for (strat in 1:nstrata) { strata <- stratlist[strat] - tmp <- stratvec %in% strata - data <- (modeldata$gdta)[tmp, , drop=FALSE] + tmp <- stratvec %in% strata + if (all_vars) { + data <- modeldata$all_data[tmp, , drop = FALSE] + } else { + data <- (modeldata$gdta)[tmp, , drop=FALSE] + } yeq0 <- data[, yv, drop=TRUE] %in% 0 yeq1 <- data[, yv, drop=TRUE] %in% 1 #stratS <- table1_getStratStr(sv, strata) for (i in 1:tot) { - var <- allvars[i] + var <- allvars[i] mtype <- allmtypes[i] catvec <- rep("", ncatCols) contvec <- rep(NA, ncontCols) diff --git a/RPackageSource/man/COMETS.Rd b/RPackageSource/man/COMETS.Rd deleted file mode 100644 index bcafc117..00000000 --- a/RPackageSource/man/COMETS.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/imports.R -\docType{package} -\name{COMETS} -\alias{COMETS} -\title{COMETS Analytics R package} -\description{ -This R package supports all cohort-specific analyses of - the COMETS consortium \url{https://www.comets-analytics.org/}. -Data are not saved in the system but - output must be downloaded and submitted for meta-analyses. - import only functions needed -} -\details{ -\bold{Functions for analysis:} \cr -\code{\link{runCorr}} (correlation analysis) \cr -\code{\link{runModel}} (correlation, glm or lm) \cr -\code{\link{runAllModels}} (run models in batch mode from models sheet) \cr -\bold{Functions for graphics:} \cr -\code{\link{plotVar}} (metabolite variance distribution plot) \cr -\code{\link{plotMinvalues}} (distribution of missing values) \cr -\code{\link{showHeatmap}} (heat map of metabolite correlations) \cr -\code{\link{showHClust}} (interactive heat map with hierarchical clustering) \cr -\bold{Functions for saving results to files:} \cr -\code{\link{OutputCSVResults}} (write to .csv file) \cr -\code{\link{OutputXLSResults}} (write to excel file) \cr -\code{\link{OutputListToExcel}} (write list of data frames to excel file with multiple sheets) \cr -} diff --git a/RPackageSource/man/CometsAnalytics.Rd b/RPackageSource/man/CometsAnalytics.Rd deleted file mode 100644 index 3ec2b6fd..00000000 --- a/RPackageSource/man/CometsAnalytics.Rd +++ /dev/null @@ -1,13 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/imports.R -\docType{package} -\name{CometsAnalytics} -\alias{CometsAnalytics} -\alias{CometsAnalytics-package} -\title{COMETS Analytics R package} -\description{ -This R package support all cohort-specific analyses of - the COMETS consortium. Data are not saved in the system but - output must be downloaded and submitted for meta-analyses. - import only functions needed -} diff --git a/RPackageSource/man/RcometsAnalytics.Rd b/RPackageSource/man/RcometsAnalytics.Rd deleted file mode 100644 index 2a424bd0..00000000 --- a/RPackageSource/man/RcometsAnalytics.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/imports.R -\docType{package} -\name{RcometsAnalytics} -\alias{RcometsAnalytics} -\title{RcometsAnalytics R package} -\description{ -This R package supports all cohort-specific analyses of - the COMETS consortium \url{https://www.comets-analytics.org/}. -Data are not saved in the system but - output must be downloaded and submitted for meta-analyses. - import only functions needed -} -\details{ -\bold{Functions for analysis:} \cr -\code{\link{runCorr}} (correlation analysis) \cr -\code{\link{runModel}} (correlation, glm or lm) \cr -\code{\link{runAllModels}} (run models in batch mode from models sheet) \cr -\bold{Functions for graphics:} \cr -\code{\link{plotVar}} (metabolite variance distribution plot) \cr -\code{\link{plotMinvalues}} (distribution of missing values) \cr -\code{\link{showHeatmap}} (heat map of metabolite correlations) \cr -\code{\link{showHClust}} (interactive heat map with hierarchical clustering) \cr -\bold{Functions for saving results to files:} \cr -\code{\link{OutputCSVResults}} (write to .csv file) \cr -\code{\link{OutputXLSResults}} (write to excel file) \cr -\code{\link{OutputListToExcel}} (write list of data frames to excel file with multiple sheets) \cr -} diff --git a/RPackageSource/man/options.Rd b/RPackageSource/man/options.Rd index 7bdd60fe..30c1fa7a 100644 --- a/RPackageSource/man/options.Rd +++ b/RPackageSource/man/options.Rd @@ -81,7 +81,7 @@ Before any analysis is performed, an initial design matrix is are removed by computing the correlation matrix \code{cor_matrix = cor(design_matrix, method=check.cor.method)}. \cr 4. If \code{check.illCond} is TRUE, then the function - \code{subselect::trim.matrix} is applied + \code{trim.matrix} is applied to the correlation matrix to determine if additional columns will be removed. \cr \cr If \code{check.design} is TRUE, then steps 2-4 are repeated each time an exposure diff --git a/RPackageSource/man/readCOMETSinput.Rd b/RPackageSource/man/readCOMETSinput.Rd index 06559d7e..c73373d4 100644 --- a/RPackageSource/man/readCOMETSinput.Rd +++ b/RPackageSource/man/readCOMETSinput.Rd @@ -30,8 +30,10 @@ Additional information regarding each sheet in the input Excel file is given bel \bold{Metabolites} \cr A table with the columns \code{METABID}, \code{METABOLITE_NAME}, and possibly other columns \cr - of information about the metabolites. The \code{METABID} column is used - for harmonizing the metabolite names across different cohorts when meta-analyses are performed. \cr + of information about the metabolites. The \code{METABID} column gives the column names of the + metabolites in the \bold{SubjectMetabolites} sheet below. Other useful columns are for + example, \code{HMDB}, \code{PUBCHEM}, etc that give corresponding metabolite ids + in that specific metabolite database. \cr \bold{SubjectMetabolites} \cr A table with the subject ids in the first column and metabolites as the other columns. Each cell must contain a numeric value or left empty (missing); otherwise an error diff --git a/RPackageSource/man/runAllModels.Rd b/RPackageSource/man/runAllModels.Rd index 58bb3a45..0fa867fb 100644 --- a/RPackageSource/man/runAllModels.Rd +++ b/RPackageSource/man/runAllModels.Rd @@ -17,7 +17,7 @@ separate xlsx files). Files are written to current directory. Default is TRUE.} } \value{ A list of return objects from \code{\link{runModel}} or \code{\link{runCorr}}. - The \code{ith} element in this list is the output from + The \code{ith} element in this list is the output from the \code{ith} model run. } \description{ diff --git a/RPackageSource/man/runModel.Rd b/RPackageSource/man/runModel.Rd index edb5a9a6..f5c09cce 100644 --- a/RPackageSource/man/runModel.Rd +++ b/RPackageSource/man/runModel.Rd @@ -9,7 +9,8 @@ runModel( metabdata, cohortLabel = "", op = NULL, - writeTofile = FALSE + writeTofile = FALSE, + all_vars = FALSE ) } \arguments{ diff --git a/RPackageSource/man/trim.matrix.Rd b/RPackageSource/man/trim.matrix.Rd new file mode 100644 index 00000000..cb51175f --- /dev/null +++ b/RPackageSource/man/trim.matrix.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/internalFunctions.r +\name{trim.matrix} +\alias{trim.matrix} +\title{This function removes variables that cause the input design matrix to be ill-conditionned. The function was copied direclty from the package as a quickfix: https://cran.r-project.org/src/contrib/Archive/subselect/subselect_0.15.5.tar.gz} +\usage{ +trim.matrix(mat, tolval = 10 * .Machine$double.eps) +} +\arguments{ +\item{mat}{correlation matrix} + +\item{totalval}{totalval} +} +\description{ +This function removes variables that cause the input design matrix to be ill-conditionned. The function was copied direclty from the package as a quickfix: https://cran.r-project.org/src/contrib/Archive/subselect/subselect_0.15.5.tar.gz +} +\keyword{internal} diff --git a/RPackageSource/vignettes/cometsvignette.Rmd b/RPackageSource/vignettes/cometsvignette.Rmd index 79e8afe2..046fb9cb 100644 --- a/RPackageSource/vignettes/cometsvignette.Rmd +++ b/RPackageSource/vignettes/cometsvignette.Rmd @@ -131,7 +131,7 @@ This diplay requires at least 2 rows and 2 columns in the correlation matrix. ```{r, eval=T} exmodeldata<-RcometsAnalytics::getModelData(exmetabdata,modelspec = "Interactive",exposures = c("bmi_grp","age")) -excorrdata <- RcometsAnalytics::runModel(exmodeldata,exmetabdata,"DPP") +excorrdata <- RcometsAnalytics::runModel(exmodeldata,exmetabdata,"DPP", all_vars = FALSE) RcometsAnalytics::showHClust(excorrdata, showticklabels=FALSE) ```