From bfd21f927c1755d4de6ebc9c792c69e0f1aa8ee0 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 3 Mar 2021 15:01:38 +0100 Subject: [PATCH 01/82] update sidebar menu depending on chosen mode (diyabc-rf analysis or data generation) --- R-pkg/NAMESPACE | 2 ++ R-pkg/R/app_index.R | 41 ++++++++++++++++++++++++++++++----------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/R-pkg/NAMESPACE b/R-pkg/NAMESPACE index 575391ac..f61d490f 100644 --- a/R-pkg/NAMESPACE +++ b/R-pkg/NAMESPACE @@ -74,7 +74,9 @@ importFrom(shinydashboard,dashboardHeader) importFrom(shinydashboard,dashboardPage) importFrom(shinydashboard,dashboardSidebar) importFrom(shinydashboard,menuItem) +importFrom(shinydashboard,renderMenu) importFrom(shinydashboard,sidebarMenu) +importFrom(shinydashboard,sidebarMenuOutput) importFrom(shinydashboard,tabItem) importFrom(shinydashboard,tabItems) importFrom(shinydashboard,updateTabItems) diff --git a/R-pkg/R/app_index.R b/R-pkg/R/app_index.R index 98d5b4c4..149c627c 100644 --- a/R-pkg/R/app_index.R +++ b/R-pkg/R/app_index.R @@ -1,7 +1,8 @@ #' App dashboard simplified sidebar #' @keywords internal #' @author Ghislain Durif -#' @importFrom shinydashboard dashboardSidebar menuItem sidebarMenu +#' @importFrom shinydashboard dashboardSidebar menuItem sidebarMenu +#' sidebarMenuOutput app_sidebar <- function() { dashboardSidebar( sidebarMenu( @@ -11,16 +12,7 @@ app_sidebar <- function() { tabName = "home_tab", icon = icon("home") ), - menuItem( - "DIYABC-RF main pipeline", - tabName = "analysis_tab", - icon = icon("flask") - ), - menuItem( - "Synthetic data file generation", - tabName = "datagen_tab", - icon = icon("dna") - ), + sidebarMenuOutput("menu_tabs"), menuItem( "Preferences", tabName = "pref_tab", @@ -79,6 +71,7 @@ app_body <- function() { #' App simplified dashboard server function #' @keywords internal #' @author Ghislain Durif +#' @importFrom shinydashboard renderMenu index_server <- function(input, output, session) { # home page @@ -87,14 +80,40 @@ index_server <- function(input, output, session) { ## new analysis project observeEvent(home_page$new_analysis_project, { req(home_page$new_analysis_project) + # rendering sidebar menu + output$menu_tabs <- renderMenu({ + sidebarMenu( + id = "dynamic_tabs", + menuItem( + "DIYABC-RF main pipeline", + tabName = "analysis_tab", + icon = icon("flask") + ) + ) + }) + # update tab item updateTabItems(session, "app_menu", selected = "analysis_tab") + # init env init_diyabcrf_env() }) ## new data generation project observeEvent(home_page$new_datagen_project, { req(home_page$new_datagen_project) + # rendering sidebar menu + output$menu_tabs <- renderMenu({ + sidebarMenu( + id = "dynamic_tabs", + menuItem( + "Synthetic data file generation", + tabName = "datagen_tab", + icon = icon("dna") + ) + ) + }) + # update tab item updateTabItems(session, "app_menu", selected = "datagen_tab") + # init env init_datagen_env() }) From a927836e5977a9391e208b4ba52178ae97d95f31 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 3 Mar 2021 15:35:39 +0100 Subject: [PATCH 02/82] remove useless (deprecated) home page ui --- R-pkg/R/51_home_page_module.R | 114 ---------------------------------- 1 file changed, 114 deletions(-) diff --git a/R-pkg/R/51_home_page_module.R b/R-pkg/R/51_home_page_module.R index fe3afdd6..cc068562 100644 --- a/R-pkg/R/51_home_page_module.R +++ b/R-pkg/R/51_home_page_module.R @@ -123,117 +123,3 @@ simplified_home_page_server <- function(input, output, session) { # output return(out) } - -#' Home page ui -#' @keywords internal -#' @author Ghislain Durif -#' @description deprecated -home_page_ui <- function(id) { - ns <- NS(id) - tagList( - fluidRow( - box( - title = "Data analysis", - width = 12, - status = "primary", solidHeader = TRUE, - collapsible = TRUE, - tags$div( - tags$p("Including the two modules of DIYABC Random Forest:"), - tags$ul( - tags$li("training set simulation"), - tags$li("random forest analyses") - ) - ) %>% - helper(type = "markdown", - content = "data_analysis"), - open_project_ui(ns("analysis_project")) - ) - ), - fluidRow( - box( - title = "Pseudo-observed dataset simulation", - width = 12, - status = "info", solidHeader = TRUE, - collapsible = TRUE, collapsed = TRUE, - tags$div( - tags$p("Direct use of DIYABC-RF simulation engine.") %>% - helper(type = "markdown", - content = "data_simulation") - ), - open_project_ui(ns("simu_project")) - ) - ) - ) -} - -#' Home page server -#' @keywords internal -#' @author Ghislain Durif -#' @description deprecated -home_page_server <- function(input, output, session) { - # init output - out <- reactiveValues( - new_analysis_project = NULL, - existing_analysis_project = NULL, - new_simu_project = NULL, - existing_simu_project = NULL - ) - # input - analysis_project <- callModule(open_project_server, "analysis_project") - simu_project <- callModule(open_project_server, "simu_project") - # analysis project reactivity - observeEvent(analysis_project$new, { - out$new_analysis_project <- analysis_project$new - }) - observeEvent(analysis_project$existing, { - out$existing_analysis_project <- analysis_project$existing - }) - # simu project reactivity - observeEvent(simu_project$new, { - out$new_simu_project <- simu_project$new - }) - observeEvent(simu_project$existing, { - out$existing_simu_project <- simu_project$existing - }) - # output - return(out) -} - -#' Open project module ui -#' @description -#' Wrapper to choose between creating a new project and opening an existing one -#' @keywords internal -#' @author Ghislain Durif -#' @importFrom shinyWidgets actionGroupButtons -open_project_ui <- function(id, label = "New project") { - ns <- NS(id) - tagList( - actionGroupButtons( - inputIds = c(ns("new_project"), - ns("existing_project")), - labels = list( - tags$span(icon("plus"), "New project"), - tags$span(icon("folder-open"), "Existing project") - ), - fullwidth = TRUE - ) - ) -} - -#' Open project module server -#' @keywords internal -#' @author Ghislain Durif -open_project_server <- function(input, output, session) { - # init output - out <- reactiveValues(new = NULL, existing = NULL) - # react to new project request - observeEvent(input$new_project, { - out$new <- ifelse(is.null(out$new), 0, out$new) + 1 - }) - # react to open existing project request - observeEvent(input$existing_project, { - out$existing <- ifelse(is.null(out$existing), 0, out$existing) + 1 - }) - # output - return(out) -} From e2cd1de0282222249028df5d2496c9c2f8e1caa6 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 3 Mar 2021 15:41:47 +0100 Subject: [PATCH 03/82] rename home page ui and server --- R-pkg/R/51_home_page_module.R | 4 ++-- R-pkg/R/app_index.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R-pkg/R/51_home_page_module.R b/R-pkg/R/51_home_page_module.R index cc068562..b55325f4 100644 --- a/R-pkg/R/51_home_page_module.R +++ b/R-pkg/R/51_home_page_module.R @@ -1,7 +1,7 @@ #' Simplified home page ui #' @keywords internal #' @author Ghislain Durif -simplified_home_page_ui <- function(id) { +home_page_ui <- function(id) { ns <- NS(id) tagList( fluidRow( @@ -98,7 +98,7 @@ simplified_home_page_ui <- function(id) { #' Simlified home page server #' @keywords internal #' @author Ghislain Durif -simplified_home_page_server <- function(input, output, session) { +home_page_server <- function(input, output, session) { # init output out <- reactiveValues( new_analysis_project = NULL, diff --git a/R-pkg/R/app_index.R b/R-pkg/R/app_index.R index 149c627c..014e5b2c 100644 --- a/R-pkg/R/app_index.R +++ b/R-pkg/R/app_index.R @@ -39,7 +39,7 @@ app_body <- function() { tabItems( tabItem( tabName = "home_tab", - simplified_home_page_ui("home_page") + home_page_ui("home_page") ), tabItem( tabName = "analysis_tab", @@ -75,7 +75,7 @@ app_body <- function() { index_server <- function(input, output, session) { # home page - home_page <- callModule(simplified_home_page_server, "home_page") + home_page <- callModule(home_page_server, "home_page") ## new analysis project observeEvent(home_page$new_analysis_project, { From 7ab8d8d7403e29d2681a2102b1cff9421f8ab623 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 3 Mar 2021 15:47:18 +0100 Subject: [PATCH 04/82] disable analysis and data generations modules, ready to refactor the app using global environment --- R-pkg/R/app_index.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R-pkg/R/app_index.R b/R-pkg/R/app_index.R index 014e5b2c..642832c5 100644 --- a/R-pkg/R/app_index.R +++ b/R-pkg/R/app_index.R @@ -43,11 +43,11 @@ app_body <- function() { ), tabItem( tabName = "analysis_tab", - analysis_page_ui("analysis_page") + # analysis_page_ui("analysis_page") ), tabItem( tabName = "datagen_tab", - datagen_page_ui("datagen_page") + # datagen_page_ui("datagen_page") ), tabItem( tabName = "pref_tab", @@ -118,7 +118,7 @@ index_server <- function(input, output, session) { }) ## analysis page - analysis_page <- callModule(analysis_page_server, "analysis_page") + # analysis_page <- callModule(analysis_page_server, "analysis_page") # # reset # observeEvent(analysis_page$reset, { # req(analysis_page$reset) @@ -127,7 +127,7 @@ index_server <- function(input, output, session) { # }) ## datagen page - datagen_page <- callModule(datagen_page_server, "datagen_page") + # datagen_page <- callModule(datagen_page_server, "datagen_page") # # reset # observeEvent(datagen_page$reset, { # req(datagen_page$reset) From e92aa27fd21287d2c75cd5df15225cb55efcf66e Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 3 Mar 2021 15:50:28 +0100 Subject: [PATCH 05/82] simplify file name (replace page denomination by module) --- ...sis_page_module.R => 61_analysis_module.R} | 0 ...agen_page_module.R => 91_datagen_module.R} | 0 R-pkg/R/app_index.R | 20 ++++--------------- 3 files changed, 4 insertions(+), 16 deletions(-) rename R-pkg/R/{61_analysis_page_module.R => 61_analysis_module.R} (100%) rename R-pkg/R/{91_datagen_page_module.R => 91_datagen_module.R} (100%) diff --git a/R-pkg/R/61_analysis_page_module.R b/R-pkg/R/61_analysis_module.R similarity index 100% rename from R-pkg/R/61_analysis_page_module.R rename to R-pkg/R/61_analysis_module.R diff --git a/R-pkg/R/91_datagen_page_module.R b/R-pkg/R/91_datagen_module.R similarity index 100% rename from R-pkg/R/91_datagen_page_module.R rename to R-pkg/R/91_datagen_module.R diff --git a/R-pkg/R/app_index.R b/R-pkg/R/app_index.R index 642832c5..79bb241e 100644 --- a/R-pkg/R/app_index.R +++ b/R-pkg/R/app_index.R @@ -43,11 +43,11 @@ app_body <- function() { ), tabItem( tabName = "analysis_tab", - # analysis_page_ui("analysis_page") + # analysis_module_ui("analysis_module") ), tabItem( tabName = "datagen_tab", - # datagen_page_ui("datagen_page") + # datagen_module_ui("datagen_module") ), tabItem( tabName = "pref_tab", @@ -118,22 +118,10 @@ index_server <- function(input, output, session) { }) ## analysis page - # analysis_page <- callModule(analysis_page_server, "analysis_page") - # # reset - # observeEvent(analysis_page$reset, { - # req(analysis_page$reset) - # session$reload() - # updateTabItems(session, "app_menu", selected = "analysis_tab") - # }) + # analysis_module <- callModule(analysis_module_server, "analysis_module") ## datagen page - # datagen_page <- callModule(datagen_page_server, "datagen_page") - # # reset - # observeEvent(datagen_page$reset, { - # req(datagen_page$reset) - # session$reload() - # updateTabItems(session, "app_menu", selected = "datagen_tab") - # }) + # datagen_module <- callModule(datagen_module_server, "datagen_module") ## preferences callModule(pref_page_server, "pref_page") From 27631f39f6e6ed062dd3fa61eb3c42430493c620 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 3 Mar 2021 16:05:46 +0100 Subject: [PATCH 06/82] use proj_name and proj_dir in environment --- R-pkg/R/04_environment.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index bf8da83b..f2b052a9 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -18,8 +18,8 @@ init_diyabcrf_env <- function() { # clean environment tmp <- reactiveValues( ## project setup - name = NULL, # project name - dir = NULL, # project directory + proj_name = NULL, # project name + proj_dir = NULL, # project directory locus_type = NULL, # "SNP" or "MSS" seq_mode = NULL, # "IndSeq" or "PoolSeq" ## observed data @@ -81,7 +81,7 @@ init_diyabcrf_env <- function() { ) # init project directory - tmp$dir <- mk_proj_dir("diyabc_rf") + tmp$proj_dir <- mk_proj_dir("diyabc_rf") # init env assign("ap", tmp, env) @@ -95,8 +95,8 @@ init_datagen_env <- function() { # clean environment tmp <- reactiveValues( ## project setup - name = NULL, # project name - dir = NULL, # project directory + proj_name = NULL, # project name + proj_dir = NULL, # project directory locus_type = NULL, # "SNP" or "MSS" seq_mode = NULL, # "IndSeq" or "PoolSeq" ## data description @@ -111,7 +111,7 @@ init_datagen_env <- function() { ) # init project directory - tmp$dir <- mk_proj_dir("diyabc_datagen") + tmp$proj_dir <- mk_proj_dir("diyabc_datagen") # init env assign("dp", tmp, env) From 12234d46dd0d314d87e0419628fdc7dc337453aa Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 3 Mar 2021 16:23:52 +0100 Subject: [PATCH 07/82] check if reactive values before using reactive values to list function --- R-pkg/R/04_environment.R | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index f2b052a9..a7ae20cc 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -121,7 +121,11 @@ init_datagen_env <- function() { #' @keywords internal #' @author Ghislain Durif debug_ap <- function() { - pprint(reactiveValuesToList(env$ap)) + if(is.reactivevalues(env$ap)) { + pprint(reactiveValuesToList(env$ap)) + } else { + pprint(env$ap) + } } #' Print content of metadata inside diyabc-rf project sub-environment @@ -129,7 +133,11 @@ debug_ap <- function() { #' @keywords internal #' @author Ghislain Durif debug_ap_metadata <- function() { - pprint(reactiveValuesToList(env$ap$metadata)) + if(is.reactivevalues(env$ap$metadata)) { + pprint(reactiveValuesToList(env$ap$metadata)) + } else { + pprint(env$ap$metadata) + } } #' Print content of training set simulation setup inside diyabc-rf project @@ -137,7 +145,11 @@ debug_ap_metadata <- function() { #' @keywords internal #' @author Ghislain Durif debug_ap_ts <- function() { - pprint(reactiveValuesToList(env$ap$ts)) + if(is.reactivevalues(env$ap)) { + pprint(reactiveValuesToList(env$ap$ts)) + } else { + pprint(env$ap$ts) + } } #' Print content of random forest setup inside diyabc-rf project @@ -145,7 +157,11 @@ debug_ap_ts <- function() { #' @keywords internal #' @author Ghislain Durif debug_ap_rf <- function() { - pprint(reactiveValuesToList(env$ap$rf)) + if(is.reactivevalues(env$ap$rf)) { + pprint(reactiveValuesToList(env$ap$rf)) + } else { + pprint(env$ap$rf) + } } #' Print content of data generation project sub-environment for debugging @@ -153,7 +169,11 @@ debug_ap_rf <- function() { #' @keywords internal #' @author Ghislain Durif debug_dp <- function() { - pprint(reactiveValuesToList(env$dp)) + if(is.reactivevalues(env$ap)) { + pprint(reactiveValuesToList(env$dp)) + } else { + pprint(env$dp) + } } From 1c481f6b6e11f51c012b2210f47bf9dbab6813ad Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 3 Mar 2021 18:35:30 +0100 Subject: [PATCH 08/82] use variables from global environment --- R-pkg/R/32_project_admin.R | 80 ++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 43 deletions(-) diff --git a/R-pkg/R/32_project_admin.R b/R-pkg/R/32_project_admin.R index 85d03247..eb5b049c 100644 --- a/R-pkg/R/32_project_admin.R +++ b/R-pkg/R/32_project_admin.R @@ -37,55 +37,49 @@ proj_admin_ui <- function(id) { #' Project administration server #' @keywords internal #' @author Ghislain Durif -proj_admin_server <- function(input, output, session, - proj_dir = reactive({NULL}), - proj_name = reactive({NULL})) { - # init local - local <- reactiveValues( - proj_dir = NULL, - proj_name = NULL - ) - # get input - observe({ - local$proj_dir <- proj_dir() - local$proj_name <- proj_name() - }) +#' @param tag character string, type of project identified by `"ap"` +#' (for diyabc-rf analysis project) or `"dp"` (data generation project). +proj_admin_server <- function(input, output, session, tag = NULL) { + # init output out <- reactiveValues(reset = NULL) - # save - output$save <- downloadHandler( - filename = function() { - file_name <- "project_name.zip" - if(!is.null(local$proj_name)) { - if(str_length(local$proj_name) > 0) { - file_name <- str_c(local$proj_name, ".zip") + + # check input + if(isTruthy(tag)) { + + # save + output$save <- downloadHandler( + filename = function() { + file_name <- "project_name.zip" + if(isTruthy(env[[tag]]$proj_name)) { + file_name <- str_c(env[[tag]]$proj_name, ".zip") } + return(file_name) + }, + content = function(file) { + req(env[[tag]]$proj_dir) + wd <- getwd() + on.exit(setwd(wd)) + setwd(env[[tag]]$proj_dir) + cleanup_diyabc_run(env[[tag]]$proj_dir) + cleanup_abcranger_run(env[[tag]]$proj_dir) + zip::zip(file, list.files(env[[tag]]$proj_dir)) } - return(file_name) - }, - content = function(file) { - wd <- getwd() - on.exit(setwd(wd)) - setwd(local$proj_dir) - cleanup_diyabc_run(local$proj_dir) - cleanup_abcranger_run(local$proj_dir) - zip::zip(file, list.files(local$proj_dir)) - } - ) - - ## reset - observeEvent(input$reset, { - ask_confirmation( - inputId = "reset_confirmation", - title = "Want to confirm ?" ) - }) - observeEvent(input$reset_confirmation, { - req(!is.null(input$reset_confirmation)) - if(isTRUE(input$reset_confirmation)) { + + ## reset + observeEvent(input$reset, { + ask_confirmation( + inputId = "reset_confirmation", + title = "Want to confirm ?" + ) + }) + + observeEvent(input$reset_confirmation, { + req(input$reset_confirmation) out$reset <- ifelse(!is.null(out$reset), out$reset, 0) + 1 - } - }) + }) + } ## output return(out) From e69a4e999689f1a4f8a1e6011d99697bc93f8903 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 3 Mar 2021 18:37:17 +0100 Subject: [PATCH 09/82] WIP: refactoring analysis module (TODO: setup, training set simulation and random forest analysis) --- R-pkg/R/61_analysis_module.R | 91 ++++-------------------------------- R-pkg/R/app_index.R | 4 +- 2 files changed, 11 insertions(+), 84 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 75d86183..ea3a9165 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -1,7 +1,7 @@ -#' Analysis page ui +#' Analysis module ui #' @keywords internal #' @author Ghislain Durif -analysis_page_ui <- function(id) { +analysis_module_ui <- function(id) { ns <- NS(id) tagList( tags$style(HTML(".box-header{text-align: center;}")), @@ -38,101 +38,28 @@ analysis_page_ui <- function(id) { ) } -#' Analysis page server +#' Analysis module server #' @keywords internal #' @author Ghislain Durif -analysis_page_server <- function(input, output, session) { - # namespace - ns <- session$ns - # init local - local <- reactiveValues( - locus_type = NULL, - seq_mode = NULL, - new_proj = NULL, - proj_dir = NULL, - proj_header = NULL, - scenario_list = NULL - ) - # init output - out <- reactiveValues( - setting = NULL, - scenario = NULL, - reset = NULL - ) +analysis_module_server <- function(input, output, session) { + ## project setting proj_set <- callModule(analysis_proj_set_server, "proj_set") - # # output: - # data_file = NULL, - # data_info = NULL, - # locus_type = NULL, - # seq_mode = NULL, - # new_proj = NULL, - # proj_dir = mk_proj_dir(), - # proj_file_list = character(0), - # proj_header_content = list(), - # proj_name = NULL, - # valid_data_file = FALSE - # valid_proj = FALSEvalid_proj = FALSE - - # # debugging - # observe({ - # pprint("#### Project settings ####") - # pprint(reactiveValuesToList(proj_set)) - # }) - - - # FIXME - # update local - observe({ - # FIXME obsolete - local$proj_dir <- proj_set$proj_dir - local$proj_name <- proj_set$proj_name - }) ## Training set sub-module - training_set <- callModule( - training_set_server, "train_set", - data_file = reactive(proj_set$data_file), - data_info = reactive(proj_set$data_info), - locus_type = reactive(proj_set$locus_type), - seq_mode = reactive(proj_set$seq_mode), - new_proj = reactive(proj_set$new_proj), - proj_dir = reactive(proj_set$proj_dir), - proj_file_list = reactive({NULL}), - valid_proj = reactive(proj_set$valid_proj) - ) + # training_set <- callModule(training_set_server, "train_set") - # # debugging - # observe({ - # pprint("training set valid proj") - # pprint(training_set$valid_proj) - # }) - - ## random forest module - rf <- callModule( - rf_module_server, "rf", - locus_type = reactive(proj_set$locus_type), - proj_dir = reactive(proj_set$proj_dir), - # proj_file_list = reactive(proj_set$proj_file_list), - valid_proj = reactive(proj_set$valid_proj) - ) + ## random forest sub-module + # rf <- callModule(rf_module_server, "rf") ## action - proj_admin <- callModule( - proj_admin_server, "proj_admin", - proj_dir = reactive(proj_set$proj_dir), - proj_name = reactive(proj_set$proj_name) - ) + proj_admin <- callModule(proj_admin_server, "proj_admin", tag = "ap") ## reset observeEvent(proj_admin$reset, { req(proj_admin$reset) - out$reset <- proj_admin$reset session$reload() }) - - # output - return(out) } #' Analysis project setting ui diff --git a/R-pkg/R/app_index.R b/R-pkg/R/app_index.R index 79bb241e..50d8ed83 100644 --- a/R-pkg/R/app_index.R +++ b/R-pkg/R/app_index.R @@ -43,7 +43,7 @@ app_body <- function() { ), tabItem( tabName = "analysis_tab", - # analysis_module_ui("analysis_module") + analysis_module_ui("analysis_module") ), tabItem( tabName = "datagen_tab", @@ -118,7 +118,7 @@ index_server <- function(input, output, session) { }) ## analysis page - # analysis_module <- callModule(analysis_module_server, "analysis_module") + analysis_module <- callModule(analysis_module_server, "analysis_module") ## datagen page # datagen_module <- callModule(datagen_module_server, "datagen_module") From 4e45426aa8dff81d9cddf5512f24f5de212cb703 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 14:21:50 +0100 Subject: [PATCH 10/82] use shinyFeedback package --- R-pkg/DESCRIPTION | 2 +- R-pkg/NAMESPACE | 2 ++ R-pkg/R/diyabcGUI-package.R | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/R-pkg/DESCRIPTION b/R-pkg/DESCRIPTION index 7d1f1529..25db74f9 100644 --- a/R-pkg/DESCRIPTION +++ b/R-pkg/DESCRIPTION @@ -63,7 +63,7 @@ Imports: rlang, shiny, shinybusy (>= 0.2.2), - shinydashboard, shinyFiles, + shinydashboard, shinyFeedback, shinyFiles, shinyhelper, shinyjs, shinyWidgets, stringr, tibble, diff --git a/R-pkg/NAMESPACE b/R-pkg/NAMESPACE index f61d490f..e879737c 100644 --- a/R-pkg/NAMESPACE +++ b/R-pkg/NAMESPACE @@ -59,6 +59,8 @@ importFrom(parallel,stopCluster) importFrom(pbapply,pblapply) importFrom(processx,process) importFrom(rlang,duplicate) +importFrom(shinyFeedback,feedbackWarning) +importFrom(shinyFeedback,useShinyFeedback) importFrom(shinyWidgets,actionBttn) importFrom(shinyWidgets,actionGroupButtons) importFrom(shinyWidgets,ask_confirmation) diff --git a/R-pkg/R/diyabcGUI-package.R b/R-pkg/R/diyabcGUI-package.R index 8c584dd6..7c6f7018 100644 --- a/R-pkg/R/diyabcGUI-package.R +++ b/R-pkg/R/diyabcGUI-package.R @@ -19,6 +19,7 @@ #' @import shiny #' @importFrom shinybusy add_busy_spinner #' @importFrom shinydashboard box updateTabItems +#' @importFrom shinyFeedback feedbackWarning #' @importFrom shinyhelper helper #' @importFrom shinyjs disable enable hidden hide reset show #' @importFrom shinyWidgets actionBttn actionGroupButtons ask_confirmation progressBar From c6a6d8b85d3f717604f52838b444ac885f283d8f Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 14:29:42 +0100 Subject: [PATCH 11/82] rewrite global environment and rectiveValues in global environment definition and initialization --- R-pkg/R/04_environment.R | 203 ++++++++++++++++++++++++--------------- 1 file changed, 128 insertions(+), 75 deletions(-) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index a7ae20cc..3ce01a0f 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -6,85 +6,111 @@ #' (diyabc-rf analysis and data generation) inside `diyabcGUI` global #' environment. init_diyabc_env <- function() { - assign("ap", NULL, env) # diyabc-rf analysis project - assign("dp", NULL, env) # data generation project + # diyabc-rf analysis project + assign("ap", reactiveValues(), env) + + # diyabc-rf dataset metadata + assign("md", reactiveValues(), env) + + # diyabc-rf training set simulation sub-module + assign("ts", reactiveValues(), env) + + # diyabc-rf random forest sub-module + assign("rf", reactiveValues(), env) + + # data generation project + assign("dp", reactiveValues(), env) } #' Initialize environment for DIYABC-RF pipeline #' @keywords internal #' @author Ghislain Durif init_diyabcrf_env <- function() { + ## environment attributes - # clean environment - tmp <- reactiveValues( + # analysis project + tmp_ap <- list( ## project setup - proj_name = NULL, # project name - proj_dir = NULL, # project directory + proj_name = NULL, # project name + proj_dir = NULL, # project directory locus_type = NULL, # "SNP" or "MSS" seq_mode = NULL, # "IndSeq" or "PoolSeq" ## observed data - data_file = NULL, # observed data file name - metadata = reactiveValues( - # number of loci in the data file - n_loci = NULL, - # table of locus description: name, type, number - locus_des = NULL - ), - ## training set simulation - ts = reactiveValues( - # list of historical models - hist_model = NULL, - # list number of parameters per model - n_param = NULL, - # list of model priors (discrete probabilities) - model_prior = NULL, - # table of historical model parameters (name, type, priors) - param = NULL, - # list of conditions on historical parameters - cond = NULL, - # table of loci description - loci_des = NULL, - # number of loci group - n_group = NULL, - # list of group priors for MSS data - mss_prior = NULL, - # specific ref table column names for MSS data - mss_reftab_colname = NULL - ), - ## random forest analysis - rf = reactiveValues( - # analysis mode: "param_estim" or "model_choice" - mode = NULL, - # number of samples - n_ref = NULL, - # minimal node size - min_node_size = NULL, - # number of tree - n_tree = NULL, - # number of noise columns - noise_columns = NULL, - # boolean: if TRUE, disable LDA for model choice or PLS for - # parameter estimation - no_linear = NULL, - # percentage of maximum explained Y-variance for retaining pls axis - pls_max_var = NULL, - # Chosen scenario (mandatory for parameter estimation) - chosen_scenario = NULL, - # number of oob testing samples (mandatory for parameter estimation) - noob = NULL, - # name of the parameter of interest (mandatory for parameter - # estimation) - parameter = NULL, - # subset and/or groups of models - groups = NULL - ) + data_file = NULL # observed data file name + ) + + # dataset metadata + tmp_md <- list( + # number of loci in the data file + n_loci = NULL, + # table of locus description: name, type, number + locus_des = NULL ) - # init project directory - tmp$proj_dir <- mk_proj_dir("diyabc_rf") + # training set simulation + tmp_ts <-list( + # list of historical models + hist_model = NULL, + # list number of parameters per model + n_param = NULL, + # list of model priors (discrete probabilities) + model_prior = NULL, + # table of historical model parameters (name, type, priors) + param = NULL, + # list of conditions on historical parameters + cond = NULL, + # table of loci description + loci_des = NULL, + # number of loci group + n_group = NULL, + # list of group priors for MSS data + mss_prior = NULL, + # specific ref table column names for MSS data + mss_reftab_colname = NULL + ) + + # random forest analysis + tmp_rf <- list( + # analysis mode: "param_estim" or "model_choice" + mode = NULL, + # number of samples + n_ref = NULL, + # minimal node size + min_node_size = NULL, + # number of tree + n_tree = NULL, + # number of noise columns + noise_columns = NULL, + # boolean: if TRUE, disable LDA for model choice or PLS for + # parameter estimation + no_linear = NULL, + # percentage of maximum explained Y-variance for retaining pls axis + pls_max_var = NULL, + # Chosen scenario (mandatory for parameter estimation) + chosen_scenario = NULL, + # number of oob testing samples (mandatory for parameter estimation) + noob = NULL, + # name of the parameter of interest (mandatory for parameter + # estimation) + parameter = NULL, + # subset and/or groups of models + groups = NULL + ) - # init env - assign("ap", tmp, env) + + ## clean up and define environment + list2reactiveValues(tmp_ap, ap) + list2reactiveValues(tmp_md, md) + list2reactiveValues(tmp_ts, ts) + list2reactiveValues(tmp_rf, rf) +} + +#' Reset environment for DIYABC-RF pipeline +#' @keywords internal +#' @author Ghislain Durif +reset_diyabcrf_env <- function() { + # proj dir + env$ap$proj_dir <- mk_proj_dir("diyabc_rf") } #' Initialize environment for data generation pipeline @@ -93,12 +119,12 @@ init_diyabcrf_env <- function() { init_datagen_env <- function() { # clean environment - tmp <- reactiveValues( + tmp_dp <- reactiveValues( ## project setup - proj_name = NULL, # project name - proj_dir = NULL, # project directory - locus_type = NULL, # "SNP" or "MSS" - seq_mode = NULL, # "IndSeq" or "PoolSeq" + proj_name = NULL, # project name + proj_dir = NULL, # project directory + locus_type = NULL, # "SNP" or "MSS" + seq_mode = NULL, # "IndSeq" or "PoolSeq" ## data description model = NULL, # historical model param = NULL, # list of parameter values @@ -110,13 +136,19 @@ init_datagen_env <- function() { sex_ratio = NULL # sex ratio in the simulation ) - # init project directory - tmp$proj_dir <- mk_proj_dir("diyabc_datagen") - - # init env - assign("dp", tmp, env) + ## clean up and define environment + list2reactiveValues(tmp_dp, dp) +} + +#' Reset environment for data generation pipeline +#' @keywords internal +#' @author Ghislain Durif +reset_datagen_env <- function() { + # proj dir + env$dp$proj_dir <- mk_proj_dir("diyabc_datagen") } + #' Print content of diyabc-rf project sub-environment for debugging purpose #' @keywords internal #' @author Ghislain Durif @@ -176,6 +208,27 @@ debug_dp <- function() { } } +#' Fill reactiveValues from package global environment with named list elements +#' @keywords internal +#' @author Ghislain Durif +#' @param named_list list of named elements to fill the reactiveValues `rval` +#' input arguments. +#' @param tag `reactiveValues` variable from package global environment to be +#' filled with named elements from `named_list` input arguments. +list2reactiveValues <- function(named_list, tag) { + # check + if(length(names(named_list)) != length(named_list)) { + stop("'named_list' input arg should be a named list.") + } + # elements names + name_vec <- names(named_list) + # fill + for(ind in 1:length(named_list)) { + env[[as.character(substitute(tag))]][[name_vec[ind]]] <<- + named_list[[ind]] + } +} + #' Get value from named list in diyabc local environment #' @keywords internal From e2eaa88adb5f6eee3604ba9c5c87dab78fa77284 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 14:31:26 +0100 Subject: [PATCH 12/82] rewrite project name module with global environment update --- R-pkg/R/31_project_utils.R | 106 ++++++++++--------------------------- 1 file changed, 28 insertions(+), 78 deletions(-) diff --git a/R-pkg/R/31_project_utils.R b/R-pkg/R/31_project_utils.R index 493173c8..1ce01a80 100644 --- a/R-pkg/R/31_project_utils.R +++ b/R-pkg/R/31_project_utils.R @@ -1,7 +1,7 @@ #' Project naming module ui #' @keywords internal #' @author Ghislain Durif -proj_name_ui <- function(id, label = "Project name", default = NULL) { +proj_name_ui <- function(id, label = "Project name") { ns <- NS(id) tagList( h3(label), @@ -11,113 +11,63 @@ proj_name_ui <- function(id, label = "Project name", default = NULL) { textInput( ns("proj_name"), label = NULL, - value = ifelse( - !is.null(default), - default, - "" - ), placeholder = "project name" ) ), column( width = 3, actionButton( - ns("validate_proj_name"), + ns("validate"), label = "Validate", icon = icon("check"), width = '100%' ) ) ), - uiOutput(ns("feedback_proj_name")) + uiOutput(ns("feedback")) ) } #' Project naming module server #' @keywords internal #' @author Ghislain Durif -proj_name_server <- function(input, output, session) { +#' @param tag character string, type of project identified by `"ap"` +#' (for diyabc-rf analysis project) or `"dp"` (data generation project). +proj_name_server <- function(input, output, session, tag = "ap") { # init local - local <- reactiveValues( - modified_proj_name = FALSE, - valid_proj_name = FALSE - ) - - # init output - out <- reactiveValues(proj_name = NULL, valid_proj_name = FALSE) + local <- reactiveValues(modified = TRUE) # new input observeEvent(input$proj_name, { - shinyjs::enable("validate_proj_name") - local$modified_proj_name <- TRUE - - # check project name - local$valid_proj_name <- FALSE - if(!is.null(input$proj_name)) { - if(str_length(input$proj_name) > 0) { - local$valid_proj_name <- TRUE - } - } + shinyjs::enable("validate") + local$modified <- TRUE }) # validate input - observeEvent(input$validate_proj_name, { - req(input$validate_proj_name) - - local$modified_proj_name <- FALSE - - out$proj_name <- input$proj_name - - out$valid_proj_name <- local$valid_proj_name - - shinyjs::disable("validate_proj_name") - }) - - # valid proj name ? - observeEvent(local$valid_proj_name, { - req(!is.null(local$valid_proj_name)) + observeEvent(input$validate, { + req(input$validate) - if(!local$valid_proj_name) { - shinyjs::enable("validate_proj_name") + local$modified <- FALSE + + # check project name + if(isTruthy(input$proj_name)) { + shinyjs::disable("validate") + env[[tag]]$proj_name <<- input$proj_name } }) + # # debugging + # observe({ + # logging("proj name:", env[[tag]]$proj_name) + # }) + # feedback on project name - output$feedback_proj_name <- renderUI({ - feedback<- tagList() - - # check if modified proj name - if(!is.null(local$modified_proj_name)) { - if(local$modified_proj_name) { - feedback <- tagAppendChild( - feedback, - helpText( - icon("warning"), - "Project name is not validated." - ) - ) - } - } - - # check proj name - if(!is.null(local$valid_proj_name)) { - if(!local$valid_proj_name) { - feedback <- tagAppendChild( - feedback, - helpText( - icon("warning"), "Project name is missing." - ) - ) - } - } else { - - } - - # feedback - feedback + observe({ + feedbackWarning( + "proj_name", + local$modified || !isTruthy(input$proj_name), + "Project name is missing or not validated." + ) }) - - # output - return(out) } From 9c7fca96b393aa1343c644842022e1c9ac463fd3 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 14:39:52 +0100 Subject: [PATCH 13/82] use shinyFeedback + reset project environment when starting new project --- R-pkg/R/app_index.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R-pkg/R/app_index.R b/R-pkg/R/app_index.R index 50d8ed83..48c9cf42 100644 --- a/R-pkg/R/app_index.R +++ b/R-pkg/R/app_index.R @@ -31,10 +31,12 @@ app_sidebar <- function() { #' @keywords internal #' @author Ghislain Durif #' @importFrom shinydashboard dashboardBody tabItems tabItem +#' @importFrom shinyFeedback useShinyFeedback #' @importFrom shinyjs useShinyjs app_body <- function() { dashboardBody( useShinyjs(), + useShinyFeedback(), add_busy_spinner(spin = "fading-circle", margins = c(0, 10)), tabItems( tabItem( @@ -95,6 +97,7 @@ index_server <- function(input, output, session) { updateTabItems(session, "app_menu", selected = "analysis_tab") # init env init_diyabcrf_env() + reset_diyabcrf_env() }) ## new data generation project @@ -115,6 +118,7 @@ index_server <- function(input, output, session) { updateTabItems(session, "app_menu", selected = "datagen_tab") # init env init_datagen_env() + reset_datagen_env() }) ## analysis page From 304f1ea786f5d092c24fc44a64541afb71cc1b1e Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 14:42:58 +0100 Subject: [PATCH 14/82] WIP: refactoring analysis module, DONE: using new version of project name module --- R-pkg/R/61_analysis_module.R | 629 ++++++++++++++++++----------------- 1 file changed, 315 insertions(+), 314 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index ea3a9165..0d72abf3 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -211,321 +211,322 @@ analysis_proj_set_server <- function(input, output, session) { }) ## project name - proj_name_setup <- callModule(proj_name_server, "proj_name_setup") - - observeEvent(proj_name_setup$proj_name, { - req(proj_name_setup$proj_name) - out$proj_name <- proj_name_setup$proj_name - }) - - observeEvent(proj_name_setup$valid_proj_name, { - req(!is.null(proj_name_setup$valid_proj_name)) - local$valid_proj_name <- proj_name_setup$valid_proj_name - }) - - ## data type - data_type <- callModule(data_type_server, "data_type") - observe({ - req(data_type$locus_type) - req(data_type$seq_mode) - out$locus_type <- data_type$locus_type - out$seq_mode <- data_type$seq_mode - }) - - ## new or existing project - observe({ - req(input$proj_type) - if(input$proj_type == "new") { - out$new_proj <- TRUE - } else if(input$proj_type == "existing") { - out$new_proj <- FALSE - req(!is.null(local$local$proj_file_list)) - if("headerRF.txt" %in% local$proj_file_list) { - out$new_proj <- FALSE - } else { - out$new_proj <- TRUE - } - } else if(input$proj_type == "example") { - out$new_proj <- FALSE - } - }) - - ## Manage existing project - possible_files <- c("headerRF.txt", "reftableRF.bin", "statobsRF.txt") - # copy uploaded files to project working directory (server-side) - observeEvent(input$file_input, { - ## user file input - req(input$file_input) - # data.frame with 4 columns: - # name (chr), size (int), type (chr), datapath (chr) - req(is.data.frame(input$file_input)) - req(nrow(input$file_input) > 0) - - ## extraction - new_file_input <- input$file_input - - tmp_proj_check <- existing_proj_file_check( - new_file_input, possible_files, out$proj_dir, local$file_input - ) - - local$file_input <- tmp_proj_check$file_input - local$existing_proj_zip <- tmp_proj_check$existing_proj_zip - }) - - # print possible files when uploading existing projects - output$file_check <- renderUI({ - helpText( - icon("comment"), "Project-related files check", - tags$p( - tags$div( - style = "column-count:2;", - do.call( - tags$ul, - lapply( - possible_files, - function(item) tags$li( - if(item %in% local$file_input$name) { - ind <- which(item == local$file_input$name) - if(local$file_input$valid[ind]) { - tags$div( - tags$code(item), - icon("check") - ) - } else { - tags$div( - tags$code(item), - icon("times") - ) - } - } else { - tags$code(item) - } - ) - ) - ) - ) - ) - ) - }) - - ## Manage example project - # update possible input - observe({ - req(!is.null(data_type$locus_type)) - req(!is.null(data_type$seq_mode)) - - if(data_type$locus_type == "mss") { - updateSelectInput( - session, - "proj_example", - choices = c("", "Not available at the moment"), - selected = NULL - ) - } else if(data_type$locus_type == "snp" & - data_type$seq_mode == "indseq") { - possible_choices <- basename( - list.dirs( - example_dir() - ) - ) - possible_choices <- possible_choices[str_detect(possible_choices, - "IndSeq")] - updateSelectInput( - session, - "proj_example", - choices = c("", possible_choices), - selected = NULL - ) - } else if(data_type$locus_type == "snp" & - data_type$seq_mode == "poolseq") { - possible_choices <- basename( - list.dirs( - example_dir() - ) - ) - possible_choices <- possible_choices[str_detect(possible_choices, - "PoolSeq")] - updateSelectInput( - session, - "proj_example", - choices = c("", possible_choices), - selected = NULL - ) - } - }) - # copy files if required - observeEvent(input$proj_example, { - - req(input$proj_type == "example") - req(input$proj_example) - - # copy files - proj_files <- list.files( - file.path( - example_dir(), - input$proj_example - ) - ) - fs::file_copy( - path = file.path( - example_dir(), - input$proj_example, - proj_files - ), - new_path = file.path( - out$proj_dir, - proj_files - ), - overwrite = TRUE - ) - - # update file input - # data.frame with 4 columns: - # name (chr), size (int), type (chr), datapath (chr) - local$file_input <- data.frame( - name = proj_files, - size = rep(NA, length(proj_files)), - type = rep(NA, length(proj_files)), - datapath = file.path( - out$proj_dir, - proj_files - ), - valid = rep(TRUE, length(proj_files)) - ) - - ## file type - ind <- which(local$file_input$name == "headerRF.txt") - local$file_input$type[ind] <- "text/plain" - ind <- which(local$file_input$name == "reftableRF.bin") - local$file_input$type[ind] <- "application/octet-stream" - ind <- which(local$file_input$name == "statObsRF.txt") - local$file_input$type[ind] <- "text/plain" - }) - - ## check current project header file - observeEvent(local$file_input, { - - req(is.data.frame(local$file_input)) - req(nrow(local$file_input) > 0) - req(!is.null(input$proj_type)) - - # # debugging - # pprint("file input") - # pprint(local$file_input) - - ## header check - if("headerRF.txt" %in% local$file_input$name) { - ind <- which(local$file_input$name == "headerRF.txt") - header_file_content <- parse_diyabc_header( - file_name = local$file_input$datapath[ind], - file_type = local$file_input$type[ind], - locus_type = data_type$locus_type - ) - # valid header file - local$file_input$valid[ind] <- header_file_content$valid - # header data file name - local$header_data_file <- header_file_content$data_file - # header data file content - out$proj_header_content <- header_file_content - # data from example ? - if(input$proj_type == "example") { - local$data_file <- header_file_content$data_file - } - } - - # # debugging - # pprint("file input") - # pprint(local$file_input) - - ## delete non valid files - lapply( - split(local$file_input, seq(nrow(local$file_input))), - function(item) { - if(!item$valid) { - if(file.exists(item$datapath)) { - logging("deleting:", item$datapath) - fs::file_delete(item$datapath) - } - } - } - ) - - ## project file list - out$proj_file_list <- local$file_input$name[local$file_input$valid] - - # # debugging - # pprint("file_input") - # pprint(local$file_input) - }) - - ## Data file file - data_file <- callModule( - input_data_server, "input_data_file", - proj_dir = reactive(out$proj_dir), - existing_proj_zip = reactive(local$existing_proj_zip) - ) - - # update local if data file upload - observe({ - req(!is.null(data_file$name)) - local$data_file <- data_file$name - }) - - # data file extracted from existing project zip file - observe({ - req(!is.null(local$existing_proj_zip)) - req(!is.null(local$header_data_file)) - - if(local$existing_proj_zip) { - local$data_file <- local$header_data_file - } - }) - - ## Data file check - check_data <- callModule( - check_data_server, "check_data_file", - data_file = reactive(local$data_file), - expected_data_file = reactive(local$header_data_file), - locus_type = reactive(out$locus_type), - seq_mode = reactive(out$seq_mode), - proj_dir = reactive(out$proj_dir) - ) - - # update output - observe({ - out$data_file <- data_file$name - out$data_info <- check_data$info - out$valid_data_file <- check_data$valid - }) - - # valid set up ? - observe({ - - req(!is.null(out$valid_data_file)) - req(!is.null(local$valid_proj_name)) - - # check header if required - valid_header <- TRUE - if(!is.null(out$proj_header_content$valid)) { - valid_header <- out$proj_header_content$valid - } - - out$valid_proj <- local$valid_proj_name & valid_header & out$valid_data_file - - # # debugging - # logging("valid proj?", out$valid_proj) - }) - - output$proj_is_ready <- renderUI({ - if(!(out$valid_proj & out$valid_data_file)) { - h3(icon("warning"), "Project set up is not ready.", - style="color:red;text-align:center;") - } else { - h4(icon("check"), "Project set up is ok.", - style="text-align:center;") - } - }) + proj_name_setup <- callModule(proj_name_server, "proj_name_setup", + tag = "ap") + # observeEvent(proj_name_setup$proj_name, { + # req(proj_name_setup$proj_name) + # out$proj_name <- proj_name_setup$proj_name + # }) + # + # observeEvent(proj_name_setup$valid_proj_name, { + # req(!is.null(proj_name_setup$valid_proj_name)) + # local$valid_proj_name <- proj_name_setup$valid_proj_name + # }) + # + # ## data type + # data_type <- callModule(data_type_server, "data_type") + # observe({ + # req(data_type$locus_type) + # req(data_type$seq_mode) + # out$locus_type <- data_type$locus_type + # out$seq_mode <- data_type$seq_mode + # }) + # + # ## new or existing project + # observe({ + # req(input$proj_type) + # if(input$proj_type == "new") { + # out$new_proj <- TRUE + # } else if(input$proj_type == "existing") { + # out$new_proj <- FALSE + # req(!is.null(local$local$proj_file_list)) + # if("headerRF.txt" %in% local$proj_file_list) { + # out$new_proj <- FALSE + # } else { + # out$new_proj <- TRUE + # } + # } else if(input$proj_type == "example") { + # out$new_proj <- FALSE + # } + # }) + # + # ## Manage existing project + # possible_files <- c("headerRF.txt", "reftableRF.bin", "statobsRF.txt") + # # copy uploaded files to project working directory (server-side) + # observeEvent(input$file_input, { + # ## user file input + # req(input$file_input) + # # data.frame with 4 columns: + # # name (chr), size (int), type (chr), datapath (chr) + # req(is.data.frame(input$file_input)) + # req(nrow(input$file_input) > 0) + # + # ## extraction + # new_file_input <- input$file_input + # + # tmp_proj_check <- existing_proj_file_check( + # new_file_input, possible_files, out$proj_dir, local$file_input + # ) + # + # local$file_input <- tmp_proj_check$file_input + # local$existing_proj_zip <- tmp_proj_check$existing_proj_zip + # }) + # + # # print possible files when uploading existing projects + # output$file_check <- renderUI({ + # helpText( + # icon("comment"), "Project-related files check", + # tags$p( + # tags$div( + # style = "column-count:2;", + # do.call( + # tags$ul, + # lapply( + # possible_files, + # function(item) tags$li( + # if(item %in% local$file_input$name) { + # ind <- which(item == local$file_input$name) + # if(local$file_input$valid[ind]) { + # tags$div( + # tags$code(item), + # icon("check") + # ) + # } else { + # tags$div( + # tags$code(item), + # icon("times") + # ) + # } + # } else { + # tags$code(item) + # } + # ) + # ) + # ) + # ) + # ) + # ) + # }) + # + # ## Manage example project + # # update possible input + # observe({ + # req(!is.null(data_type$locus_type)) + # req(!is.null(data_type$seq_mode)) + # + # if(data_type$locus_type == "mss") { + # updateSelectInput( + # session, + # "proj_example", + # choices = c("", "Not available at the moment"), + # selected = NULL + # ) + # } else if(data_type$locus_type == "snp" & + # data_type$seq_mode == "indseq") { + # possible_choices <- basename( + # list.dirs( + # example_dir() + # ) + # ) + # possible_choices <- possible_choices[str_detect(possible_choices, + # "IndSeq")] + # updateSelectInput( + # session, + # "proj_example", + # choices = c("", possible_choices), + # selected = NULL + # ) + # } else if(data_type$locus_type == "snp" & + # data_type$seq_mode == "poolseq") { + # possible_choices <- basename( + # list.dirs( + # example_dir() + # ) + # ) + # possible_choices <- possible_choices[str_detect(possible_choices, + # "PoolSeq")] + # updateSelectInput( + # session, + # "proj_example", + # choices = c("", possible_choices), + # selected = NULL + # ) + # } + # }) + # # copy files if required + # observeEvent(input$proj_example, { + # + # req(input$proj_type == "example") + # req(input$proj_example) + # + # # copy files + # proj_files <- list.files( + # file.path( + # example_dir(), + # input$proj_example + # ) + # ) + # fs::file_copy( + # path = file.path( + # example_dir(), + # input$proj_example, + # proj_files + # ), + # new_path = file.path( + # out$proj_dir, + # proj_files + # ), + # overwrite = TRUE + # ) + # + # # update file input + # # data.frame with 4 columns: + # # name (chr), size (int), type (chr), datapath (chr) + # local$file_input <- data.frame( + # name = proj_files, + # size = rep(NA, length(proj_files)), + # type = rep(NA, length(proj_files)), + # datapath = file.path( + # out$proj_dir, + # proj_files + # ), + # valid = rep(TRUE, length(proj_files)) + # ) + # + # ## file type + # ind <- which(local$file_input$name == "headerRF.txt") + # local$file_input$type[ind] <- "text/plain" + # ind <- which(local$file_input$name == "reftableRF.bin") + # local$file_input$type[ind] <- "application/octet-stream" + # ind <- which(local$file_input$name == "statObsRF.txt") + # local$file_input$type[ind] <- "text/plain" + # }) + # + # ## check current project header file + # observeEvent(local$file_input, { + # + # req(is.data.frame(local$file_input)) + # req(nrow(local$file_input) > 0) + # req(!is.null(input$proj_type)) + # + # # # debugging + # # pprint("file input") + # # pprint(local$file_input) + # + # ## header check + # if("headerRF.txt" %in% local$file_input$name) { + # ind <- which(local$file_input$name == "headerRF.txt") + # header_file_content <- parse_diyabc_header( + # file_name = local$file_input$datapath[ind], + # file_type = local$file_input$type[ind], + # locus_type = data_type$locus_type + # ) + # # valid header file + # local$file_input$valid[ind] <- header_file_content$valid + # # header data file name + # local$header_data_file <- header_file_content$data_file + # # header data file content + # out$proj_header_content <- header_file_content + # # data from example ? + # if(input$proj_type == "example") { + # local$data_file <- header_file_content$data_file + # } + # } + # + # # # debugging + # # pprint("file input") + # # pprint(local$file_input) + # + # ## delete non valid files + # lapply( + # split(local$file_input, seq(nrow(local$file_input))), + # function(item) { + # if(!item$valid) { + # if(file.exists(item$datapath)) { + # logging("deleting:", item$datapath) + # fs::file_delete(item$datapath) + # } + # } + # } + # ) + # + # ## project file list + # out$proj_file_list <- local$file_input$name[local$file_input$valid] + # + # # # debugging + # # pprint("file_input") + # # pprint(local$file_input) + # }) + # + # ## Data file file + # data_file <- callModule( + # input_data_server, "input_data_file", + # proj_dir = reactive(out$proj_dir), + # existing_proj_zip = reactive(local$existing_proj_zip) + # ) + # + # # update local if data file upload + # observe({ + # req(!is.null(data_file$name)) + # local$data_file <- data_file$name + # }) + # + # # data file extracted from existing project zip file + # observe({ + # req(!is.null(local$existing_proj_zip)) + # req(!is.null(local$header_data_file)) + # + # if(local$existing_proj_zip) { + # local$data_file <- local$header_data_file + # } + # }) + # + # ## Data file check + # check_data <- callModule( + # check_data_server, "check_data_file", + # data_file = reactive(local$data_file), + # expected_data_file = reactive(local$header_data_file), + # locus_type = reactive(out$locus_type), + # seq_mode = reactive(out$seq_mode), + # proj_dir = reactive(out$proj_dir) + # ) + # + # # update output + # observe({ + # out$data_file <- data_file$name + # out$data_info <- check_data$info + # out$valid_data_file <- check_data$valid + # }) + # + # # valid set up ? + # observe({ + # + # req(!is.null(out$valid_data_file)) + # req(!is.null(local$valid_proj_name)) + # + # # check header if required + # valid_header <- TRUE + # if(!is.null(out$proj_header_content$valid)) { + # valid_header <- out$proj_header_content$valid + # } + # + # out$valid_proj <- local$valid_proj_name & valid_header & out$valid_data_file + # + # # # debugging + # # logging("valid proj?", out$valid_proj) + # }) + # + # output$proj_is_ready <- renderUI({ + # if(!(out$valid_proj & out$valid_data_file)) { + # h3(icon("warning"), "Project set up is not ready.", + # style="color:red;text-align:center;") + # } else { + # h4(icon("check"), "Project set up is ok.", + # style="text-align:center;") + # } + # }) + # ## output return(out) From c07e7eeeb99b370a2cc4c92eeaf913b404db4ccd Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 16:09:15 +0100 Subject: [PATCH 15/82] move help regarding project type in helper --- R-pkg/R/61_analysis_module.R | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 0d72abf3..13ba0097 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -73,12 +73,18 @@ analysis_proj_set_ui <- function(id) { h3("Data type"), data_type_ui(ns("data_type")), hr(), - h3("Project type"), - helpText( - "You can either: (i) start with a new project;", - "(ii) open one of your own an existing project;", - "or (iii) open one of the included examples." - ), + h3("Project type") %>% + helper( + type = "inline", + content = as.character(tagList( + "You can either:", + tags$ol( + tags$li("start with a new project;"), + tags$li("open one of your own an existing project;"), + tags$li("open one of the included examples.") + ) + )) + ), radioGroupButtons( ns("proj_type"), label = NULL, From c9ea9a4827972c39dd4db7b80133bc76436360a3 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 16:29:57 +0100 Subject: [PATCH 16/82] provide initial value for locus_type and seq_mode --- R-pkg/R/04_environment.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index 3ce01a0f..e3c889a3 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -33,8 +33,8 @@ init_diyabcrf_env <- function() { ## project setup proj_name = NULL, # project name proj_dir = NULL, # project directory - locus_type = NULL, # "SNP" or "MSS" - seq_mode = NULL, # "IndSeq" or "PoolSeq" + locus_type = "snp", # "snp" or "mss" + seq_mode = "indseq", # "indseq" or "poolseq" ## observed data data_file = NULL # observed data file name ) From 2df2236c9dfcf561561a7a7cc37aa7bef26d0fc9 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 16:33:55 +0100 Subject: [PATCH 17/82] update corresponding global environment when selecting data type --- R-pkg/R/23_genetic_loci.R | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/R-pkg/R/23_genetic_loci.R b/R-pkg/R/23_genetic_loci.R index 2b41e4b5..27c2ed22 100644 --- a/R-pkg/R/23_genetic_loci.R +++ b/R-pkg/R/23_genetic_loci.R @@ -29,13 +29,10 @@ data_type_ui <- function(id) { #' Data type module server #' @keywords internal #' @author Ghislain Durif -#' @importFrom shinyjs disable enable -data_type_server <- function(input, output, session) { - # init output - out <- reactiveValues( - locus_type = NULL, - seq_mode = NULL - ) +#' @param tag character string, type of project identified by `"ap"` +#' (for diyabc-rf analysis project) or `"dp"` (data generation project). +data_type_server <- function(input, output, session, tag = "ap") { + # disable seq mode if relevant observeEvent(input$locus_type, { req(input$locus_type) @@ -47,22 +44,16 @@ data_type_server <- function(input, output, session) { shinyjs::enable("seq_mode") } }) - # react + + # update env observeEvent(input$locus_type, { req(input$locus_type) - out$locus_type <- input$locus_type + env[[tag]]$locus_type <<- input$locus_type }) observeEvent(input$seq_mode, { req(input$seq_mode) - out$seq_mode <- input$seq_mode + env[[tag]]$seq_mode <<- input$seq_mode }) - # # debugging - # observe({ - # logging("locus type:", out$locus_type) - # logging("seq mode:", out$seq_mode) - # }) - # output - return(out) } From 02a82a74522bdbd9f25de73d02a5951f51d6834c Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 16:35:25 +0100 Subject: [PATCH 18/82] log project directory --- R-pkg/R/app_index.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/R-pkg/R/app_index.R b/R-pkg/R/app_index.R index 48c9cf42..a9482cb0 100644 --- a/R-pkg/R/app_index.R +++ b/R-pkg/R/app_index.R @@ -82,6 +82,7 @@ index_server <- function(input, output, session) { ## new analysis project observeEvent(home_page$new_analysis_project, { req(home_page$new_analysis_project) + # rendering sidebar menu output$menu_tabs <- renderMenu({ sidebarMenu( @@ -93,16 +94,24 @@ index_server <- function(input, output, session) { ) ) }) + # update tab item updateTabItems(session, "app_menu", selected = "analysis_tab") + # init env init_diyabcrf_env() reset_diyabcrf_env() + + # verbosity + observe({ + logging("analysis project directory:", env$ap$proj_dir) + }) }) ## new data generation project observeEvent(home_page$new_datagen_project, { req(home_page$new_datagen_project) + # rendering sidebar menu output$menu_tabs <- renderMenu({ sidebarMenu( @@ -114,11 +123,18 @@ index_server <- function(input, output, session) { ) ) }) + # update tab item updateTabItems(session, "app_menu", selected = "datagen_tab") + # init env init_datagen_env() reset_datagen_env() + + # verbosity + observe({ + logging("data generation project directory:", env$dp$proj_dir) + }) }) ## analysis page From dc686a79438e131caaf85bb88838f7c3c78eee5f Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 16:36:52 +0100 Subject: [PATCH 19/82] move clean on exit to main analysis module + rename proj name module + use new data type module --- R-pkg/R/61_analysis_module.R | 82 +++++++++++++++--------------------- 1 file changed, 33 insertions(+), 49 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 13ba0097..4d5add51 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -60,6 +60,11 @@ analysis_module_server <- function(input, output, session) { req(proj_admin$reset) session$reload() }) + + ## clean on exit + session$onSessionEnded(function() { + isolate(tryCatch(fs::dir_delete(env$ap$proj_dir))) + }) } #' Analysis project setting ui @@ -68,7 +73,7 @@ analysis_module_server <- function(input, output, session) { analysis_proj_set_ui <- function(id) { ns <- NS(id) tagList( - proj_name_ui(ns("proj_name_setup")), + proj_name_ui(ns("proj_name")), hr(), h3("Data type"), data_type_ui(ns("data_type")), @@ -179,66 +184,45 @@ analysis_proj_set_ui <- function(id) { #' @importFrom dplyr distinct #' @importFrom fs file_copy file_delete analysis_proj_set_server <- function(input, output, session) { - # namespace - ns <- session$ns + + # init local local <- reactiveValues( - data_file = NULL, - file_input = NULL, - valid_proj_name = FALSE, - existing_proj_zip = FALSE, - # data.frame with 4 columns: - # name (chr), size (int), type (chr), datapath (chr) - header_data_file = NULL + # data_file = NULL, + # file_input = NULL, + # valid_proj_name = FALSE, + # existing_proj_zip = FALSE, + # # data.frame with 4 columns: + # # name (chr), size (int), type (chr), datapath (chr) + # header_data_file = NULL ) # init output out <- reactiveValues( - data_file = NULL, - data_info = NULL, - locus_type = NULL, - seq_mode = NULL, - new_proj = TRUE, - proj_dir = mk_proj_dir("diyabc_rf"), - proj_file_list = character(0), - proj_header_content = list(), - proj_name = NULL, - valid_data_file = FALSE, - valid_proj = FALSE + # data_file = NULL, + # data_info = NULL, + # locus_type = NULL, + # seq_mode = NULL, + # new_proj = TRUE, + # proj_dir = mk_proj_dir("diyabc_rf"), + # proj_file_list = character(0), + # proj_header_content = list(), + # proj_name = NULL, + # valid_data_file = FALSE, + # valid_proj = FALSE ) - # clean on exit - session$onSessionEnded(function() { - isolate(tryCatch(fs::dir_delete(out$proj_dir))) - }) + ## project name + proj_name <- callModule(proj_name_server, "proj_name", tag = "ap") + + ## data type + data_type <- callModule(data_type_server, "data_type", tag = "ap") # debugging observe({ - logging("project directory:", out$proj_dir) + logging("locus type:", env$ap$locus_type) + logging("seq mode:", env$ap$seq_mode) }) - ## project name - proj_name_setup <- callModule(proj_name_server, "proj_name_setup", - tag = "ap") - - # observeEvent(proj_name_setup$proj_name, { - # req(proj_name_setup$proj_name) - # out$proj_name <- proj_name_setup$proj_name - # }) - # - # observeEvent(proj_name_setup$valid_proj_name, { - # req(!is.null(proj_name_setup$valid_proj_name)) - # local$valid_proj_name <- proj_name_setup$valid_proj_name - # }) - # - # ## data type - # data_type <- callModule(data_type_server, "data_type") - # observe({ - # req(data_type$locus_type) - # req(data_type$seq_mode) - # out$locus_type <- data_type$locus_type - # out$seq_mode <- data_type$seq_mode - # }) - # # ## new or existing project # observe({ # req(input$proj_type) From 571e9180f63770d5bf85d60cc3e957545154fa5d Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 4 Mar 2021 17:15:41 +0100 Subject: [PATCH 20/82] WIP: writing project type module (to choose between new, existing or example) --- R-pkg/R/61_analysis_module.R | 165 +++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 75 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 4d5add51..6527a513 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -78,6 +78,56 @@ analysis_proj_set_ui <- function(id) { h3("Data type"), data_type_ui(ns("data_type")), hr(), + proj_type_ui(ns("proj_type")), + hr(), + uiOutput(ns("feedback")) + ) +} + +#' Analysis project setting server +#' @keywords internal +#' @author Ghislain Durif +#' @importFrom dplyr distinct +#' @importFrom fs file_copy file_delete +analysis_proj_set_server <- function(input, output, session) { + + + # init local + local <- reactiveValues() + + # init output + out <- reactiveValues() + + ## project name + proj_name <- callModule(proj_name_server, "proj_name", tag = "ap") + + ## data type + data_type <- callModule(data_type_server, "data_type", tag = "ap") + + ## project type + proj_type <- callModule(proj_type_server, "proj_type") + + # output$proj_is_ready <- renderUI({ + # if(!(out$valid_proj & out$valid_data_file)) { + # h3(icon("warning"), "Project set up is not ready.", + # style="color:red;text-align:center;") + # } else { + # h4(icon("check"), "Project set up is ok.", + # style="text-align:center;") + # } + # }) + # + + ## output + return(out) +} + +#' Project type setting ui +#' @keywords internal +#' @author Ghislain Durif +proj_type_ui <- function(id) { + ns <- NS(id) + tagList( h3("Project type") %>% helper( type = "inline", @@ -102,7 +152,41 @@ analysis_proj_set_ui <- function(id) { conditionalPanel( condition = "input.proj_type == 'existing'", ns = ns, - h4(tags$b("Project files")), + h4(tags$b("Project files")) %>% + helper( + type = "inline", + content = as.character(tagList( + tags$ul( + tags$li( + "You can", tags$b("either"), "upload:", + "a project", tags$code("zip"), + "file generated in a previous run", + tags$b("or"), + "single project-related files, including", + tags$code("headerRF.txt"), ", ", + tags$code("reftableRF.bin"), ", ", + tags$code("statobsRF.txt"), "." + ), + tags$li( + "You", tags$b("cannot"), "upload", + "both a project", + tags$code("zip"), "file", + "and single project-related files." + ), + tags$li( + "If you re-upload a file, it will over-write", + "the corresponding file", + "that you previously uploaded." + ), + tags$li( + "If some project files are missing", + "or have formating issues", + "you will be able to (re)configure", + "the corresponding settings below." + ) + ) + )) + ), helpText( "Use ctrl+click to select more than one file." ), @@ -116,37 +200,6 @@ analysis_proj_set_ui <- function(id) { ".bin", ".zip" ) - ), - helpText( - tags$ul( - tags$li( - "You can", tags$b("either"), "upload:", - "a project", tags$code("zip"), - "file generated in a previous run", - tags$b("or"), - "single project-related files, including", - tags$code("headerRF.txt"), ", ", - tags$code("reftableRF.bin"), ", ", - tags$code("statobsRF.txt"), "." - ), - tags$li( - "You", tags$b("cannot"), "upload both a project", - tags$code("zip"), "file", - "and single project-related files." - ), - tags$li( - "If you re-upload a file, it will over-write the", - "corresponding file that you previously uploaded." - ) - ) - ), - br(), - uiOutput(ns("file_check")), - helpText( - "If some files are missing or have formating issues", - "(identified with a", icon("times"),")", - "you will be able to (re)configure the corresponding", - "settings below." ) ), conditionalPanel( @@ -170,58 +223,24 @@ analysis_proj_set_ui <- function(id) { conditionalPanel( condition = "input.proj_type !== 'example'", ns = ns, - input_data_ui(ns("input_data_file")) + input_data_ui(ns("input_data_file")), ), check_data_ui(ns("check_data_file")), hr(), - uiOutput(ns("proj_is_ready")) + uiOutput(ns("feedback")) ) } -#' Analysis project setting ui +#' Project type setting server #' @keywords internal #' @author Ghislain Durif #' @importFrom dplyr distinct #' @importFrom fs file_copy file_delete -analysis_proj_set_server <- function(input, output, session) { +proj_type_server <- function(input, output, session) { # init local - local <- reactiveValues( - # data_file = NULL, - # file_input = NULL, - # valid_proj_name = FALSE, - # existing_proj_zip = FALSE, - # # data.frame with 4 columns: - # # name (chr), size (int), type (chr), datapath (chr) - # header_data_file = NULL - ) - # init output - out <- reactiveValues( - # data_file = NULL, - # data_info = NULL, - # locus_type = NULL, - # seq_mode = NULL, - # new_proj = TRUE, - # proj_dir = mk_proj_dir("diyabc_rf"), - # proj_file_list = character(0), - # proj_header_content = list(), - # proj_name = NULL, - # valid_data_file = FALSE, - # valid_proj = FALSE - ) - - ## project name - proj_name <- callModule(proj_name_server, "proj_name", tag = "ap") - - ## data type - data_type <- callModule(data_type_server, "data_type", tag = "ap") - - # debugging - observe({ - logging("locus type:", env$ap$locus_type) - logging("seq mode:", env$ap$seq_mode) - }) + local <- reactiveValues() # ## new or existing project # observe({ @@ -516,10 +535,6 @@ analysis_proj_set_server <- function(input, output, session) { # style="text-align:center;") # } # }) - # - - ## output - return(out) } #' Existing project related file check From aff2dd252bde52e1c79d074bece2d32ee965f2fc Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 10 Mar 2021 16:45:21 +0100 Subject: [PATCH 21/82] reformat project type/file help + manage project file input with potential warnings (TODO: project file check) --- R-pkg/R/61_analysis_module.R | 424 +++++++++++++++++++---------------- 1 file changed, 235 insertions(+), 189 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 6527a513..00e11036 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -127,19 +127,58 @@ analysis_proj_set_server <- function(input, output, session) { #' @author Ghislain Durif proj_type_ui <- function(id) { ns <- NS(id) + # inline help for project type + proj_type_help <- tagList( + "You can either:", + tags$ol( + tags$li("start with a new project;"), + tags$li("open one of your own an existing project;"), + tags$li("open one of the included examples.") + ) + ) + # inline help for proj file input + proj_file_help <- tagList( + tags$ul( + tags$li( + "You can", tags$b("either"), "upload:", + "a project", tags$code("zip"), + "file generated in a previous run", + tags$b("or"), + "single project-related files, including", + tags$code("headerRF.txt"), ", ", + tags$code("reftableRF.bin"), ", ", + tags$code("statobsRF.txt"), "and your observed data file." + ), + tags$li( + "You", tags$b("cannot"), "upload both a project", + tags$code("zip"), "file", + "and single project-related files, those will be ignored.", + style = "margin-top: 10px;" + ), + tags$li( + "When uploading single project-related files,", + "you", tags$b("should"), "upload all required files", + "at the same time (use", tags$code("CTRL+click"), + "to select multiple files).", + style = "margin-top: 10px;" + ), + tags$li( + "If you re-upload a file or a group of files, ", + "it will delete any previous upload.", + style = "margin-top: 10px;" + ), + tags$li( + "If some project files are missing or have formating issues", + "you will be able to (re)configure", + "the corresponding settings below.", + style = "margin-top: 10px;" + ) + ) + ) + # ui tagList( h3("Project type") %>% - helper( - type = "inline", - content = as.character(tagList( - "You can either:", - tags$ol( - tags$li("start with a new project;"), - tags$li("open one of your own an existing project;"), - tags$li("open one of the included examples.") - ) - )) - ), + helper(type = "inline", content = as.character(proj_type_help)), radioGroupButtons( ns("proj_type"), label = NULL, @@ -155,40 +194,10 @@ proj_type_ui <- function(id) { h4(tags$b("Project files")) %>% helper( type = "inline", - content = as.character(tagList( - tags$ul( - tags$li( - "You can", tags$b("either"), "upload:", - "a project", tags$code("zip"), - "file generated in a previous run", - tags$b("or"), - "single project-related files, including", - tags$code("headerRF.txt"), ", ", - tags$code("reftableRF.bin"), ", ", - tags$code("statobsRF.txt"), "." - ), - tags$li( - "You", tags$b("cannot"), "upload", - "both a project", - tags$code("zip"), "file", - "and single project-related files." - ), - tags$li( - "If you re-upload a file, it will over-write", - "the corresponding file", - "that you previously uploaded." - ), - tags$li( - "If some project files are missing", - "or have formating issues", - "you will be able to (re)configure", - "the corresponding settings below." - ) - ) - )) + content = as.character(proj_file_help) ), helpText( - "Use ctrl+click to select more than one file." + "Use", tags$code("CTRL+click"), "to select more than one file." ), fileInput( ns("file_input"), @@ -200,7 +209,8 @@ proj_type_ui <- function(id) { ".bin", ".zip" ) - ) + ), + uiOutput(ns("feedbak_existing")) ), conditionalPanel( condition = "input.proj_type == 'example'", @@ -227,7 +237,7 @@ proj_type_ui <- function(id) { ), check_data_ui(ns("check_data_file")), hr(), - uiOutput(ns("feedback")) + uiOutput(ns("feedback_data")) ) } @@ -238,9 +248,52 @@ proj_type_ui <- function(id) { #' @importFrom fs file_copy file_delete proj_type_server <- function(input, output, session) { - # init local - local <- reactiveValues() + local <- reactiveValues( + file_list = NULL, + valid_files = FALSE + ) + + ## Existing project + # file_input = data.frame with fields 'name', 'size', 'type', 'datapath' + observe({ + pprint("file input") + print(input$file_input) + }) + + # Feedback on file upload + observe({ + req(input$proj_type) + req(input$proj_type == "existing") + + # feedback on missing file + feedbackWarning("file_input", !isTruthy(input$file_input), + "Missing file(s).") + }) + + # manage file upload (copy to project directory) + observe({ + req(input$proj_type) + req(input$proj_type == "existing") + req(input$file_input) + + tmp <- manage_existing_proj_file(input$file_input, env$ap$proj_dir) + + print(tmp) + + output$feedbak_existing <- renderUI({ + if(length(tmp$msg) > 0) { + msg <- "Issue(s) with uploaded file(s)." + feedbackWarning("file_input", length(tmp$msg) > 0, msg) + tags$div( + do.call(tags$ul, lapply(tmp$msg, tags$li)), + style = "color: #F89406; margin-top: -15px;" + ) + } else { + NULL + } + }) + }) # ## new or existing project # observe({ @@ -537,73 +590,47 @@ proj_type_server <- function(input, output, session) { # }) } -#' Existing project related file check +#' Manage uploading of existing project related files #' @keywords internal #' @author Ghislain Durif -existing_proj_file_check <- function( - new_file_input, possible_files, proj_dir, file_input -) { - # new_file_input - # data.frame with 4 columns: - # name (chr), size (int), type (chr), datapath (chr) - # possible files - # vector of possible files - # proj_dir - # character path - # file_input - # data.frame with already uploaded files +#' @param file_input data.frame with fields name (chr), size (int), +#' type (chr), datapath (chr) storing new uploaded files. +#' @param proj_dir character string, path to project directory. +manage_existing_proj_file <- function(file_input, proj_dir) { # init output - out <- list(existing_proj_zip = FALSE, - file_input = file_input) + out <- list(msg = list(), valid = FALSE) # # debugging - # pprint("new file input") - # pprint(new_file_input) + # pprint("file input") + # pprint(file_input) # check if project zip files was provided - check4zip <- manage_proj_zip_file(new_file_input, possible_files) - new_file_input <- check4zip$new_file_input - out$existing_proj_zip <- check4zip$existing_proj_zip + check4zip <- manage_proj_zip_file(file_input) + out$msg <- append(out$msg, check4zip$msg) - # # debugging - # pprint("new file input") - # pprint(new_file_input) - - ## prepare filename check (if relevant) - new_file_input$valid <- rep(TRUE, nrow(new_file_input)) - - ## delete non related files - if(!out$existing_proj_zip) { - - ## filename check - new_file_input$valid <- (new_file_input$name %in% possible_files) - - ## delete - lapply( - split(new_file_input, seq(nrow(new_file_input))), - function(item) { - if(!item$valid) { - if(file.exists(item$datapath)) { - # logging("deleting:", item$datapath) - fs::file_delete(item$datapath) - } - } - } - ) - - new_file_input <- new_file_input[new_file_input$valid,] + if(check4zip$zip_file) { + if(!check4zip$valid) { + out$valid <- FALSE + return(out) + } + file_input <- check4zip$file_input } # # debugging - # pprint("new file input") - # pprint(str(new_file_input)) - # pprint(new_file_input) + # pprint("file input") + # pprint(file_input) - ## copy files to project directory - if(nrow(new_file_input) > 0) { + # manage new uploaded project files + if(nrow(file_input) > 0) { + + # empty project directory + fs::dir_delete(list.dirs(proj_dir, recursive = FALSE)) + fs::file_delete(list.files(proj_dir, full.names = TRUE)) + + # copy files to project directory lapply( - split(new_file_input, seq(nrow(new_file_input))), + split(file_input, seq(nrow(file_input))), function(item) { if(item$type == "diyabc_dir") { fs::dir_copy( @@ -626,20 +653,9 @@ existing_proj_file_check <- function( } } ) - new_file_input$datapath <- file.path( - proj_dir, - new_file_input$name - ) - ## update file input list - if(is.null(file_input)) { - out$file_input <- new_file_input - } else { - out$file_input <- rbind( - file_input[!file_input$name %in% new_file_input$name,], - new_file_input - ) - } + # valid upload ? + out$valid <- TRUE } # output @@ -649,96 +665,126 @@ existing_proj_file_check <- function( #' Manage existing project zip file #' @keywords internal #' @author Ghislain Durif -manage_proj_zip_file <- function(new_file_input, possible_files) { - # new_file_input - # data.frame with 4 columns: - # name (chr), size (int), type (chr), datapath (chr) +#' @param file_input data.frame with fields name (chr), size (int), +#' type (chr), datapath (chr) storing new uploaded files. +manage_proj_zip_file <- function(file_input) { # init output - out <- list(existing_proj_zip = FALSE, - new_file_input = new_file_input) + out <- list( + file_input = NULL, msg = list(), zip_file = FALSE, valid = FALSE + ) # any uploaded zip file ? - zip_file_ind <- str_detect(string = new_file_input$name, - pattern = "\\.zip$") - find_zip_file <- any(zip_file_ind) - if(find_zip_file) { + zip_file_ind <- (file_input$type == "application/zip") + + ## IF NOT + if(!any(zip_file_ind)) { + return(out) + } + + ## ELSE + out$zip_file <- TRUE + + # a single or multiple zip files ? + if(sum(zip_file_ind) > 1) { + msg <- tagList( + "You provided more than", tags$b("one"), "project", + tags$code("zip"), "files." + ) + out$msg <- append(out$msg, list(msg)) + out$valid <- FALSE + return(out) + } + + # a zip file and other files ? + if(nrow(file_input) > 1) { + msg <- tagList( + "You provided a project", tags$code("zip"), "file", + tags$b("and"), "other file(s)." + ) + out$msg <- append(out$msg, list(msg)) + out$valid <- FALSE + return(out) + } + + ## READY TO EXTRACT PROJECT FILES + out$valid <- TRUE + + # temp data dir + tmp_data_dir <- dirname(file_input$datapath[1]) + + # extract project files + unzip( + file_input$datapath[1], + exdir = dirname(file_input$datapath[1]) + ) + + # remove zip file + fs::file_delete(file_input$datapath[1]) + + # list content of zip file + tmp_file_list <- list.files(tmp_data_dir) + + if(length(tmp_file_list) > 0) { - # multiple zip files ? - if(sum(zip_file_ind) > 1) { - txt <- str_c( - "Multiple project zip files were provided. ", - "Only first one (by lexicographical order) ", - "will be considered." + # modify file_input with extracted files + # data.frame with 4 columns: + # name (chr), size (int), type (chr), datapath (chr) + out$file_input <- Reduce( + "rbind", + lapply( + tmp_file_list, + function(tmp_file) { + tmp_file_info <- file.info( + file.path(tmp_data_dir, tmp_file) + ) + return( + data.frame( + name = tmp_file, + size = tmp_file_info$size, + type = ifelse( + tmp_file_info$isdir, + "diyabc_dir", + "diyabc_file" + ), + datapath = file.path(tmp_data_dir, tmp_file), + stringsAsFactors = FALSE + ) + ) + } ) - warnings(txt) - new_file_input <- head(new_file_input[zip_file_ind,],1) - } - - # temp data dir - tmp_data_dir <- dirname(new_file_input$datapath[1]) - - # extract project files - unzip( - new_file_input$datapath[1], - exdir = dirname(new_file_input$datapath[1]) ) - # remove zip file - fs::file_delete(new_file_input$datapath[1]) + ## specific file type + ind <- which(out$file_input$name == "headerRF.txt") + out$file_input$type[ind] <- "text/plain" + ind <- which(out$file_input$name == "reftableRF.bin") + out$file_input$type[ind] <- "application/octet-stream" + ind <- which(out$file_input$name == "statObsRF.txt") + out$file_input$type[ind] <- "text/plain" - # list content of zip file - tmp_file_list <- list.files(tmp_data_dir) - - if(length(tmp_file_list) > 0) { - - # check if project-related zip file was provided - out$existing_proj_zip <- any(tmp_file_list %in% possible_files) - - # modify new_file_input - # data.frame with 4 columns: - # name (chr), size (int), type (chr), datapath (chr) - out$new_file_input <- Reduce( - "rbind", - lapply( - tmp_file_list, - function(tmp_file) { - tmp_file_info <- file.info( - file.path(tmp_data_dir, tmp_file) - ) - return( - data.frame( - name = tmp_file, - size = tmp_file_info$size, - type = ifelse( - tmp_file_info$isdir, - "diyabc_dir", - "diyabc_file" - ), - datapath = file.path(tmp_data_dir, tmp_file), - stringsAsFactors = FALSE - ) - ) - } - ) - ) - - ## file type - ind <- which(out$new_file_input$name == "headerRF.txt") - out$new_file_input$type[ind] <- "text/plain" - ind <- which(out$new_file_input$name == "reftableRF.bin") - out$new_file_input$type[ind] <- "application/octet-stream" - ind <- which(out$new_file_input$name == "statObsRF.txt") - out$new_file_input$type[ind] <- "text/plain" - - } else { - out$new_file_input <- head(new_file_input, 0) - } + } else { + out$file_input <- head(file_input, 0) + out$valid <- FALSE + msg <- tagList( + "You provided an empty project", tags$code("zip"), "file.", + ) + out$msg <- append(out$msg, list(msg)) } + # output return(out) } +#' Check existing project related files +#' @keywords internal +#' @author Ghislain Durif +#' @param file_input data.frame with fields name (chr), size (int), +#' @param proj_dir character string, path to project directory. +check_proj_file <- function(proj_dir) { + # WRITEME +} + #' Input data ui #' @keywords internal #' @author Ghislain Durif From 0639a70b5461dd541870278edc96b58365822d5f Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 26 Apr 2021 10:08:35 +0200 Subject: [PATCH 22/82] update version number for the app refactoring --- R-pkg/DESCRIPTION | 4 ++-- version | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R-pkg/DESCRIPTION b/R-pkg/DESCRIPTION index 25db74f9..a225e458 100644 --- a/R-pkg/DESCRIPTION +++ b/R-pkg/DESCRIPTION @@ -1,8 +1,8 @@ Package: diyabcGUI Type: Package Title: Graphical User Interface for DYIABC-RF software -Version: 1.0.14 -Date: 2021-03-12 +Version: 1.1.0 +Date: 2021-04-26 Authors@R: c( person( "Ghislain", "Durif", comment="diyabcGUI main developer", diff --git a/version b/version index 5b09c67c..9084fa2f 100644 --- a/version +++ b/version @@ -1 +1 @@ -1.0.14 +1.1.0 From 9b0b03f1067a06020c2dfe45e32187c06308e5fa Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 29 Apr 2021 15:26:25 +0200 Subject: [PATCH 23/82] inline doc minor edition + WIP modification of project input file management in a dedicated source file --- R-pkg/R/61_analysis_module.R | 206 +---------------------------------- 1 file changed, 6 insertions(+), 200 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 00e11036..3cb11c66 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -156,21 +156,22 @@ proj_type_ui <- function(id) { style = "margin-top: 10px;" ), tags$li( - "When uploading single project-related files,", + "When uploading", tags$b("single project-related files"), "you", tags$b("should"), "upload all required files", "at the same time (use", tags$code("CTRL+click"), - "to select multiple files).", + "to select multiple files in the file chooser window).", style = "margin-top: 10px;" ), tags$li( - "If you re-upload a file or a group of files, ", - "it will delete any previous upload.", + "If you", tags$b("re-upload"), "a file or a group of files,", + "it will", tags$b("delete"), "and", tags$b("replace"), + "any previous upload.", style = "margin-top: 10px;" ), tags$li( "If some project files are missing or have formating issues", "you will be able to (re)configure", - "the corresponding settings below.", + "the corresponding settings in the next panel.", style = "margin-top: 10px;" ) ) @@ -590,201 +591,6 @@ proj_type_server <- function(input, output, session) { # }) } -#' Manage uploading of existing project related files -#' @keywords internal -#' @author Ghislain Durif -#' @param file_input data.frame with fields name (chr), size (int), -#' type (chr), datapath (chr) storing new uploaded files. -#' @param proj_dir character string, path to project directory. -manage_existing_proj_file <- function(file_input, proj_dir) { - - # init output - out <- list(msg = list(), valid = FALSE) - - # # debugging - # pprint("file input") - # pprint(file_input) - - # check if project zip files was provided - check4zip <- manage_proj_zip_file(file_input) - out$msg <- append(out$msg, check4zip$msg) - - if(check4zip$zip_file) { - if(!check4zip$valid) { - out$valid <- FALSE - return(out) - } - file_input <- check4zip$file_input - } - - # # debugging - # pprint("file input") - # pprint(file_input) - - # manage new uploaded project files - if(nrow(file_input) > 0) { - - # empty project directory - fs::dir_delete(list.dirs(proj_dir, recursive = FALSE)) - fs::file_delete(list.files(proj_dir, full.names = TRUE)) - - # copy files to project directory - lapply( - split(file_input, seq(nrow(file_input))), - function(item) { - if(item$type == "diyabc_dir") { - fs::dir_copy( - item$datapath, - file.path(proj_dir, item$name), - overwrite = TRUE - ) - if(dir.exists(item$datapath)) { - # logging("deleting:", item$datapath) - fs::dir_delete(item$datapath) - } - } else { - fs::file_copy(item$datapath, - file.path(proj_dir, item$name), - overwrite = TRUE) - if(file.exists(item$datapath)) { - # logging("deleting:", item$datapath) - fs::file_delete(item$datapath) - } - } - } - ) - - # valid upload ? - out$valid <- TRUE - } - - # output - return(out) -} - -#' Manage existing project zip file -#' @keywords internal -#' @author Ghislain Durif -#' @param file_input data.frame with fields name (chr), size (int), -#' type (chr), datapath (chr) storing new uploaded files. -manage_proj_zip_file <- function(file_input) { - - # init output - out <- list( - file_input = NULL, msg = list(), zip_file = FALSE, valid = FALSE - ) - - # any uploaded zip file ? - zip_file_ind <- (file_input$type == "application/zip") - - ## IF NOT - if(!any(zip_file_ind)) { - return(out) - } - - ## ELSE - out$zip_file <- TRUE - - # a single or multiple zip files ? - if(sum(zip_file_ind) > 1) { - msg <- tagList( - "You provided more than", tags$b("one"), "project", - tags$code("zip"), "files." - ) - out$msg <- append(out$msg, list(msg)) - out$valid <- FALSE - return(out) - } - - # a zip file and other files ? - if(nrow(file_input) > 1) { - msg <- tagList( - "You provided a project", tags$code("zip"), "file", - tags$b("and"), "other file(s)." - ) - out$msg <- append(out$msg, list(msg)) - out$valid <- FALSE - return(out) - } - - ## READY TO EXTRACT PROJECT FILES - out$valid <- TRUE - - # temp data dir - tmp_data_dir <- dirname(file_input$datapath[1]) - - # extract project files - unzip( - file_input$datapath[1], - exdir = dirname(file_input$datapath[1]) - ) - - # remove zip file - fs::file_delete(file_input$datapath[1]) - - # list content of zip file - tmp_file_list <- list.files(tmp_data_dir) - - if(length(tmp_file_list) > 0) { - - # modify file_input with extracted files - # data.frame with 4 columns: - # name (chr), size (int), type (chr), datapath (chr) - out$file_input <- Reduce( - "rbind", - lapply( - tmp_file_list, - function(tmp_file) { - tmp_file_info <- file.info( - file.path(tmp_data_dir, tmp_file) - ) - return( - data.frame( - name = tmp_file, - size = tmp_file_info$size, - type = ifelse( - tmp_file_info$isdir, - "diyabc_dir", - "diyabc_file" - ), - datapath = file.path(tmp_data_dir, tmp_file), - stringsAsFactors = FALSE - ) - ) - } - ) - ) - - ## specific file type - ind <- which(out$file_input$name == "headerRF.txt") - out$file_input$type[ind] <- "text/plain" - ind <- which(out$file_input$name == "reftableRF.bin") - out$file_input$type[ind] <- "application/octet-stream" - ind <- which(out$file_input$name == "statObsRF.txt") - out$file_input$type[ind] <- "text/plain" - - } else { - out$file_input <- head(file_input, 0) - out$valid <- FALSE - msg <- tagList( - "You provided an empty project", tags$code("zip"), "file.", - ) - out$msg <- append(out$msg, list(msg)) - } - - # output - return(out) -} - -#' Check existing project related files -#' @keywords internal -#' @author Ghislain Durif -#' @param file_input data.frame with fields name (chr), size (int), -#' @param proj_dir character string, path to project directory. -check_proj_file <- function(proj_dir) { - # WRITEME -} - #' Input data ui #' @keywords internal #' @author Ghislain Durif From eff44e4a7f5036711d1a9ebbf2160a0fe0b13732 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 29 Apr 2021 15:27:15 +0200 Subject: [PATCH 24/82] WIP: analysis project file input management + corresponding tests (zip file input is ok) --- R-pkg/R/62_analysis_project_io.R | 208 ++++++++++++++++++ .../testthat/test-62_analysis_project_io.R | 153 +++++++++++++ 2 files changed, 361 insertions(+) create mode 100644 R-pkg/R/62_analysis_project_io.R create mode 100644 R-pkg/tests/testthat/test-62_analysis_project_io.R diff --git a/R-pkg/R/62_analysis_project_io.R b/R-pkg/R/62_analysis_project_io.R new file mode 100644 index 00000000..b6358048 --- /dev/null +++ b/R-pkg/R/62_analysis_project_io.R @@ -0,0 +1,208 @@ +#' Manage uploading of existing project related files +#' @keywords internal +#' @author Ghislain Durif +#' @param file_input data.frame with fields name (chr), size (int), +#' type (chr), datapath (chr) storing new uploaded files. +#' @param proj_dir character string, path to project directory. +proj_file_input <- function(file_input, proj_dir) { + + # init output + out <- list(msg = list(), valid = FALSE) + + # # debugging + # pprint("file input") + # pprint(file_input) + + # check if project zip files was provided + check4zip <- manage_proj_zip_file(file_input) + out$msg <- append(out$msg, check4zip$msg) + + if(check4zip$zip_file) { + if(!check4zip$valid) { + out$valid <- FALSE + return(out) + } + file_input <- check4zip$file_input + } + + # # debugging + # pprint("file input") + # pprint(file_input) + + # manage new uploaded project files + if(nrow(file_input) > 0) { + + # empty project directory + fs::dir_delete(list.dirs(proj_dir, recursive = FALSE)) + fs::file_delete(list.files(proj_dir, full.names = TRUE)) + + # copy files to project directory + lapply( + split(file_input, seq(nrow(file_input))), + function(item) { + if(item$type == "diyabc_dir") { + fs::dir_copy( + item$datapath, + file.path(proj_dir, item$name), + overwrite = TRUE + ) + if(dir.exists(item$datapath)) { + # logging("deleting:", item$datapath) + fs::dir_delete(item$datapath) + } + } else { + fs::file_copy(item$datapath, + file.path(proj_dir, item$name), + overwrite = TRUE) + if(file.exists(item$datapath)) { + # logging("deleting:", item$datapath) + fs::file_delete(item$datapath) + } + } + } + ) + + # valid upload ? + out$valid <- TRUE + } + + # output + return(out) +} + + +#' Manage existing project zip file +#' @keywords internal +#' @author Ghislain Durif +#' @param file_input data.frame with fields name (chr), size (int), +#' type (chr), datapath (chr) storing new uploaded files. +proj_zip_file_input <- function(file_input) { + + # init output + out <- list( + file_input = NULL, msg = list(), zip_file = FALSE, valid = FALSE + ) + + # any uploaded zip file ? + zip_file_ind <- (file_input$type == "application/zip") + + ## IF NOT + if(!any(zip_file_ind)) { + return(out) + } + + ## ELSE + out$zip_file <- TRUE + + # a single or multiple zip files ? + if(sum(zip_file_ind) > 1) { + msg <- tagList( + "You provided more than", tags$b("one"), "project", + tags$code("zip"), "files." + ) + out$msg <- append(out$msg, list(msg)) + out$valid <- FALSE + return(out) + } + + # a zip file and other files ? + if(nrow(file_input) > 1) { + msg <- tagList( + "You provided a project", tags$code("zip"), "file", + tags$b("and"), "other file(s)." + ) + out$msg <- append(out$msg, list(msg)) + out$valid <- FALSE + return(out) + } + + ## READY TO EXTRACT PROJECT FILES + out$valid <- TRUE + + # tmp dir for extraction + tmp_dir <- mk_proj_dir("diyabc_zip_extract") + + # extract project files + unzip( + file_input$datapath[1], + exdir = tmp_dir + ) + + # list content of zip file + tmp_file_list <- list.files(tmp_dir) + + # check if zip file content was at zip root or inside a root directory + if(length(tmp_file_list) == 1) { + + # if extracted project files inside a root directory + if(file.info(tmp_dir)$isdir) { + # move into extracted project directory + tmp_dir <- file.path(tmp_dir, tmp_file_list) + # update list content of zip file + tmp_file_list <- list.files(tmp_dir) + } + } + + # check extracted project files + if(length(tmp_file_list) > 0) { + + # modify file_input with extracted files + # data.frame with 4 columns: + # name (chr), size (int), type (chr), datapath (chr) + out$file_input <- Reduce( + "rbind", + lapply( + tmp_file_list, + function(tmp_file) { + tmp_file_info <- file.info( + file.path(tmp_dir, tmp_file) + ) + return( + data.frame( + name = tmp_file, + size = tmp_file_info$size, + type = ifelse( + tmp_file_info$isdir, + "diyabc_dir", + "diyabc_file" + ), + datapath = file.path(tmp_dir, tmp_file), + stringsAsFactors = FALSE + ) + ) + } + ) + ) + + ## specific file type + ind <- which(out$file_input$name == "headerRF.txt") + out$file_input$type[ind] <- "text/plain" + ind <- which(out$file_input$name == "header.txt") + out$file_input$type[ind] <- "text/plain" + ind <- which(out$file_input$name == "reftableRF.bin") + out$file_input$type[ind] <- "application/octet-stream" + ind <- which(out$file_input$name == "statobsRF.txt") + out$file_input$type[ind] <- "text/plain" + + } else { + + out$file_input <- head(file_input, 0) + out$valid <- FALSE + msg <- tagList( + "You provided an empty project", tags$code("zip"), "file.", + ) + out$msg <- append(out$msg, list(msg)) + } + + # output + return(out) +} + +#' Check existing project related files +#' @keywords internal +#' @author Ghislain Durif +#' @param file_input data.frame with fields name (chr), size (int), +#' @param proj_dir character string, path to project directory. +check_proj_file <- function(proj_dir) { + # WRITEME +} \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-62_analysis_project_io.R b/R-pkg/tests/testthat/test-62_analysis_project_io.R new file mode 100644 index 00000000..a589f32d --- /dev/null +++ b/R-pkg/tests/testthat/test-62_analysis_project_io.R @@ -0,0 +1,153 @@ +context("test-62_analysis_project_io") + +test_that("proj_file_input", { + + test_dir <- mk_proj_dir("test-62_analysis_project_io") + + ## clean project files + test_proj <- "IndSeq_SNP_estim_param" + + # setup test case input + tmp_dir <- file.path(data4test_dir(), test_proj) + tmp_file <- list.files(tmp_dir) + + tmp_file_info <- file.info( + file.path(tmp_dir, tmp_file) + ) + + # input parameters + file_input <- data.frame( + name = tmp_file, + size = tmp_file_info$size, + type = ifelse( + tmp_file_info$isdir, + "diyabc_dir", + "diyabc_file" + ), + datapath = file.path(tmp_dir, tmp_file), + stringsAsFactors = FALSE + ) + proj_dir <- test_dir + + proj_file_input(file_input, proj_dir) + + + + zip::zip(file, list.files(env[[tag]]$proj_dir)) +}) + + + +test_that("proj_zip_file_input", { + + ## clean project zip file with root directory + test_proj <- "IndSeq_SNP_estim_param" + + # setup test case + tmp_dir <- mk_proj_dir("test-62_analysis_project_io") + test_dir <- file.path(data4test_dir(), test_proj) + zip_file <- file.path(tmp_dir, paste0(test_proj, ".zip")) + zip::zip( + zipfile = zip_file, files = test_proj, root = data4test_dir(), + recurse = TRUE, include_directories = TRUE + ) + + tmp_file <- list.files(tmp_dir) + tmp_file_info <- file.info( + file.path(tmp_dir, tmp_file) + ) + + # input parameters + file_input <- data.frame( + name = tmp_file, + size = tmp_file_info$size, + type = "application/zip", + datapath = file.path(tmp_dir, tmp_file), + stringsAsFactors = FALSE + ) + + + zip_proj <- proj_zip_file_input(file_input) + + expect_true(zip_proj$zip_file) + expect_true(zip_proj$valid) + expect_true(length(zip_proj$msg) == 0) + expect_true(nrow(zip_proj$file_input) == length(list.files(test_dir))) + + ## providing two zip files + file_input <- rbind(file_input, file_input) + + zip_proj <- proj_zip_file_input(file_input) + + expect_true(zip_proj$zip_file) + expect_false(zip_proj$valid) + expect_true(length(zip_proj$msg) == 1) + + + ## clean project zip file without root directory + test_proj <- "IndSeq_SNP_estim_param" + + # setup test case + tmp_dir <- mk_proj_dir("test-62_analysis_project_io") + test_dir <- file.path(data4test_dir(), test_proj) + zip_file <- file.path(tmp_dir, paste0(test_proj, ".zip")) + zip::zip( + zipfile = zip_file, files = list.files(test_dir), root = test_dir, + recurse = TRUE, include_directories = TRUE + ) + + tmp_file <- list.files(tmp_dir) + tmp_file_info <- file.info( + file.path(tmp_dir, tmp_file) + ) + + # input parameters + file_input <- data.frame( + name = tmp_file, + size = tmp_file_info$size, + type = "application/zip", + datapath = file.path(tmp_dir, tmp_file), + stringsAsFactors = FALSE + ) + + zip_proj <- proj_zip_file_input(file_input) + + expect_true(zip_proj$zip_file) + expect_true(zip_proj$valid) + expect_true(length(zip_proj$msg) == 0) + expect_true(nrow(zip_proj$file_input) == length(list.files(test_dir))) + + + ## providing empty zip file + + # setup test case + tmp_dir <- mk_proj_dir("test-62_analysis_project_io") + test_dir <- file.path(tmp_dir, "empty_project") + fs::dir_create(test_dir) + zip_file <- file.path(tmp_dir, "empty_project.zip") + zip::zip( + zipfile = zip_file, files = "empty_project", root = tmp_dir, + recurse = TRUE, include_directories = TRUE + ) + fs::dir_delete(test_dir) + + tmp_file <- list.files(tmp_dir) + tmp_file_info <- file.info( + file.path(tmp_dir, tmp_file) + ) + + # input parameters + file_input <- data.frame( + name = tmp_file, + size = tmp_file_info$size, + type = "application/zip", + datapath = file.path(tmp_dir, tmp_file), + stringsAsFactors = FALSE + ) + + zip_proj <- proj_zip_file_input(file_input) + + expect_true(zip_proj$zip_file) + expect_false(zip_proj$valid) + expect_true(length(zip_proj$msg) == 1) +}) From 45bc5dd60e04c5dbabc29702ccce1c83540bd539 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 29 Apr 2021 15:54:29 +0200 Subject: [PATCH 25/82] manage project file input (+tests) --- R-pkg/R/62_analysis_project_io.R | 17 ++-- .../testthat/test-62_analysis_project_io.R | 79 ++++++++++++++++--- 2 files changed, 79 insertions(+), 17 deletions(-) diff --git a/R-pkg/R/62_analysis_project_io.R b/R-pkg/R/62_analysis_project_io.R index b6358048..7dd2dbe9 100644 --- a/R-pkg/R/62_analysis_project_io.R +++ b/R-pkg/R/62_analysis_project_io.R @@ -14,15 +14,15 @@ proj_file_input <- function(file_input, proj_dir) { # pprint(file_input) # check if project zip files was provided - check4zip <- manage_proj_zip_file(file_input) - out$msg <- append(out$msg, check4zip$msg) + is_zip <- proj_zip_file_input(file_input) + out$msg <- append(out$msg, is_zip$msg) - if(check4zip$zip_file) { - if(!check4zip$valid) { + if(is_zip$zip_file) { + if(!is_zip$valid) { out$valid <- FALSE return(out) } - file_input <- check4zip$file_input + file_input <- is_zip$file_input } # # debugging @@ -64,6 +64,13 @@ proj_file_input <- function(file_input, proj_dir) { # valid upload ? out$valid <- TRUE + + } else { + msg <- tagList( + "No file was provided." + ) + out$msg <- append(out$msg, list(msg)) + out$valid <- FALSE } # output diff --git a/R-pkg/tests/testthat/test-62_analysis_project_io.R b/R-pkg/tests/testthat/test-62_analysis_project_io.R index a589f32d..f22da900 100644 --- a/R-pkg/tests/testthat/test-62_analysis_project_io.R +++ b/R-pkg/tests/testthat/test-62_analysis_project_io.R @@ -2,38 +2,90 @@ context("test-62_analysis_project_io") test_that("proj_file_input", { - test_dir <- mk_proj_dir("test-62_analysis_project_io") - ## clean project files test_proj <- "IndSeq_SNP_estim_param" - # setup test case input - tmp_dir <- file.path(data4test_dir(), test_proj) - tmp_file <- list.files(tmp_dir) + # setup test case + tmp_dir <- file.path(mk_proj_dir("test-62_analysis_project_io"), test_proj) + test_dir <- file.path(data4test_dir(), test_proj) + fs::dir_copy(test_dir, tmp_dir, overwrite = TRUE) + tmp_file_list <- list.files(tmp_dir) tmp_file_info <- file.info( - file.path(tmp_dir, tmp_file) + file.path(tmp_dir, tmp_file_list) ) # input parameters file_input <- data.frame( - name = tmp_file, + name = tmp_file_list, size = tmp_file_info$size, type = ifelse( tmp_file_info$isdir, "diyabc_dir", "diyabc_file" ), - datapath = file.path(tmp_dir, tmp_file), + datapath = file.path(tmp_dir, tmp_file_list), stringsAsFactors = FALSE ) - proj_dir <- test_dir - proj_file_input(file_input, proj_dir) + # specific file type + ind <- which(out$file_input$name == "headerRF.txt") + file_input$type[ind] <- "text/plain" + ind <- which(file_input$name == "header.txt") + file_input$type[ind] <- "text/plain" + ind <- which(file_input$name == "reftableRF.bin") + file_input$type[ind] <- "application/octet-stream" + ind <- which(file_input$name == "statobsRF.txt") + file_input$type[ind] <- "text/plain" + + # tmp proj dir + proj_dir <- mk_proj_dir("test-62_analysis_project_io") + + # run + proj_file <- proj_file_input(file_input, proj_dir) + + expect_true(proj_file$valid) + expect_true(length(list.files(proj_dir)) == length(list.files(test_dir))) + + + ## no input + proj_file <- proj_file_input(head(file_input, 0), proj_dir) + + expect_false(proj_file$valid) + expect_true(length(proj_file$msg) == 1) + + + ## clean project zip file with root directory + test_proj <- "IndSeq_SNP_estim_param" + + # setup test case + tmp_dir <- mk_proj_dir("test-62_analysis_project_io") + test_dir <- file.path(data4test_dir(), test_proj) + zip_file <- file.path(tmp_dir, paste0(test_proj, ".zip")) + zip::zip( + zipfile = zip_file, files = test_proj, root = data4test_dir(), + recurse = TRUE, include_directories = TRUE + ) + + tmp_file <- list.files(tmp_dir) + tmp_file_info <- file.info( + file.path(tmp_dir, tmp_file) + ) + # input parameters + file_input <- data.frame( + name = tmp_file, + size = tmp_file_info$size, + type = "application/zip", + datapath = file.path(tmp_dir, tmp_file), + stringsAsFactors = FALSE + ) + # run + proj_file <- proj_file_input(file_input, proj_dir) - zip::zip(file, list.files(env[[tag]]$proj_dir)) + expect_true(proj_file$valid) + expect_true(length(list.files(proj_dir)) == length(list.files(test_dir))) }) @@ -66,7 +118,7 @@ test_that("proj_zip_file_input", { stringsAsFactors = FALSE ) - + # run zip_proj <- proj_zip_file_input(file_input) expect_true(zip_proj$zip_file) @@ -77,6 +129,7 @@ test_that("proj_zip_file_input", { ## providing two zip files file_input <- rbind(file_input, file_input) + # run zip_proj <- proj_zip_file_input(file_input) expect_true(zip_proj$zip_file) @@ -110,6 +163,7 @@ test_that("proj_zip_file_input", { stringsAsFactors = FALSE ) + # run zip_proj <- proj_zip_file_input(file_input) expect_true(zip_proj$zip_file) @@ -145,6 +199,7 @@ test_that("proj_zip_file_input", { stringsAsFactors = FALSE ) + # run zip_proj <- proj_zip_file_input(file_input) expect_true(zip_proj$zip_file) From 5b02db9a23d91988c375bb1432d73251ffda9ef5 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 29 Apr 2021 16:27:19 +0200 Subject: [PATCH 26/82] test function + WIP parsing statobs --- R-pkg/R/41_io_management.R | 39 ++++++++++++++++++-- R-pkg/tests/testthat/test-41_io_management.R | 21 +++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/R-pkg/R/41_io_management.R b/R-pkg/R/41_io_management.R index c7a53af8..5f0d2a87 100644 --- a/R-pkg/R/41_io_management.R +++ b/R-pkg/R/41_io_management.R @@ -3,10 +3,8 @@ #' @author Ghislain Durif check_file_name <- function(file_name) { valid <- TRUE - if(!is.character(file_name)) - valid <- FALSE - else if(!file.exists(file_name)) - valid <- FALSE + if((length(file_name) != 1) || !is.character(file_name) || + !file.exists(file_name)) valid <- FALSE return(valid) } @@ -1720,6 +1718,39 @@ check_header_loci_des <- function(strng, type = "mss") { return(valid) } +#' Parse diyabc statobsRF.txt file +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param file_name string, (server-side) path to a headersim file. +#' @param file_type string, MIME file type. +#' @param n_stat integer, number of summary statistics in reftable. +parse_diyabc_statobs <- function(file_name, file_type, n_stat) { + + # init output + out <- list(msg = list(), valid = FALSE) + + # check file_name + tmp <- check_file_name(file_name) + + # check file_type + if(file_type != "text/plain") { + out$valid <- FALSE + msg <- tagList("Wrong file type.") + out$msg <- append(out$msg, list(msg)) + return(out) + } + + + + # try reading it + tmp <- tryCatch( + read.table(file_name) + ) + +} + #' Parse diyabc simulation header file #' @keywords internal #' @author Ghislain Durif diff --git a/R-pkg/tests/testthat/test-41_io_management.R b/R-pkg/tests/testthat/test-41_io_management.R index 42048cd7..f039ca83 100644 --- a/R-pkg/tests/testthat/test-41_io_management.R +++ b/R-pkg/tests/testthat/test-41_io_management.R @@ -1,5 +1,26 @@ context("io_management") +test_that("check_file_name", { + + # existing file + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "statobsRF.txt") + expect_true(check_file_name(file_name)) + + # unexisting file + file_name <- file.path(test_dir, "toto.txt") + expect_false(check_file_name(file_name)) + + # two input + file_name <- c("1", "2") + expect_false(check_file_name(file_name)) + + # non character input + file_name <- 19 + expect_false(check_file_name(file_name)) +}) + test_that("check_data_file", { # snp indseq data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" From d120a0a66b5d09fca92e5a5a347e7337319e97f6 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 29 Apr 2021 16:47:10 +0200 Subject: [PATCH 27/82] parse and check statobs file (+test) --- R-pkg/R/41_io_management.R | 47 ++++++++++++++++++-- R-pkg/tests/testthat/test-41_io_management.R | 35 ++++++++++++++- 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/R-pkg/R/41_io_management.R b/R-pkg/R/41_io_management.R index 5f0d2a87..914da472 100644 --- a/R-pkg/R/41_io_management.R +++ b/R-pkg/R/41_io_management.R @@ -1729,26 +1729,65 @@ check_header_loci_des <- function(strng, type = "mss") { parse_diyabc_statobs <- function(file_name, file_type, n_stat) { # init output - out <- list(msg = list(), valid = FALSE) + out <- list(msg = list(), valid = TRUE) # check file_name tmp <- check_file_name(file_name) + if(!tmp) { + out$valid <- FALSE + msg <- tagList("Invalid file file.") + out$msg <- append(out$msg, list(msg)) + } # check file_type if(file_type != "text/plain") { out$valid <- FALSE msg <- tagList("Wrong file type.") out$msg <- append(out$msg, list(msg)) - return(out) } - + # continue ? + if(!out$valid) { + return(out) + } # try reading it tmp <- tryCatch( - read.table(file_name) + read.table(file_name), + error = function(e) return(NULL) ) + if(is.null(tmp)) { + out$valid <- FALSE + msg <- tagList("Issue with statobs file format.") + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # check number of rows and columns + if(nrow(tmp) != 2) { + out$valid <- FALSE + msg <- tagList( + "The statobs file is supposed to contain two lines:", + "one with the names of the summary statistics,", + "and a second with the corresponding values", + "computed on the dataset." + ) + out$msg <- append(out$msg, list(msg)) + } + + if(ncol(tmp) != n_stat) { + out$valid <- FALSE + msg <- tagList( + "The number of summary statistics in the statobs file", + "does not correspond to the number of summary statistics", + "in the reftable file." + ) + out$msg <- append(out$msg, list(msg)) + } + + # output + return(out) } #' Parse diyabc simulation header file diff --git a/R-pkg/tests/testthat/test-41_io_management.R b/R-pkg/tests/testthat/test-41_io_management.R index f039ca83..d41fa3de 100644 --- a/R-pkg/tests/testthat/test-41_io_management.R +++ b/R-pkg/tests/testthat/test-41_io_management.R @@ -369,4 +369,37 @@ test_that("parse_diyabc_header_scenarii", { expect_equal(parse_diyabc_header_scenarii(raw_content[5:12]), list(id=NULL, n_param=NULL, prior=NULL, raw_scenario=NULL, valid=FALSE)) -}) \ No newline at end of file +}) + +test_that("parse_diyabc_statobs", { + + # existing file + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "statobsRF.txt") + file_type <- "text/plain" + tmp <- parse_diyabc_statobs(file_name, file_type, n_stat = 130) + expect_true(tmp$valid) + + # bad number of summary stats + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "statobsRF.txt") + file_type <- "text/plain" + res <- parse_diyabc_statobs(file_name, file_type, n_stat = 120) + expect_false(res$valid) + expect_true(length(res$msg) == 1) + + # unexisting file + file_name <- file.path(test_dir, "toto.txt") + tmp <- parse_diyabc_statobs(file_name, file_type, n_stat = 130) + expect_false(tmp$valid) + expect_true(length(tmp$msg) == 1) + + # unexisting file and wrong file type + file_name <- file.path(test_dir, "toto.txt") + file_type <- "toto" + tmp <- parse_diyabc_statobs(file_name, file_type, n_stat = 130) + expect_false(tmp$valid) + expect_true(length(tmp$msg) == 2) +}) From 42a0412d8987ae1420548e851ab334e047910a02 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 29 Apr 2021 17:08:11 +0200 Subject: [PATCH 28/82] WIP: rewriting project input file reading, parsing and checking --- R-pkg/R/41_input_read.R | 71 +++++++++++++++++ R-pkg/R/41_io_management.R | 82 -------------------- R-pkg/R/42_input_check.R | 9 +++ R-pkg/tests/testthat/test-41_io_management.R | 54 ------------- R-pkg/tests/testthat/test_41-input_read.R | 34 ++++++++ R-pkg/tests/testthat/test_42-input_check.R | 22 ++++++ 6 files changed, 136 insertions(+), 136 deletions(-) create mode 100644 R-pkg/R/41_input_read.R create mode 100644 R-pkg/R/42_input_check.R create mode 100644 R-pkg/tests/testthat/test_41-input_read.R create mode 100644 R-pkg/tests/testthat/test_42-input_check.R diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R new file mode 100644 index 00000000..3f1c160b --- /dev/null +++ b/R-pkg/R/41_input_read.R @@ -0,0 +1,71 @@ +#' Read and parse statobsRF.txt file +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param file_name string, (server-side) path to a headersim file. +#' @param file_type string, MIME file type. +#' @param n_stat integer, number of summary statistics in reftable. +read_statobs <- function(file_name, file_type, n_stat) { + + # init output + out <- list(msg = list(), valid = TRUE) + + # check file_name + tmp <- check_file_name(file_name) + if(!tmp) { + out$valid <- FALSE + msg <- tagList("Invalid file file.") + out$msg <- append(out$msg, list(msg)) + } + + # check file_type + if(file_type != "text/plain") { + out$valid <- FALSE + msg <- tagList("Wrong file type.") + out$msg <- append(out$msg, list(msg)) + } + + # continue ? + if(!out$valid) { + return(out) + } + + # try reading it + tmp <- tryCatch( + read.table(file_name), + error = function(e) return(NULL) + ) + + if(is.null(tmp)) { + out$valid <- FALSE + msg <- tagList("Issue with statobs file format.") + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # check number of rows and columns + if(nrow(tmp) != 2) { + out$valid <- FALSE + msg <- tagList( + "The statobs file is supposed to contain two lines:", + "one with the names of the summary statistics,", + "and a second with the corresponding values", + "computed on the dataset." + ) + out$msg <- append(out$msg, list(msg)) + } + + if(ncol(tmp) != n_stat) { + out$valid <- FALSE + msg <- tagList( + "The number of summary statistics in the statobs file", + "does not correspond to the number of summary statistics", + "in the reftable file." + ) + out$msg <- append(out$msg, list(msg)) + } + + # output + return(out) +} diff --git a/R-pkg/R/41_io_management.R b/R-pkg/R/41_io_management.R index 914da472..22a8aa5d 100644 --- a/R-pkg/R/41_io_management.R +++ b/R-pkg/R/41_io_management.R @@ -1,13 +1,3 @@ -#' Check file_name -#' @keywords internal -#' @author Ghislain Durif -check_file_name <- function(file_name) { - valid <- TRUE - if((length(file_name) != 1) || !is.character(file_name) || - !file.exists(file_name)) valid <- FALSE - return(valid) -} - #' Check data file #' @keywords internal #' @author Ghislain Durif @@ -1718,78 +1708,6 @@ check_header_loci_des <- function(strng, type = "mss") { return(valid) } -#' Parse diyabc statobsRF.txt file -#' @keywords internal -#' @author Ghislain Durif -#' @description -#' Content: see doc -#' @param file_name string, (server-side) path to a headersim file. -#' @param file_type string, MIME file type. -#' @param n_stat integer, number of summary statistics in reftable. -parse_diyabc_statobs <- function(file_name, file_type, n_stat) { - - # init output - out <- list(msg = list(), valid = TRUE) - - # check file_name - tmp <- check_file_name(file_name) - if(!tmp) { - out$valid <- FALSE - msg <- tagList("Invalid file file.") - out$msg <- append(out$msg, list(msg)) - } - - # check file_type - if(file_type != "text/plain") { - out$valid <- FALSE - msg <- tagList("Wrong file type.") - out$msg <- append(out$msg, list(msg)) - } - - # continue ? - if(!out$valid) { - return(out) - } - - # try reading it - tmp <- tryCatch( - read.table(file_name), - error = function(e) return(NULL) - ) - - if(is.null(tmp)) { - out$valid <- FALSE - msg <- tagList("Issue with statobs file format.") - out$msg <- append(out$msg, list(msg)) - return(out) - } - - # check number of rows and columns - if(nrow(tmp) != 2) { - out$valid <- FALSE - msg <- tagList( - "The statobs file is supposed to contain two lines:", - "one with the names of the summary statistics,", - "and a second with the corresponding values", - "computed on the dataset." - ) - out$msg <- append(out$msg, list(msg)) - } - - if(ncol(tmp) != n_stat) { - out$valid <- FALSE - msg <- tagList( - "The number of summary statistics in the statobs file", - "does not correspond to the number of summary statistics", - "in the reftable file." - ) - out$msg <- append(out$msg, list(msg)) - } - - # output - return(out) -} - #' Parse diyabc simulation header file #' @keywords internal #' @author Ghislain Durif diff --git a/R-pkg/R/42_input_check.R b/R-pkg/R/42_input_check.R new file mode 100644 index 00000000..e4ae0a97 --- /dev/null +++ b/R-pkg/R/42_input_check.R @@ -0,0 +1,9 @@ +#' Check file_name +#' @keywords internal +#' @author Ghislain Durif +check_file_name <- function(file_name) { + valid <- TRUE + if((length(file_name) != 1) || !is.character(file_name) || + !file.exists(file_name)) valid <- FALSE + return(valid) +} \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-41_io_management.R b/R-pkg/tests/testthat/test-41_io_management.R index d41fa3de..a86bddf0 100644 --- a/R-pkg/tests/testthat/test-41_io_management.R +++ b/R-pkg/tests/testthat/test-41_io_management.R @@ -1,26 +1,5 @@ context("io_management") -test_that("check_file_name", { - - # existing file - test_proj <- "IndSeq_SNP_estim_param" - test_dir <- file.path(data4test_dir(), test_proj) - file_name <- file.path(test_dir, "statobsRF.txt") - expect_true(check_file_name(file_name)) - - # unexisting file - file_name <- file.path(test_dir, "toto.txt") - expect_false(check_file_name(file_name)) - - # two input - file_name <- c("1", "2") - expect_false(check_file_name(file_name)) - - # non character input - file_name <- 19 - expect_false(check_file_name(file_name)) -}) - test_that("check_data_file", { # snp indseq data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" @@ -370,36 +349,3 @@ test_that("parse_diyabc_header_scenarii", { list(id=NULL, n_param=NULL, prior=NULL, raw_scenario=NULL, valid=FALSE)) }) - -test_that("parse_diyabc_statobs", { - - # existing file - test_proj <- "IndSeq_SNP_estim_param" - test_dir <- file.path(data4test_dir(), test_proj) - file_name <- file.path(test_dir, "statobsRF.txt") - file_type <- "text/plain" - tmp <- parse_diyabc_statobs(file_name, file_type, n_stat = 130) - expect_true(tmp$valid) - - # bad number of summary stats - test_proj <- "IndSeq_SNP_estim_param" - test_dir <- file.path(data4test_dir(), test_proj) - file_name <- file.path(test_dir, "statobsRF.txt") - file_type <- "text/plain" - res <- parse_diyabc_statobs(file_name, file_type, n_stat = 120) - expect_false(res$valid) - expect_true(length(res$msg) == 1) - - # unexisting file - file_name <- file.path(test_dir, "toto.txt") - tmp <- parse_diyabc_statobs(file_name, file_type, n_stat = 130) - expect_false(tmp$valid) - expect_true(length(tmp$msg) == 1) - - # unexisting file and wrong file type - file_name <- file.path(test_dir, "toto.txt") - file_type <- "toto" - tmp <- parse_diyabc_statobs(file_name, file_type, n_stat = 130) - expect_false(tmp$valid) - expect_true(length(tmp$msg) == 2) -}) diff --git a/R-pkg/tests/testthat/test_41-input_read.R b/R-pkg/tests/testthat/test_41-input_read.R new file mode 100644 index 00000000..3973b091 --- /dev/null +++ b/R-pkg/tests/testthat/test_41-input_read.R @@ -0,0 +1,34 @@ +context("41_input_read") + +test_that("read_statobs", { + + # existing file + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "statobsRF.txt") + file_type <- "text/plain" + tmp <- read_statobs(file_name, file_type, n_stat = 130) + expect_true(tmp$valid) + + # bad number of summary stats + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "statobsRF.txt") + file_type <- "text/plain" + res <- read_statobs(file_name, file_type, n_stat = 120) + expect_false(res$valid) + expect_true(length(res$msg) == 1) + + # unexisting file + file_name <- file.path(test_dir, "toto.txt") + tmp <- read_statobs(file_name, file_type, n_stat = 130) + expect_false(tmp$valid) + expect_true(length(tmp$msg) == 1) + + # unexisting file and wrong file type + file_name <- file.path(test_dir, "toto.txt") + file_type <- "toto" + tmp <- read_statobs(file_name, file_type, n_stat = 130) + expect_false(tmp$valid) + expect_true(length(tmp$msg) == 2) +}) diff --git a/R-pkg/tests/testthat/test_42-input_check.R b/R-pkg/tests/testthat/test_42-input_check.R new file mode 100644 index 00000000..895f9359 --- /dev/null +++ b/R-pkg/tests/testthat/test_42-input_check.R @@ -0,0 +1,22 @@ +context("42_input_check") + +test_that("check_file_name", { + + # existing file + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "statobsRF.txt") + expect_true(check_file_name(file_name)) + + # unexisting file + file_name <- file.path(test_dir, "toto.txt") + expect_false(check_file_name(file_name)) + + # two input + file_name <- c("1", "2") + expect_false(check_file_name(file_name)) + + # non character input + file_name <- 19 + expect_false(check_file_name(file_name)) +}) \ No newline at end of file From 2bc4cfda26ae2b9f03b6a9d765e465e858b6f494 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 29 Apr 2021 17:21:56 +0200 Subject: [PATCH 29/82] fix typo in file names --- .../tests/testthat/{test_41-input_read.R => test-41_input_read.R} | 0 .../testthat/{test_42-input_check.R => test-42_input_check.R} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename R-pkg/tests/testthat/{test_41-input_read.R => test-41_input_read.R} (100%) rename R-pkg/tests/testthat/{test_42-input_check.R => test-42_input_check.R} (100%) diff --git a/R-pkg/tests/testthat/test_41-input_read.R b/R-pkg/tests/testthat/test-41_input_read.R similarity index 100% rename from R-pkg/tests/testthat/test_41-input_read.R rename to R-pkg/tests/testthat/test-41_input_read.R diff --git a/R-pkg/tests/testthat/test_42-input_check.R b/R-pkg/tests/testthat/test-42_input_check.R similarity index 100% rename from R-pkg/tests/testthat/test_42-input_check.R rename to R-pkg/tests/testthat/test-42_input_check.R From b35baa0797e8c0b2783f6915ae31573af52591a4 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 09:59:58 +0200 Subject: [PATCH 30/82] parse scenario in header file OK + WIP read header file (and TODO read reftable) --- R-pkg/R/41_input_read.R | 313 ++++++++++++++++++++++ R-pkg/tests/testthat/test-41_input_read.R | 68 +++++ 2 files changed, 381 insertions(+) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index 3f1c160b..0388ee5d 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -1,3 +1,280 @@ +#' Internal function to read header file +#' #' @keywords internal +#' #' @author Ghislain Durif +#' #' @description +#' #' Content: see doc +#' #' @param file_name string, (server-side) path to a headersim file. +#' #' @param file_type string, MIME file type. +#' #' @importFrom readr read_file +#' header_reader <- function(file_name) { +#' +#' ## Headers parsing +#' header <- read_file(file_name) # whole file in one string +#' +#' ### First part, scenarios +#' +#' ## extraction of scenarios descriptions +#' regex_scen <- "\\bscenario\\s+(\\d+)\\s+.*\\n((?:(?!(?:scenario|\\n)).*\\n)+)" +#' +#' # split by scenario +#' scen_prematch <- str_extract_all(header,regex_scen)[[1]] +#' +#' # number of scenarios +#' n_scen <- length(scen_prematch) +#' +#' # scenario match +#' scen_match <- str_match_all(scen_prematch,rescen) +#' +#' ## vector of scenario order +#' scen_ord <- unlist(lapply( +#' 1:n_scen, function(ind) return(as.integer(scen_match[[ind]][2])) +#' )) +#' +#' ## vector of scenario descriptions +#' scen_desc <- unlist(lapply( +#' 1:n_scen, function(ind) return(scen_match[[i]][3]) +#' )) +#' scen_desc <- scen_desc[scen_ord] +#' +#' ### Second part, hist params +#' +#' +#' ## extraction of number of parameters +#' regex_paramtot <- "historical parameters priors \\((\\d+)\\D" +#' ## total number of hist params in header +#' n_paramtot <- as.integer(str_match( +#' str_extract(header, regex_paramtot)[[1]], +#' regex_paramtot +#' )[2]) +#' +#' ## extraction of parameters names (and the corresponding law too, +#' ## in order to check if it is a dirac base, aka constant) +#' regex_paramlist <- paste0( +#' "\\bhistorical parameters priors.*\\n((?:\\w+\\W[^\\n]*\\n){", +#' n_paramtot, "})" +#' ) +#' paramlistmatch <- str_match(str_extract_all(header,reparamlist),reparamlist)[2] +#' ## regexp to extract : param name, mini and maximum of the law +#' reparam <- "(\\w+)\\W+\\w\\W+\\w\\w\\[([^,\\]]+),([^,\\]]+)[,\\]][^\\n]*\\n" +#' paramsh <- str_match_all(str_extract_all(paramlistmatch,reparam)[[1]],reparam) +#' paramsdesc <- list() +#' reali <- 1; +#' ## loop on all hist parameters to "filter" all constants +#' ## parameters +#' for(i in 1:nparamtoth) +#' { +#' mini <- as.numeric(paramsh[[i]][3]) +#' maxi <- as.numeric(paramsh[[i]][4]) +#' if(maxi != 0.0) +#' if ((maxi-mini)/maxi > 0.000001) +#' { +#' paramsdesc[paramsh[[i]][2]] <- reali +#' reali <- reali + 1 +#' } +#' } +#' realparamtot <- reali - 1 # this is the real number of non constant +#' # historical parameters +#' +#' ### Third part, extract parameters for each scenario +#' # parambyscenh stores the indices of used parameters for each +#' # scenario +#' parambyscenh <- vector(mode="numeric",length=nscenh) +#' # Loop for all scenarios +#' for(i in 1:nscenh) +#' { +#' templist <- list() +#' # list of terms used in scenario descriptions +#' listterms <- strsplit(scendesc[i],"\\W")[[1]] +#' m <- 1 +#' for(j in 1:length(listterms)) +#' { +#' if (!is.null(paramsdesc[listterms[j]][[1]])) +#' { +#' templist[m] <- paramsdesc[listterms[j]][[1]] +#' m <- m + 1 +#' } +#' } +#' parambyscenh[i] <- list((unique(unlist(templist)))) +#' } +#' +#' # Get all names +#' restatsname <- "\\n\\nscenario\\s+.*" +#' allcolspre <- tail(strsplit(str_extract(header,restatsname),"\\s+")[[1]],-2) +#' +#' } + + +#' Read and parse headerRF.txt and header.txt file +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param file_name string, (server-side) path to a headersim file. +#' @param file_type string, MIME file type. +#' @importFrom readr read_file +read_header <- function(file_name, file_type) { + + # init output + out <- list( + msg = list(), valid = TRUE, + data_file = NULL, loci_desc = NULL, + n_param = NULL, n_prior = NULL, n_sumstat = NULL, + cond_list = NULL, prior_list = NULL, group_prior_list = NULL, + n_scenario = NULL, scenario_list = NULL, + simu_mode = NULL + ) + + current_line <- 0 + + # check file_name + tmp <- check_file_name(file_name) + if(!tmp) { + out$valid <- FALSE + msg <- tagList("Invalid file file.") + out$msg <- append(out$msg, list(msg)) + } + + # check file_type + if(file_type != "text/plain") { + out$valid <- FALSE + msg <- tagList("Wrong file type.") + out$msg <- append(out$msg, list(msg)) + } + + # continue ? + if(!out$valid) { + return(out) + } + + ## HEADER FILE CONTENT + # read whole file in one string and split it by new line + header <- str_split(read_file(file_name), "\n", simplify = TRUE) + + ## data file + out$data_file <- header[1] + header <- header[-1] + current_line <- current_line + 1 + + ## number of parameters and statistics + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + pttrn <- "^[0-9]+ parameters and [0-9]+ summary statistics$" + if(!str_detect(strng, pttrn)) { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should match the pattern", tags$code(pttrn), "." + ) + out$msg <- append(out$msg, list(msg)) + } else { + pttrn <- "[0-9]+(?= parameters)" + out$n_param <- as.integer(str_extract(strng, pttrn)) + pttrn <- "[0-9]+(?= summary statistics)" + out$n_sumstat <- as.integer(str_extract(strng, pttrn)) + } + + ## empty line + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + if(strng != "") { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should be empty." + ) + out$msg <- append(out$msg, list(msg)) + } + + ## scenarios + pttrn <- "^[0-9]+ scenarios:( [0-9]+)+ ?$" + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + # find section + if(!str_detect(strng, pttrn)) { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should match the pattern", tags$code(pttrn), "." + ) + out$msg <- append(out$msg, list(msg)) + } else { + pttrn <- "[0-9]+(?= scenarios:)" + out$n_scenario <- as.integer(str_extract(strng, pttrn)) + pttrn <- "(?<= )[0-9]+" + nrow_per_scenario <- as.integer(unlist(str_extract_all(strng, pttrn))) + # extract scenarii + row_seq <- cumsum(c(1, nrow_per_scenario+1)) + scenario_list <- lapply( + split( + raw_content[(min(line_seq):(max(line_seq)-1)) + + next_sec_line], + rep(seq(line_seq), diff(c(line_seq, max(line_seq)))) + ), + function(content) { + parse_diyabc_header_scenarii(content) + }) + # check extracted scnenarii + valid <- all(unlist(lapply(scenario_list, + function(item) item$valid))) + # extract raw scenario list + raw_scenario_list <- unlist(unname( + lapply( + scenario_list, + function(item) item$raw_scenario + ) + )) + # next section + next_sec_line <- next_sec_line + max(line_seq) + } + + + # output + return(out) +} + + +#' Parse scenario in header file +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param content string vector, scenario description +parse_scenario_header <- function(content) { + + # init output + out <- list( + valid = TRUE, + id = NULL, n_param = NULL, + prior = NULL, scenario = NULL + ) + + # description (1st line) + strng <- content[1] + pttrn <- str_c("^scenario [0-9]+ \\[", num_regex(), "\\] \\([0-9]+\\)$") + if(!str_detect(strng, pttrn)) { + out$valid <- FALSE + } else { + # scenario id + pttrn <- "(?<=^scenario )[0-9]+" + out$id <- as.integer(str_extract(strng, pttrn)) + # scenario prior + pttrn <- str_c("(?<= \\[)", num_regex(), "(?=\\] )") + out$prior <- as.numeric(str_extract(strng, pttrn)) + # number of parameters in scenario + pttrn <- "(?<= \\()[0-9]+(?=\\)$)" + out$n_param <- as.integer(str_extract(strng, pttrn)) + ## scenario + out$scenario <- str_c(content[-1], collapse = "\n") + } + ## output + return(out) +} + + + #' Read and parse statobsRF.txt file #' @keywords internal #' @author Ghislain Durif @@ -69,3 +346,39 @@ read_statobs <- function(file_name, file_type, n_stat) { # output return(out) } + +#' Read and parse reftableRF.bin file +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param file_name string, (server-side) path to a headersim file. +#' @param file_type string, MIME file type. +read_reftable <- function(file_name, file_type) { + + # init output + out <- list(msg = list(), valid = TRUE) + + # check file_name + tmp <- check_file_name(file_name) + if(!tmp) { + out$valid <- FALSE + msg <- tagList("Invalid file file.") + out$msg <- append(out$msg, list(msg)) + } + + # check file_type + if(file_type != "application/octet-stream") { + out$valid <- FALSE + msg <- tagList("Wrong file type.") + out$msg <- append(out$msg, list(msg)) + } + + # continue ? + if(!out$valid) { + return(out) + } + + # output + return(out) +} \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-41_input_read.R b/R-pkg/tests/testthat/test-41_input_read.R index 3973b091..cf9e18c7 100644 --- a/R-pkg/tests/testthat/test-41_input_read.R +++ b/R-pkg/tests/testthat/test-41_input_read.R @@ -1,5 +1,73 @@ context("41_input_read") +# test_that("header_reader", { +# +# # existing file +# test_proj <- "IndSeq_SNP_model_choice" +# test_dir <- file.path(data4test_dir(), test_proj) +# file_name <- file.path(test_dir, "headerRF.txt") +# file_type <- "text/plain" +# +# tmp <- header_reader(file_name, file_type) +# expect_true(tmp$valid) +# +# }) + +test_that("read_header", { + + # existing file + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "headerRF.txt") + file_type <- "text/plain" + tmp <- read_statobs(file_name, file_type, n_stat = 130) + expect_true(tmp$valid) + + # bad number of summary stats + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "statobsRF.txt") + file_type <- "text/plain" + res <- read_statobs(file_name, file_type, n_stat = 120) + expect_false(res$valid) + expect_true(length(res$msg) == 1) + + # unexisting file + file_name <- file.path(test_dir, "toto.txt") + tmp <- read_statobs(file_name, file_type, n_stat = 130) + expect_false(tmp$valid) + expect_true(length(tmp$msg) == 1) + + # unexisting file and wrong file type + file_name <- file.path(test_dir, "toto.txt") + file_type <- "toto" + tmp <- read_statobs(file_name, file_type, n_stat = 130) + expect_false(tmp$valid) + expect_true(length(tmp$msg) == 2) +}) + +test_that("parse_scenario_header", { + # model choice + test_proj <- "IndSeq_SNP_model_choice" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "headerRF.txt") + content <- str_split(read_file(file_name), "\n", simplify = TRUE) + + res <- parse_scenario_header(content[5:13]) + + expect_equal( + res, + list( + valid=TRUE, id=1, n_param=8, prior=0.16667, + scenario=str_c(content[6:13], collapse = "\n") + ) + ) + + res <- parse_scenario_header(content[6:13]) + + expect_false(res$valid) +}) + test_that("read_statobs", { # existing file From 4560ee640838f3f9bd0a7af6d5b31225bbd3b7d0 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 12:16:03 +0200 Subject: [PATCH 31/82] check prior extracted from header file --- R-pkg/R/42_input_check.R | 20 ++++++++++++++++++++ R-pkg/tests/testthat/test-42_input_check.R | 14 +++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/R-pkg/R/42_input_check.R b/R-pkg/R/42_input_check.R index e4ae0a97..2180f67c 100644 --- a/R-pkg/R/42_input_check.R +++ b/R-pkg/R/42_input_check.R @@ -6,4 +6,24 @@ check_file_name <- function(file_name) { if((length(file_name) != 1) || !is.character(file_name) || !file.exists(file_name)) valid <- FALSE return(valid) +} + +#' Check header file prior definition +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param cstrng string, prior description. +check_header_prior <- function(strng) { + # init output + valid <- TRUE + # check + pttrn <- str_c(single_param_regex(), " ", + "(N|T|A)", " ", + "(UN|LU|NO|LN)", "\\[", + str_c(rep(num_regex(), 4), collapse = ","), + "\\]") + valid <- str_detect(strng, pttrn) + ## output + return(valid) } \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-42_input_check.R b/R-pkg/tests/testthat/test-42_input_check.R index 895f9359..d6349b9a 100644 --- a/R-pkg/tests/testthat/test-42_input_check.R +++ b/R-pkg/tests/testthat/test-42_input_check.R @@ -19,4 +19,16 @@ test_that("check_file_name", { # non character input file_name <- 19 expect_false(check_file_name(file_name)) -}) \ No newline at end of file +}) + + +test_that("check_header_prior", { + expect_true(check_header_prior("N N UN[100,10000,0.0,0.0]")) + expect_true(check_header_prior("ra A UN[0.05,0.95,0.0,0.0]")) + expect_true(check_header_prior("t T UN[10,1000,0.0,0.0]")) + + expect_false(check_header_prior("N N UN[100,10000,0.0]")) + expect_false(check_header_prior("ra A [0.05,0.95,0.0,0.0]")) + expect_false(check_header_prior("T UN[10,1000,0.0,0.0]")) +}) + From 7e66331f3840b79699ec16140a36a19b7ec65fa0 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 13:29:13 +0200 Subject: [PATCH 32/82] check condition extracted from header file --- R-pkg/R/42_input_check.R | 18 ++++++++++++++++++ R-pkg/tests/testthat/test-42_input_check.R | 11 +++++++++++ 2 files changed, 29 insertions(+) diff --git a/R-pkg/R/42_input_check.R b/R-pkg/R/42_input_check.R index 2180f67c..3871d881 100644 --- a/R-pkg/R/42_input_check.R +++ b/R-pkg/R/42_input_check.R @@ -26,4 +26,22 @@ check_header_prior <- function(strng) { valid <- str_detect(strng, pttrn) ## output return(valid) +} + +#' Check header file condition definition +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param strng string, prior description. +check_header_cond <- function(strng) { + # init output + issues <- list() + valid <- TRUE + # check + pttrn <- str_c("^", single_param_regex(), "(<|=<|>|>=)", + single_param_regex(), "$") + valid <- str_detect(strng, pttrn) + ## output + return(valid) } \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-42_input_check.R b/R-pkg/tests/testthat/test-42_input_check.R index d6349b9a..c0bc5fcc 100644 --- a/R-pkg/tests/testthat/test-42_input_check.R +++ b/R-pkg/tests/testthat/test-42_input_check.R @@ -32,3 +32,14 @@ test_that("check_header_prior", { expect_false(check_header_prior("T UN[10,1000,0.0,0.0]")) }) +test_that("check_header_cond", { + expect_true(check_header_cond("t1>t2")) + expect_true(check_header_cond("t1>=t2")) + expect_true(check_header_cond("t1")) + expect_false(check_header_cond("t1>")) + expect_false(check_header_cond(" Date: Fri, 30 Apr 2021 14:16:44 +0200 Subject: [PATCH 33/82] check loci description extracted from header file --- R-pkg/R/42_input_check.R | 43 ++++++++++++++++++++- R-pkg/tests/testthat/test-42_input_check.R | 44 ++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/R-pkg/R/42_input_check.R b/R-pkg/R/42_input_check.R index 3871d881..040fe2d4 100644 --- a/R-pkg/R/42_input_check.R +++ b/R-pkg/R/42_input_check.R @@ -44,4 +44,45 @@ check_header_cond <- function(strng) { valid <- str_detect(strng, pttrn) ## output return(valid) -} \ No newline at end of file +} + +#' Check header file loci description +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param strng string, prior description. +#' @param type string, `"mss"` or `"snp"` +check_header_loci_desc <- function(strng, type = "mss") { + # init output + valid <- TRUE + # Microsat/Sequence + if(type == "mss") { + # Locus_M_A_1_ [M] G1 2 40 + pttrn1 <- str_c("^", single_param_regex(), " ", + "<(A|H|X|Y|M)>", " ", + "\\[M\\]", " ", + "G[0-9]+", " ", + "[0-9]+", " ", "[0-9]+", "$") + # Locus_S_A_21_ [S] G4 1000 + pttrn2 <- str_c("^", single_param_regex(), " ", + "<(A|H|X|Y|M)>", " ", + "\\[S\\]", " ", + "G[0-9]+", " ", + "[0-9]+", "$") + + # check + valid <- str_detect(strng, pttrn1) || str_detect(strng, pttrn2) + } else if(type == "snp") { + # 5000 G1 from 1 + pttrn <- str_c("^", "[0-9]+", " ", + "<(A|H|X|Y|M)>", " ", + "G[0-9]+", " ", + "from ", "[0-9]+", "$") + + # check + valid <- str_detect(strng, pttrn) + } + ## output + return(valid) +} diff --git a/R-pkg/tests/testthat/test-42_input_check.R b/R-pkg/tests/testthat/test-42_input_check.R index c0bc5fcc..3dc8d325 100644 --- a/R-pkg/tests/testthat/test-42_input_check.R +++ b/R-pkg/tests/testthat/test-42_input_check.R @@ -43,3 +43,47 @@ test_that("check_header_cond", { expect_false(check_header_cond(" [M] G1 2 40", type = "mss" + )) + expect_true(check_header_loci_desc( + "Locus_yyy [S] G2 1000", type = "mss" + )) + + expect_false(check_header_loci_desc( + "Locus_xxx [M G1 2 40", type = "mss" + )) + expect_false(check_header_loci_desc( + " [M] G1 2 40", type = "mss" + )) + expect_false(check_header_loci_desc( + "Locus_xxx [M] G1 2", type = "mss" + )) + expect_false(check_header_loci_desc( + "Locus_yyy G1 from 1", type = "snp" + )) + expect_true(check_header_loci_desc( + "5000 G2 from 10", type = "snp" + )) + + expect_false(check_header_loci_desc( + " G2 from 10", type = "snp" + )) + expect_false(check_header_loci_desc( + "5000 G2 from ", type = "snp" + )) + expect_false(check_header_loci_desc( + "5000 G from 10", type = "snp" + )) +}) + From e884c68a8ab29ffd41c7fb94cb896e0115c1bacb Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 14:20:04 +0200 Subject: [PATCH 34/82] remove useless lines + init at FALSE before branching checks --- R-pkg/R/42_input_check.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R-pkg/R/42_input_check.R b/R-pkg/R/42_input_check.R index 040fe2d4..586b565f 100644 --- a/R-pkg/R/42_input_check.R +++ b/R-pkg/R/42_input_check.R @@ -15,8 +15,6 @@ check_file_name <- function(file_name) { #' Content: see doc #' @param cstrng string, prior description. check_header_prior <- function(strng) { - # init output - valid <- TRUE # check pttrn <- str_c(single_param_regex(), " ", "(N|T|A)", " ", @@ -35,9 +33,6 @@ check_header_prior <- function(strng) { #' Content: see doc #' @param strng string, prior description. check_header_cond <- function(strng) { - # init output - issues <- list() - valid <- TRUE # check pttrn <- str_c("^", single_param_regex(), "(<|=<|>|>=)", single_param_regex(), "$") @@ -55,7 +50,7 @@ check_header_cond <- function(strng) { #' @param type string, `"mss"` or `"snp"` check_header_loci_desc <- function(strng, type = "mss") { # init output - valid <- TRUE + valid <- FALSE # Microsat/Sequence if(type == "mss") { # Locus_M_A_1_ [M] G1 2 40 From c187023ea1921b675789a603aa65f012d4dc8d7e Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 15:47:27 +0200 Subject: [PATCH 35/82] new regex (to extract 1E10 notation) + tests --- R-pkg/R/02_regex.R | 7 +++++++ R-pkg/tests/testthat/test-02_regex.R | 29 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 R-pkg/tests/testthat/test-02_regex.R diff --git a/R-pkg/R/02_regex.R b/R-pkg/R/02_regex.R index cd418dd5..50d28337 100644 --- a/R-pkg/R/02_regex.R +++ b/R-pkg/R/02_regex.R @@ -12,6 +12,13 @@ num_regex <- function() { return("[0-9]+\\.?[0-9]*") } +#' return numerical (xxEyy notation) regex +#' @keywords internal +#' @author Ghislain Durif +numexp_regex <- function() { + return("[0-9]+\\.?[0-9]*(E\\-?[0-9]+)?") +} + #' return event numerical rate regex #' @keywords internal #' @author Ghislain Durif diff --git a/R-pkg/tests/testthat/test-02_regex.R b/R-pkg/tests/testthat/test-02_regex.R new file mode 100644 index 00000000..007834e0 --- /dev/null +++ b/R-pkg/tests/testthat/test-02_regex.R @@ -0,0 +1,29 @@ +context("02_regex") + +test_that("int_regex", { + expect_true(str_detect("12", int_regex())) + expect_false(str_detect("xxx", int_regex())) +}) + +test_that("num_regex", { + expect_true(str_detect("12", num_regex())) + expect_true(str_detect("0.12", num_regex())) + expect_true(str_detect("12.0", num_regex())) + expect_true(str_detect("12.", num_regex())) + expect_true(str_detect(".12", num_regex())) + expect_false(str_detect("xxx", num_regex())) +}) + +test_that("numexp_regex", { + expect_true(str_detect("12", numexp_regex())) + expect_true(str_detect("0.12", numexp_regex())) + expect_true(str_detect("12.0", numexp_regex())) + expect_true(str_detect("12.", numexp_regex())) + expect_true(str_detect(".12", numexp_regex())) + expect_true(str_detect("12E7", numexp_regex())) + expect_true(str_detect("0.12E-6", numexp_regex())) + expect_true(str_detect("12.0E-8", numexp_regex())) + expect_true(str_detect("12.E-001", numexp_regex())) + expect_true(str_detect(".12E10", numexp_regex())) + expect_false(str_detect("xxx", numexp_regex())) +}) From 5d8c708bf0e087d6d4dc8b679c0280b7646aad06 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 16:30:11 +0200 Subject: [PATCH 36/82] check MSS group prior --- R-pkg/R/42_input_check.R | 69 ++++++++++++ R-pkg/tests/testthat/test-42_input_check.R | 125 ++++++++++++++++++--- 2 files changed, 181 insertions(+), 13 deletions(-) diff --git a/R-pkg/R/42_input_check.R b/R-pkg/R/42_input_check.R index 586b565f..b5907967 100644 --- a/R-pkg/R/42_input_check.R +++ b/R-pkg/R/42_input_check.R @@ -81,3 +81,72 @@ check_header_loci_desc <- function(strng, type = "mss") { ## output return(valid) } + +#' Check header file group prior definition +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param content string vector of group prior descriptions. +#' @param type string, locus type `"M"` or `"S"`. +check_header_group_prior <- function(content, type = "M") { + + prior_regex_list <- NULL + + prior_param_regex4 <- str_c(rep(numexp_regex(), 4), collapse = ",") + prior_param_regex2 <- str_c(rep(numexp_regex(), 2), collapse = ",") + + # vector of regex + if(type == "M") { + prior_regex_list <- c( + str_c("^MEANMU (UN|LU|GA)\\[", prior_param_regex4, "\\]$"), + str_c( + "^GAMMU GA\\[", prior_param_regex2, ",Mean_u,", + numexp_regex(), "\\]$" + ), + str_c("^MEANP (UN|LU|GA)\\[", prior_param_regex4, "\\]$"), + str_c( + "^GAMP GA\\[", prior_param_regex2, ",Mean_P,", + numexp_regex(), "\\]$" + ), + str_c("^MEANSNI (UN|LU|GA)\\[", prior_param_regex4, "\\]$"), + str_c( + "^GAMSNI GA\\[", prior_param_regex2, ",Mean_u_SNI,", + numexp_regex(), "\\]$" + ) + ) + } else if(type == "S") { + prior_regex_list <- c( + str_c("^MEANMU (UN|LU|GA)\\[", prior_param_regex4, "\\]$"), + str_c( + "^GAMMU GA\\[", prior_param_regex2, ",Mean_u,", + numexp_regex(), "\\]$" + ), + str_c("^MEANK1 (UN|LU|GA)\\[", prior_param_regex4, "\\]$"), + str_c( + "^GAMK1 GA\\[", prior_param_regex2, ",Mean_k1,", + numexp_regex(), "\\]$" + ), + str_c("^MEANK2 (UN|LU|GA)\\[", prior_param_regex4, "\\]$"), + str_c( + "^GAMK2 GA\\[", prior_param_regex2, ",Mean_k2,", + numexp_regex(), "\\]$" + ), + str_c("^MODEL (TN|HKY|JK|K2P) ", int_regex(), " ", numexp_regex()) + ) + } + + # check number of group prior + if(length(content) != length(prior_regex_list)) { + return(FALSE) + } + + # check all pattern + valid <- all(unlist(lapply( + 1:length(content), + function(ind) return(str_detect(content[ind], prior_regex_list[ind])) + ))) + + ## output + return(valid) +} diff --git a/R-pkg/tests/testthat/test-42_input_check.R b/R-pkg/tests/testthat/test-42_input_check.R index 3dc8d325..4efb9397 100644 --- a/R-pkg/tests/testthat/test-42_input_check.R +++ b/R-pkg/tests/testthat/test-42_input_check.R @@ -43,47 +43,146 @@ test_that("check_header_cond", { expect_false(check_header_cond(" [M] G1 2 40", type = "mss" )) - expect_true(check_header_loci_desc( + expect_true(check_header_group_prior( "Locus_yyy [S] G2 1000", type = "mss" )) - expect_false(check_header_loci_desc( + expect_false(check_header_group_prior( "Locus_xxx [M G1 2 40", type = "mss" )) - expect_false(check_header_loci_desc( + expect_false(check_header_group_prior( " [M] G1 2 40", type = "mss" )) - expect_false(check_header_loci_desc( + expect_false(check_header_group_prior( "Locus_xxx [M] G1 2", type = "mss" )) - expect_false(check_header_loci_desc( + expect_false(check_header_group_prior( "Locus_yyy G1 from 1", type = "snp" )) - expect_true(check_header_loci_desc( + expect_true(check_header_group_prior( "5000 G2 from 10", type = "snp" )) - expect_false(check_header_loci_desc( + expect_false(check_header_group_prior( " G2 from 10", type = "snp" )) - expect_false(check_header_loci_desc( + expect_false(check_header_group_prior( "5000 G2 from ", type = "snp" )) - expect_false(check_header_loci_desc( + expect_false(check_header_group_prior( "5000 G from 10", type = "snp" )) }) + +test_that("check_header_group_prior", { + ## MSS + content <- c( + "MEANMU UN[1.00E-004,1.00E-3,0.0005,2]", + "GAMMU GA[1.00E-005,1.00E-002,Mean_u,2]", + "MEANP UN[1.00E-001,3.00E-001,0.22,2]", + "GAMP GA[1.00E-002,9.00E-001,Mean_P,2]", + "MEANSNI LU[1.00E-008,1.00E-005,1.00E-007,2]", + "GAMSNI GA[1.00E-009,1.00E-004,Mean_u_SNI,2]" + ) + expect_true(check_header_group_prior(content, type = "M")) + + content <- c( + "MEANMU UN[1.00E-004,1.00E-3,0.0005,2]", + "GAMMU GA[1.00E-005,1.00E-002,Mean_u,2]", + "MEANP UN[1.00E-001,3.00E-001,0.22,2]", + "GAMP GA[1.00E-002,9.00E-001,Mean_P,2]" + ) + expect_false(check_header_group_prior(content, type = "M")) + + content <- c( + "MEANMU UN[1.00E-004,1.00E-3,0.0005,2", + "GAMMU GA[1.00E-005,1.00E-002,Mean_u,2]", + "MEANP UN[1.00E-001,3.00E-001,0.22,2]", + "GAMP GA[1.00E-002,9.00E-001,Mean_P,2]", + "MEANSNI LU[1.00E-008,1.00E-005,1.00E-007,2]", + "GAMSNI GA[1.00E-009,1.00E-004,Mean_u_SNI,2]" + ) + expect_false(check_header_group_prior(content, type = "M")) + + content <- c( + "MEANMU UN[1.00E-9,1.00E-7,5E-9,2]", + "GAMMU GA[1.00E-9,1.00E-6,Mean_u,2]", + "MEANK1 UN[0.050,20,10,2]", + "GAMK1 GA[0.050,20,Mean_k1,2]", + "MEANK2 UN[0.050,20,10,2]", + "GAMK2 GA[0.050,20,Mean_k2,2]", + "MODEL TN 10 2.00" + ) + expect_false(check_header_group_prior(content, type = "M")) + + + ## SNP + content <- c( + "MEANMU UN[1.00E-9,1.00E-7,5E-9,2]", + "GAMMU GA[1.00E-9,1.00E-6,Mean_u,2]", + "MEANK1 UN[0.050,20,10,2]", + "GAMK1 GA[0.050,20,Mean_k1,2]", + "MEANK2 UN[0.050,20,10,2]", + "GAMK2 GA[0.050,20,Mean_k2,2]", + "MODEL TN 10 2.00" + ) + expect_true(check_header_group_prior(content, type = "S")) + + content <- c( + "MEANMU UN[1.00E-9,1.00E-7,5E-9,2]", + "GAMMU GA[1.00E-9,1.00E-6,Mean_u,2]", + "MEANK1 UN[0.050,20,10,2]", + "GAMK1 GA[0.050,20,Mean_k1,2]", + "MEANK2 UN[0.050,20,10,2]", + "GAMK2 GA[0.050,20,Mean_k2,2]", + "MODEL TN 10.99 2.00" + ) + expect_false(check_header_group_prior(content, type = "S")) + + content <- c( + "MEANMU UN[1.00E-9,1.00E-7,5E-9,2]", + "GAMMU GA[1.00E-9,1.00E-6,Mean_u,2]", + "MEANK2 UN[0.050,20,10,2]", + "GAMK2 GA[0.050,20,Mean_k2,2]", + "MODEL TN 10 2.00" + ) + expect_false(check_header_group_prior(content, type = "S")) + + content <- c( + "MEANMU UN[1.00E-9,1.00E-7,5E-9,2]", + "GAMMU GA[1.00E-9,1.00E-6,Mean_u,2]", + "MEANK1 UN[0.050,20,10,2]", + "GAMK1 GA[0.050,20,0,2]", + "MEANK2 UN[0.050,20,10,2]", + "GAMK2 GA[0.050,20,Mean_k2,2]", + "MODEL TN 10 2.00" + ) + expect_false(check_header_group_prior(content, type = "S")) + + + content <- c( + "MEANMU UN[1.00E-004,1.00E-3,0.0005,2]", + "GAMMU GA[1.00E-005,1.00E-002,Mean_u,2]", + "MEANP UN[1.00E-001,3.00E-001,0.22,2]", + "GAMP GA[1.00E-002,9.00E-001,Mean_P,2]", + "MEANSNI LU[1.00E-008,1.00E-005,1.00E-007,2]", + "GAMSNI GA[1.00E-009,1.00E-004,Mean_u_SNI,2]" + ) + expect_false(check_header_group_prior(content, type = "S")) +}) + From 594442dd2b3cef015f01760157b75e9b0ff91979 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 17:08:07 +0200 Subject: [PATCH 37/82] forgot notation 1e12 --- R-pkg/R/02_regex.R | 2 +- R-pkg/tests/testthat/test-02_regex.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R-pkg/R/02_regex.R b/R-pkg/R/02_regex.R index 50d28337..6f0f72ea 100644 --- a/R-pkg/R/02_regex.R +++ b/R-pkg/R/02_regex.R @@ -16,7 +16,7 @@ num_regex <- function() { #' @keywords internal #' @author Ghislain Durif numexp_regex <- function() { - return("[0-9]+\\.?[0-9]*(E\\-?[0-9]+)?") + return("[0-9]+\\.?[0-9]*((E|e)\\-?[0-9]+)?") } #' return event numerical rate regex diff --git a/R-pkg/tests/testthat/test-02_regex.R b/R-pkg/tests/testthat/test-02_regex.R index 007834e0..09097c41 100644 --- a/R-pkg/tests/testthat/test-02_regex.R +++ b/R-pkg/tests/testthat/test-02_regex.R @@ -21,7 +21,7 @@ test_that("numexp_regex", { expect_true(str_detect("12.", numexp_regex())) expect_true(str_detect(".12", numexp_regex())) expect_true(str_detect("12E7", numexp_regex())) - expect_true(str_detect("0.12E-6", numexp_regex())) + expect_true(str_detect("0.12e-6", numexp_regex())) expect_true(str_detect("12.0E-8", numexp_regex())) expect_true(str_detect("12.E-001", numexp_regex())) expect_true(str_detect(".12E10", numexp_regex())) From 9645aaeab5f74a88f87f2cf99dcf195ee13a3700 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 17:11:04 +0200 Subject: [PATCH 38/82] easier debugging --- R-pkg/R/42_input_check.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R-pkg/R/42_input_check.R b/R-pkg/R/42_input_check.R index b5907967..caf9a2d4 100644 --- a/R-pkg/R/42_input_check.R +++ b/R-pkg/R/42_input_check.R @@ -142,10 +142,11 @@ check_header_group_prior <- function(content, type = "M") { } # check all pattern - valid <- all(unlist(lapply( + check_pttrn <- unlist(lapply( 1:length(content), function(ind) return(str_detect(content[ind], prior_regex_list[ind])) - ))) + )) + valid <- all(check_pttrn) ## output return(valid) From ed6c435fb86cf19662c5b10f63c1f4aa409764e8 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 17:23:13 +0200 Subject: [PATCH 39/82] additional check case --- R-pkg/tests/testthat/test-42_input_check.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R-pkg/tests/testthat/test-42_input_check.R b/R-pkg/tests/testthat/test-42_input_check.R index 4efb9397..06b42e1c 100644 --- a/R-pkg/tests/testthat/test-42_input_check.R +++ b/R-pkg/tests/testthat/test-42_input_check.R @@ -95,7 +95,7 @@ test_that("check_header_group_prior", { "MEANMU UN[1.00E-004,1.00E-3,0.0005,2]", "GAMMU GA[1.00E-005,1.00E-002,Mean_u,2]", "MEANP UN[1.00E-001,3.00E-001,0.22,2]", - "GAMP GA[1.00E-002,9.00E-001,Mean_P,2]", + "GAMP GA[1.00e-002,9.00E-001,Mean_P,2]", "MEANSNI LU[1.00E-008,1.00E-005,1.00E-007,2]", "GAMSNI GA[1.00E-009,1.00E-004,Mean_u_SNI,2]" ) @@ -133,7 +133,7 @@ test_that("check_header_group_prior", { ## SNP content <- c( - "MEANMU UN[1.00E-9,1.00E-7,5E-9,2]", + "MEANMU UN[1.00e-9,1.00E-7,5E-9,2]", "GAMMU GA[1.00E-9,1.00E-6,Mean_u,2]", "MEANK1 UN[0.050,20,10,2]", "GAMK1 GA[0.050,20,Mean_k1,2]", From b065000229112ed94b1e406ab871c09be8a7b582 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 19:07:15 +0200 Subject: [PATCH 40/82] useless file --- R-pkg/inst/test_input/bad_files/diyabcGUI_proj.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 R-pkg/inst/test_input/bad_files/diyabcGUI_proj.txt diff --git a/R-pkg/inst/test_input/bad_files/diyabcGUI_proj.txt b/R-pkg/inst/test_input/bad_files/diyabcGUI_proj.txt deleted file mode 100644 index 1fe91afa..00000000 --- a/R-pkg/inst/test_input/bad_files/diyabcGUI_proj.txt +++ /dev/null @@ -1 +0,0 @@ -proj_name:IndSeq_SNP_estim_param From 3619f1d691683292d24638c763634ff7ae23c0fb Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 19:14:47 +0200 Subject: [PATCH 41/82] implement header reader and related functions --- R-pkg/R/41_input_read.R | 411 ++++++++++++++++++++-- R-pkg/tests/testthat/test-41_input_read.R | 227 ++++++++++-- 2 files changed, 577 insertions(+), 61 deletions(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index 0388ee5d..c9c075ae 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -111,16 +111,18 @@ #' Content: see doc #' @param file_name string, (server-side) path to a headersim file. #' @param file_type string, MIME file type. +#' @param locus_type string, "snp" or "mss" #' @importFrom readr read_file -read_header <- function(file_name, file_type) { +read_header <- function(file_name, file_type, locus_type = "snp") { # init output out <- list( msg = list(), valid = TRUE, data_file = NULL, loci_desc = NULL, n_param = NULL, n_prior = NULL, n_sumstat = NULL, - cond_list = NULL, prior_list = NULL, group_prior_list = NULL, - n_scenario = NULL, scenario_list = NULL, + cond_list = NULL, prior_list = NULL, + n_group = NULL, group_prior_list = NULL, + n_scenario = NULL, scenario_list = NULL, simu_mode = NULL ) @@ -156,10 +158,10 @@ read_header <- function(file_name, file_type) { current_line <- current_line + 1 ## number of parameters and statistics + pttrn <- "^[0-9]+ parameters and [0-9]+ summary statistics$" strng <- header[1] header <- header[-1] current_line <- current_line + 1 - pttrn <- "^[0-9]+ parameters and [0-9]+ summary statistics$" if(!str_detect(strng, pttrn)) { out$valid <- FALSE msg <- tagList( @@ -167,13 +169,14 @@ read_header <- function(file_name, file_type) { "that should match the pattern", tags$code(pttrn), "." ) out$msg <- append(out$msg, list(msg)) - } else { - pttrn <- "[0-9]+(?= parameters)" - out$n_param <- as.integer(str_extract(strng, pttrn)) - pttrn <- "[0-9]+(?= summary statistics)" - out$n_sumstat <- as.integer(str_extract(strng, pttrn)) + return(out) } + pttrn <- "[0-9]+(?= parameters)" + out$n_param <- as.integer(str_extract(strng, pttrn)) + pttrn <- "[0-9]+(?= summary statistics)" + out$n_sumstat <- as.integer(str_extract(strng, pttrn)) + ## empty line strng <- header[1] header <- header[-1] @@ -185,6 +188,7 @@ read_header <- function(file_name, file_type) { "that should be empty." ) out$msg <- append(out$msg, list(msg)) + return(out) } ## scenarios @@ -200,36 +204,313 @@ read_header <- function(file_name, file_type) { "that should match the pattern", tags$code(pttrn), "." ) out$msg <- append(out$msg, list(msg)) - } else { - pttrn <- "[0-9]+(?= scenarios:)" - out$n_scenario <- as.integer(str_extract(strng, pttrn)) - pttrn <- "(?<= )[0-9]+" - nrow_per_scenario <- as.integer(unlist(str_extract_all(strng, pttrn))) - # extract scenarii - row_seq <- cumsum(c(1, nrow_per_scenario+1)) - scenario_list <- lapply( - split( - raw_content[(min(line_seq):(max(line_seq)-1)) - + next_sec_line], - rep(seq(line_seq), diff(c(line_seq, max(line_seq)))) - ), - function(content) { - parse_diyabc_header_scenarii(content) - }) - # check extracted scnenarii - valid <- all(unlist(lapply(scenario_list, - function(item) item$valid))) - # extract raw scenario list - raw_scenario_list <- unlist(unname( - lapply( - scenario_list, - function(item) item$raw_scenario + return(out) + } + + ## scenario config + pttrn <- "[0-9]+(?= scenarios:)" + out$n_scenario <- as.integer(str_extract(strng, pttrn)) + pttrn <- "(?<= )[0-9]+" + nrow_per_scenario <- as.integer(unlist(str_extract_all(strng, pttrn))) + ## extract scenarii + row_seq <- cumsum(c(1, nrow_per_scenario+1)) + parsed_scenario_list <- lapply( + split( + header[(min(row_seq):(max(row_seq)-1))], + rep(seq(row_seq), diff(c(row_seq, max(row_seq)))) + ), + function(content) parse_header_scenario(content) + ) + # next + header <- header[-(1:(max(row_seq)-1))] + current_line <- current_line + max(row_seq) - 1 + # check extracted scnenarii + scen_check <- unlist(lapply( + parsed_scenario_list, function(item) item$valid + )) + scen_id <- unlist(lapply( + parsed_scenario_list, function(item) item$id + )) + if(!all(scen_check)) { + out$valid <- FALSE + msg <- tagList( + "Issue with format of following scenarii:", + str_c(scen_id[!scen_check], collapse = ", "), "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + # extract raw scenario list + out$scenario_list <- unlist(unname(lapply( + parsed_scenario_list, + function(item) item$scenario + ))) + + ## empty line + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + if(strng != "") { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should be empty." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## historical parameters + pttrn <- "^historical parameters priors \\([0-9]+,[0-9]+\\)$" + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + # find section + if(!any(str_detect(strng, pttrn))) { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should match the pattern", tags$code(pttrn), "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # number of priors + pttrn <- "(?<= \\()[0-9]+" + out$n_prior <- as.integer(str_extract(strng, pttrn)) + # number of conditions + pttrn <- "[0-9]+(?=\\)$)" + out$n_cond <- as.integer(str_extract(strng, pttrn)) + + ## parameter config + # extract priors + out$prior_list <- header[1:out$n_prior] + header <- header[-(1:out$n_prior)] + current_line <- current_line + out$n_prior + # check extracted priors + prior_check <- unlist(lapply(out$prior_list, check_header_prior)) + if(!all(prior_check)) { + out$valid <- FALSE + msg <- tagList( + "Issue with format of parameter priors at lines:", + str_c(which(!prior_check) + current_line, collapse = ", "), + "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # extract conditions + if(out$n_cond > 0) { + + out$cond_list <- header[1:out$n_cond] + header <- header[-(1:out$n_cond)] + current_line <- current_line + out$n_cond + # check extracted conds + cond_check <- unlist(lapply(out$cond_list, check_header_cond)) + if(!all(cond_check)) { + out$valid <- FALSE + msg <- tagList( + "Issue with format of parameter conditions at lines:", + str_c(which(!cond_check) + current_line, collapse = ", "), + "." ) - )) - # next section - next_sec_line <- next_sec_line + max(line_seq) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # generation mode + pttrn <- "DRAW UNTIL" + out$simu_mode <- header[1] + header <- header[-1] + current_line <- current_line + 1 + if(out$simu_mode != pttrn) { + out$valid <- FALSE + msg <- tagList( + "Missing 'DRAW UNTIL' after conditions at lines:", + as.character(current_line+1), + "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + } + ## empty line (and check for unnecessary simulation mode) + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + if(strng != "") { + if(strng == "DRAW UNTIL") { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "unnecessary 'DRAW UNTIL'." + ) + out$msg <- append(out$msg, list(msg)) + } else { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should be empty." + ) + out$msg <- append(out$msg, list(msg)) + } + return(out) + } + + ## loci description + pttrn <- "^loci description \\([0-9]+\\)$" + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + # find section + if(!any(str_detect(strng, pttrn))) { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should match the pattern", tags$code(pttrn), "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # number of loci description + pttrn <- "(?<=^loci description \\()[0-9]+" + out$n_loci_desc <- as.integer(str_extract(strng, pttrn)) + + # extract loci description + out$loci_desc <- header[1:out$n_loci_desc] + header <- header[-(1:out$n_loci_desc)] + current_line <- current_line + out$n_loci_desc + # check extracted loci description + loci_desc_check <- unlist(lapply( + out$loci_desc, + check_header_loci_desc, type = locus_type + )) + if(!all(loci_desc_check)) { + out$valid <- FALSE + msg <- tagList( + "Issue with format of locus description at lines:", + str_c(which(!loci_desc_check) + current_line, collapse = ", "), + "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## empty line + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + if(strng != "") { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should be empty." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## group prior (for microsat/sequence) + if(locus_type == "mss") { + + pttrn <- "^group priors \\([0-9]+\\)$" + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + # find section + if(!any(str_detect(strng, pttrn))) { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should match the pattern", tags$code(pttrn), "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # number of group prior + pttrn <- "(?<=^group priors \\()[0-9]" + out$n_group <- as.integer(str_extract(strng, pttrn)) + + # find next section + tmp_current_line <- current_line + end_sec <- head(which(header == ""), 1) + content <- header[1:(end_sec-1)] + header <- header[-(1:(end_sec-1))] + current_line <- current_line + end_sec - 1 + + # extract group prior + check_group_prior <- parse_header_group_prior( + content, out$n_group, tmp_current_line + ) + + if(!check_group_prior$valid) { + out$valid <- FALSE + msg <- tagList( + "Issue with format of group prior around line ", + tags$b(as.character(check_group_prior$current_line)), + "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + current_line <- check_group_prior$current_line + out$group_prior_list <- check_group_prior$group_prior + + ## empty line + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + if(strng != "") { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should be empty." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + } + + ## group summary statistics + pttrn <- "^group summary statistics \\([0-9]+\\)$" + strng <- header[1] + header <- header[-1] + current_line <- current_line + 1 + # find section + if(!any(str_detect(strng, pttrn))) { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "that should match the pattern", tags$code(pttrn), "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # number of summary stats + pttrn <- "(?<=^group summary statistics \\()[0-9]+" + tmp_n_sumstat <- sum(as.integer(str_extract(strng, pttrn)), na.rm = TRUE) + # check + if(out$n_sumstat != tmp_n_sumstat) { + out$valid <- FALSE + msg <- tagList( + "Issue with line", tags$b(as.character(current_line)), + "the number of summary statistics is different", + "from the number given in line 2." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## + # TODO: parse end of file # output return(out) @@ -242,7 +523,7 @@ read_header <- function(file_name, file_type) { #' @description #' Content: see doc #' @param content string vector, scenario description -parse_scenario_header <- function(content) { +parse_header_scenario <- function(content) { # init output out <- list( @@ -274,6 +555,62 @@ parse_scenario_header <- function(content) { } +#' Parse scenario in header file +#' @keywords internal +#' @author Ghislain Durif +#' @description +#' Content: see doc +#' @param content string vector, scenario description +parse_header_group_prior <- function(content, n_group, current_line) { + + # init output + out <- list( + valid = FALSE, ind = NULL, group_prior = NULL, mss_type = NULL, + current_line = current_line + ) + + # loop over group priors + for(ind in 1:n_group) { + # which group + out$ind <- ind + # description (1st line) + pttrn <- str_c("^group G[0-9]+ \\[(M|S)\\]$") + strng <- content[1] + content <- content[-1] + out$current_line <- out$current_line + 1 + if(!str_detect(strng, pttrn)) { + out$valid <- FALSE + return(out) + } else { + group_prior_head <- strng + # locus type + pttrn <- "(?<=\\[)(M|S)(?=\\])" + out$mss_type[ind] <- str_extract(strng, pttrn) + n_line <- switch ( + out$mss_type[ind], + "M" = 6, + "S" = 7, + NA + ) + out$valid <- check_header_group_prior( + head(content, n_line), type = out$mss_type[ind] + ) + if(!out$valid) { + return(out) + } + out$group_prior[ind] <- str_c( + c(group_prior_head, head(content, n_line)), collapse = "\n" + ) + content <- content[-(1:n_line)] + out$current_line <- out$current_line + n_line + } + } + + ## output + return(out) +} + + #' Read and parse statobsRF.txt file #' @keywords internal diff --git a/R-pkg/tests/testthat/test-41_input_read.R b/R-pkg/tests/testthat/test-41_input_read.R index cf9e18c7..964c74c3 100644 --- a/R-pkg/tests/testthat/test-41_input_read.R +++ b/R-pkg/tests/testthat/test-41_input_read.R @@ -15,45 +15,203 @@ context("41_input_read") test_that("read_header", { - # existing file + ## SNP IndSeq + # estim param test_proj <- "IndSeq_SNP_estim_param" test_dir <- file.path(data4test_dir(), test_proj) file_name <- file.path(test_dir, "headerRF.txt") file_type <- "text/plain" - tmp <- read_statobs(file_name, file_type, n_stat = 130) - expect_true(tmp$valid) + locus_type = "snp" + res <- read_header(file_name, file_type, locus_type) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal(res$data_file, "indseq_SNP_sim_dataset_4POP_001.snp") + expect_equal(res$n_param, 8) + expect_equal(res$n_sumstat, 130) + expect_equal(res$n_scenario, 1) + expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_cond, 2) + expect_equal(res$n_cond, length(res$cond_list)) + expect_equal(res$simu_mode, "DRAW UNTIL") + expect_equal(res$n_loci_desc, 1) + expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_prior, 8) + expect_equal(res$n_prior, length(res$prior_list)) - # bad number of summary stats - test_proj <- "IndSeq_SNP_estim_param" - test_dir <- file.path(data4test_dir(), test_proj) - file_name <- file.path(test_dir, "statobsRF.txt") + # indeseq -> bad header file + test_proj <- "bad_files" + test_dir <- file.path(test_input_dir(), "bad_files") + file_name <- file.path(test_dir, "headerRF.txt") file_type <- "text/plain" - res <- read_statobs(file_name, file_type, n_stat = 120) + locus_type = "mss" + res <- read_header(file_name, file_type, locus_type) expect_false(res$valid) - expect_true(length(res$msg) == 1) + expect_equal(length(res$msg), 1) - # unexisting file - file_name <- file.path(test_dir, "toto.txt") - tmp <- read_statobs(file_name, file_type, n_stat = 130) - expect_false(tmp$valid) - expect_true(length(tmp$msg) == 1) + # model choice + test_proj <- "IndSeq_SNP_model_choice" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "headerRF.txt") + file_type <- "text/plain" + locus_type = "snp" + res <- read_header(file_name, file_type, locus_type) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal(res$data_file, "indseq_SNP_sim_dataset_4POP_001.snp") + expect_equal(res$n_param, 13) + expect_equal(res$n_sumstat, 130) + expect_equal(res$n_scenario, 6) + expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_cond, 6) + expect_equal(res$n_cond, length(res$cond_list)) + expect_equal(res$simu_mode, "DRAW UNTIL") + expect_equal(res$n_loci_desc, 1) + expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_prior, 13) + expect_equal(res$n_prior, length(res$prior_list)) - # unexisting file and wrong file type - file_name <- file.path(test_dir, "toto.txt") - file_type <- "toto" - tmp <- read_statobs(file_name, file_type, n_stat = 130) - expect_false(tmp$valid) - expect_true(length(tmp$msg) == 2) + ## SNP PoolSeq + # estim param + test_proj <- "PoolSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "headerRF.txt") + file_type <- "text/plain" + locus_type = "snp" + res <- read_header(file_name, file_type, locus_type) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal(res$data_file, "poolseq_SNP_sim_dataset_4POP_cov100_001.snp") + expect_equal(res$n_param, 8) + expect_equal(res$n_sumstat, 130) + expect_equal(res$n_scenario, 1) + expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_cond, 2) + expect_equal(res$n_cond, length(res$cond_list)) + expect_equal(res$simu_mode, "DRAW UNTIL") + expect_equal(res$n_loci_desc, 1) + expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_prior, 8) + expect_equal(res$n_prior, length(res$prior_list)) + + # model choice + test_proj <- "PoolSeq_SNP_model_choice" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "headerRF.txt") + file_type <- "text/plain" + locus_type = "snp" + res <- read_header(file_name, file_type, locus_type) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal(res$data_file, "poolseq_SNP_sim_dataset_4POP_cov100_001.snp") + expect_equal(res$n_param, 13) + expect_equal(res$n_sumstat, 130) + expect_equal(res$n_scenario, 6) + expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_cond, 6) + expect_equal(res$n_cond, length(res$cond_list)) + expect_equal(res$simu_mode, "DRAW UNTIL") + expect_equal(res$n_loci_desc, 1) + expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_prior, 13) + expect_equal(res$n_prior, length(res$prior_list)) + + ## MSS + # microsat + test_proj <- "Microsat" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "header.txt") + file_type <- "text/plain" + locus_type = "mss" + res <- read_header(file_name, file_type, locus_type) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal( + res$data_file, + "simu_dataset_microsat_one_pop_bottleneck_multisamples_001.mss" + ) + expect_equal(res$n_param, 6) + expect_equal(res$n_sumstat, 40) + expect_equal(res$n_scenario, 2) + expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_cond, 0) + expect_equal(res$n_cond, length(res$cond_list)) + expect_null(res$simu_mode) + expect_equal(res$n_loci_desc, 50) + expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_prior, 3) + expect_equal(res$n_prior, length(res$prior_list)) + expect_equal(res$n_group, 1) + expect_equal(res$n_group, length(res$group_prior_list)) + + # microsat sequence 1 + test_proj <- "Microsat_Sequences" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "header.txt") + file_type <- "text/plain" + locus_type = "mss" + res <- read_header(file_name, file_type, locus_type) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal(res$data_file, "mss_example_001.mss") + expect_equal(res$n_param, 9) + expect_equal(res$n_sumstat, 2) + expect_equal(res$n_scenario, 1) + expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_cond, 2) + expect_equal(res$n_cond, length(res$cond_list)) + expect_equal(res$simu_mode, "DRAW UNTIL") + expect_equal(res$n_loci_desc, 28) + expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_prior, 9) + expect_equal(res$n_prior, length(res$prior_list)) + expect_equal(res$n_group, 2) + expect_equal(res$n_group, length(res$group_prior_list)) + + # microsat sequence 1 -> bad header file + test_proj <- "bad_files" + test_dir <- file.path(test_input_dir(), "bad_files") + file_name <- file.path(test_dir, "header.txt") + file_type <- "text/plain" + locus_type = "mss" + res <- read_header(file_name, file_type, locus_type) + expect_false(res$valid) + expect_equal(length(res$msg), 1) + + # microsat sequence 2 + test_proj <- "Microsat_Sequences2" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "header.txt") + file_type <- "text/plain" + locus_type = "mss" + res <- read_header(file_name, file_type, locus_type) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal(res$data_file, "toytest2_micro_seq_complexe_001.mss") + expect_equal(res$n_param, 22) + expect_equal(res$n_sumstat, 85) + expect_equal(res$n_scenario, 2) + expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_cond, 2) + expect_equal(res$n_cond, length(res$cond_list)) + expect_equal(res$simu_mode, "DRAW UNTIL") + expect_equal(res$n_loci_desc, 28) + expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_prior, 9) + expect_equal(res$n_prior, length(res$prior_list)) + expect_equal(res$n_group, 5) + expect_equal(res$n_group, length(res$group_prior_list)) }) -test_that("parse_scenario_header", { + +test_that("parse_header_scenario", { # model choice test_proj <- "IndSeq_SNP_model_choice" test_dir <- file.path(data4test_dir(), test_proj) file_name <- file.path(test_dir, "headerRF.txt") - content <- str_split(read_file(file_name), "\n", simplify = TRUE) + header <- str_split(read_file(file_name), "\n", simplify = TRUE) - res <- parse_scenario_header(content[5:13]) + content <- header[5:13] + res <- parse_header_scenario(content) expect_equal( res, @@ -63,11 +221,32 @@ test_that("parse_scenario_header", { ) ) - res <- parse_scenario_header(content[6:13]) + content <- header[6:13] + res <- parse_header_scenario(content) expect_false(res$valid) }) + +test_that("parse_header_group_prior", { + # microsat + test_proj <- "Microsat_Sequences" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "header.txt") + header <- str_split(read_file(file_name), "\n", simplify = TRUE) + + content <- header[60:74] + n_group <- 2 + current_line <- 59 + res <- parse_header_group_prior(content, n_group, current_line) + + expect_true(res$valid) + expect_equal(length(res$group_prior), 2) + expect_equal(length(res$mss_type), 2) + expect_equal(res$current_line, 74) +}) + + test_that("read_statobs", { # existing file From 4022458b4ff88fa1eef661202467a1c4592571ff Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 30 Apr 2021 19:15:18 +0200 Subject: [PATCH 42/82] remove old functions to parse project header --- R-pkg/R/41_io_management.R | 334 ------------------- R-pkg/tests/testthat/test-41_io_management.R | 119 ------- 2 files changed, 453 deletions(-) diff --git a/R-pkg/R/41_io_management.R b/R-pkg/R/41_io_management.R index 22a8aa5d..757e1e39 100644 --- a/R-pkg/R/41_io_management.R +++ b/R-pkg/R/41_io_management.R @@ -1374,340 +1374,6 @@ check_mss_data_file <- function(data_file, data_dir, return(out) } -#' Parse diyabc header file -#' @keywords internal -#' @author Ghislain Durif -#' @description -#' Content: see doc -#' @param file_name string, (server-side) path to a header file. -#' @param file_type string, MIME file type. -#' @param locus_type string, `"mss"` for MicroSat/Sequence or `"snp"` for SNP. -parse_diyabc_header <- function(file_name, file_type, locus_type) { - # init output - data_file <- NULL - issues <- list() - loci_description <- NULL - n_loci_des <- NULL - n_param <- NULL - n_prior <- NULL - n_sumstat <- NULL - raw_cond_list <- NULL - raw_prior_list <- NULL - raw_group_prior_list <- NULL - raw_scenario_list <- NULL - simu_mode <- NULL - valid <- TRUE - # check file name - valid <- check_file_name(file_name) - # check file type - if(file_type != "text/plain") - valid <- FALSE - ## read file - if(valid) { - # local variable - next_sec_line <- 3 - # file content - raw_content <- readLines(file_name, warn = FALSE) - raw_content <- raw_content[raw_content != ""] - ## data file - data_file <- raw_content[1] - ## number of paramters and statistics - strng <- raw_content[2] - pttrn <- "^[0-9]+ parameters and [0-9]+ summary statistics$" - if(!str_detect(strng, pttrn)) { - issues <- append(issues, pttrn) - valid <- FALSE - } else { - pttrn <- "[0-9]+(?= parameters)" - n_param <- as.integer(str_extract(strng, pttrn)) - pttrn <- "[0-9]+(?= summary statistics)" - n_sumstat <- as.integer(str_extract(strng, pttrn)) - } - - ## scenarios - pttrn <- "^[0-9]+ scenarios:( [0-9]+)+ ?$" - # find section - if(!any(str_detect(raw_content, pttrn))) { - issues <- append(issues, pttrn) - valid <- FALSE - } else if(!(which(str_detect(raw_content, pttrn)) == next_sec_line)) { - issues <- append(issues, pttrn) - valid <- FALSE - } else { - strng <- raw_content[3] - pttrn <- "[0-9]+(?= scenarios:)" - n_scenario <- as.integer(str_extract(strng, pttrn)) - pttrn <- "(?<= )[0-9]+" - lines_per_scenario <- as.integer(unlist(str_extract_all(strng, - pttrn))) - # extract scenarii - line_seq <- cumsum(c(1, lines_per_scenario+1)) - scenario_list <- lapply( - split( - raw_content[(min(line_seq):(max(line_seq)-1)) - + next_sec_line], - rep(seq(line_seq), diff(c(line_seq, max(line_seq)))) - ), - function(content) { - parse_diyabc_header_scenarii(content) - }) - # check extracted scnenarii - valid <- all(unlist(lapply(scenario_list, - function(item) item$valid))) - # extract raw scenario list - raw_scenario_list <- unlist(unname( - lapply( - scenario_list, - function(item) item$raw_scenario - ) - )) - # next section - next_sec_line <- next_sec_line + max(line_seq) - } - - ## historical parameters - pttrn <- "^historical parameters priors \\([0-9]+,[0-9]+\\)$" - # find section - if(!any(str_detect(raw_content, pttrn))) { - issues <- append(issues, pttrn) - valid <- FALSE - } else if(!(which(str_detect(raw_content, pttrn)) == next_sec_line)) { - issues <- append(issues, pttrn) - valid <- FALSE - } else { - strng <- raw_content[next_sec_line] - next_sec_line <- next_sec_line + 1 - # number of priors - pttrn <- "(?<= \\()[0-9]+" - n_prior <- as.integer(str_extract(strng, pttrn)) - # number of conditions - pttrn <- "[0-9]+(?=\\)$)" - n_cond <- as.integer(str_extract(strng, pttrn)) - # extract priors - raw_prior_list <- raw_content[next_sec_line:(next_sec_line - + n_prior - 1)] - next_sec_line <- next_sec_line + n_prior - # check extracted priors - valid <- all(unlist(lapply(raw_prior_list, check_header_prior))) - # extract conditions - if(n_cond > 0) { - raw_cond_list <- raw_content[next_sec_line:(next_sec_line - + n_cond - 1)] - next_sec_line <- next_sec_line + n_cond - # check extracted conditions - valid <- all(unlist(lapply(raw_cond_list, check_header_cond))) - - # generation mode - simu_mode <- raw_content[next_sec_line] - next_sec_line <- next_sec_line + 1 - if(simu_mode != "DRAW UNTIL") { - issues <- append(issues, "missing 'DRAW UNTIL'") - valid <- FALSE - } - } else { - if(str_detect(raw_content[next_sec_line], "DRAW UNTIL")) { - issues <- append(issues, "unnecessary 'DRAW UNTIL'") - valid <- FALSE - } - } - } - - ## loci description - pttrn <- "^loci description \\([0-9]+\\)$" - # find section - if(!any(str_detect(raw_content, pttrn))) { - issues <- append(issues, pttrn) - valid <- FALSE - } else if(!(which(str_detect(raw_content, pttrn)) == next_sec_line)) { - issues <- append(issues, pttrn) - valid <- FALSE - } else { - strng <- raw_content[next_sec_line] - next_sec_line <- next_sec_line + 1 - # number of loci description - pttrn <- "(?<=^loci description \\()[0-9]+" - n_loci_des <- as.integer(str_extract(strng, pttrn)) - # extract loci description - loci_description <- raw_content[next_sec_line:(next_sec_line - + n_loci_des - 1)] - next_sec_line <- next_sec_line + n_loci_des - # check loci description - valid <- all(unlist(lapply(loci_description, - check_header_loci_des))) - } - - ## group prior (for microsat/sequence) - if(locus_type == "mss") { - pttrn <- "^group priors \\([0-9]+\\)$" - # find section - if(!any(str_detect(raw_content, pttrn))) { - issues <- append(issues, pttrn) - valid <- FALSE - } else if(!(which(str_detect(raw_content, pttrn)) == next_sec_line)) { - issues <- append(issues, pttrn) - valid <- FALSE - } else { - strng <- raw_content[next_sec_line] - next_sec_line <- next_sec_line + 1 - # check next section - pttrn <- "^group summary statistics \\([0-9]+\\)$" - tmp_next <- head(which(str_detect(raw_content, pttrn)), 1) - if(length(tmp_next) == 0) { - issues <- append(issues, pttrn) - valid <- FALSE - } else if(tmp_next <= next_sec_line) { - issues <- append(issues, pttrn) - valid <- FALSE - } else { - # extract info - raw_group_prior_list <- raw_content[next_sec_line:(tmp_next-1)] - - # next section - next_sec_line <- tmp_next - } - } - } - - ## group summary statistics - pttrn <- "^group summary statistics \\([0-9]+\\)$" - # find section - if(!any(str_detect(raw_content, pttrn))) { - issues <- append(issues, pttrn) - valid <- FALSE - } else if(!(head(which(str_detect(raw_content, pttrn)), 1) - == next_sec_line)) { - issues <- append(issues, pttrn) - valid <- FALSE - } else { - strng <- raw_content[next_sec_line:length(raw_content)] - next_sec_line <- next_sec_line + 1 - # number of summary stats - pttrn <- "(?<=^group summary statistics \\()[0-9]+" - tmp_n_sumstat <- sum(as.integer(str_extract(strng, pttrn)), - na.rm = TRUE) - # check - valid <- (n_sumstat == tmp_n_sumstat) - # TODO: parse end of file - } - } - ## output - return(lst(data_file, loci_description, - n_loci_des, n_param, n_prior, n_sumstat, - raw_cond_list, raw_prior_list, raw_group_prior_list, - raw_scenario_list, simu_mode, valid)) -} - -#' Parse diyabc header file scenarii -#' @keywords internal -#' @author Ghislain Durif -#' @description -#' Content: see doc -#' @param content string vector, scenario description -parse_diyabc_header_scenarii <- function(content) { - # init output - issues <- list() - id <- NULL - n_param <- NULL - prior <- NULL - raw_scenario <- NULL - valid <- TRUE - # first line - strng <- content[1] - pttrn <- str_c("^scenario [0-9]+ \\[", num_regex(), "\\] \\([0-9]+\\)$") - if(!str_detect(strng, pttrn)) { - issues <- append(issues, pttrn) - valid <- FALSE - } else { - # scenario id - pttrn <- "(?<=^scenario )[0-9]+" - id <- as.integer(str_extract(strng, pttrn)) - # scenario prior - pttrn <- str_c("(?<= \\[)", num_regex(), "(?=\\] )") - prior <- as.numeric(str_extract(strng, pttrn)) - # number of parameters in scenario - pttrn <- "(?<= \\()[0-9]+(?=\\)$)" - n_param <- as.integer(str_extract(strng, pttrn)) - ## raw scenario - raw_scenario <- str_c(content[-1], collapse = "\n") - } - ## output - return(lst(id, n_param, prior, raw_scenario, valid)) -} - -#' Check header file prior definition -#' @keywords internal -#' @author Ghislain Durif -#' @description -#' Content: see doc -#' @param cstrng string, prior description. -check_header_prior <- function(strng) { - # init output - issues <- list() - valid <- TRUE - # check - pttrn <- str_c(single_param_regex(), " ", - "(N|T|A)", " ", - "(UN|LU|NO|LN)", "\\[", - str_c(rep(num_regex(), 4), collapse = ","), - "\\]") - valid <- str_detect(strng, pttrn) - ## output - return(valid) -} - -#' Check header file condition definition -#' @keywords internal -#' @author Ghislain Durif -#' @description -#' Content: see doc -#' @param strng string, prior description. -check_header_cond <- function(strng) { - # init output - issues <- list() - valid <- TRUE - # check - pttrn <- str_c("^", single_param_regex(), "(<|=<|>|>=)", - single_param_regex(), "$") - valid <- str_detect(strng, pttrn) - ## output - return(valid) -} - -#' Check header file loci description -#' @keywords internal -#' @author Ghislain Durif -#' @description -#' Content: see doc -#' @param strng string, prior description. -#' @param type string, `"mss"` or `"snp"` -check_header_loci_des <- function(strng, type = "mss") { - # init output - issues <- list() - valid <- TRUE - # init local - pttrn <- NULL - # Microsat/Sequence - if(type == "mss") { - # Locus_M_A_1_ [M] G1 2 40 - # Locus_S_A_21_ [S] G4 1000 - pttrn <- str_c("^", single_param_regex(), " ", - "<(A|H|X|Y|M)>", " ", - "\\[(M|S)\\]", " ", - "G[0-9]+", " ", - "[0-9]+", "( [0-9]+)?", "$") - } else if(type == "snp") { - # 5000 G1 from 1 - pttrn <- str_c("^", "[0-9]+", " ", - "<(A|H|X|Y|M)>", " ", - "G[0-9]+", " ", - "from ", "[0-9]+", "$") - } - # check - valid <- str_detect(strng, pttrn) - ## output - return(valid) -} - #' Parse diyabc simulation header file #' @keywords internal #' @author Ghislain Durif diff --git a/R-pkg/tests/testthat/test-41_io_management.R b/R-pkg/tests/testthat/test-41_io_management.R index a86bddf0..62c58633 100644 --- a/R-pkg/tests/testthat/test-41_io_management.R +++ b/R-pkg/tests/testthat/test-41_io_management.R @@ -230,122 +230,3 @@ test_that("check_file_name", { expect_false(check_file_name(file.path(fs::path_home(), "not_existing_file"))) }) - - -test_that("parse_diyabc_header", { - # model choice - file_name <- file.path(example_dir(), - "IndSeq_SNP_model_choice", - "headerRF.txt") - file_type = "text/plain" - locus_type = "snp" - expect_equal( - parse_diyabc_header(file_name, file_type, locus_type), - list( - data_file="indseq_SNP_sim_dataset_4POP_001.snp", - loci_description="5000 G1 from 1", - n_loci_des=1, n_param=13, n_prior=13, n_sumstat=2, - raw_cond_list=c("t21>t32", "t42t423", - "t431t32", "t431 Date: Mon, 3 May 2021 10:10:19 +0200 Subject: [PATCH 43/82] function to read reftable header --- R-pkg/R/41_input_read.R | 27 +++++++++++- R-pkg/tests/testthat/test-41_input_read.R | 54 +++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index c9c075ae..cf55f76d 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -687,6 +687,7 @@ read_statobs <- function(file_name, file_type, n_stat) { #' Read and parse reftableRF.bin file #' @keywords internal #' @author Ghislain Durif +#' @author François-David Collin #' @description #' Content: see doc #' @param file_name string, (server-side) path to a headersim file. @@ -694,7 +695,11 @@ read_statobs <- function(file_name, file_type, n_stat) { read_reftable <- function(file_name, file_type) { # init output - out <- list(msg = list(), valid = TRUE) + out <- list( + msg = list(), valid = TRUE, + n_rec = NULL, n_scen = NULL, n_rec_scen = NULL, n_param = NULL, + n_stat = NULL + ) # check file_name tmp <- check_file_name(file_name) @@ -716,6 +721,26 @@ read_reftable <- function(file_name, file_type) { return(out) } + + ## Reftable feed + # Stream from reftable file + to_read = file(file_name,"rb") + # number of records + out$n_rec <- readBin(to_read, integer(), endian = "little") + # number of scenarios + out$n_scen = readBin(to_read, integer(), endian = "little") + # number of records for each scenario + out$n_rec_scen = readBin(to_read, integer(), n = out$n_scen, + endian = "little") + # number of used parameters (non constant) + out$n_param = readBin(to_read, integer(), n = out$n_scen, + endian = "little") + # number of stats + out$n_stat = readBin(to_read, integer(), endian = "little") + + # close stream + close(to_read) + # output return(out) } \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-41_input_read.R b/R-pkg/tests/testthat/test-41_input_read.R index 964c74c3..0344d28d 100644 --- a/R-pkg/tests/testthat/test-41_input_read.R +++ b/R-pkg/tests/testthat/test-41_input_read.R @@ -279,3 +279,57 @@ test_that("read_statobs", { expect_false(tmp$valid) expect_true(length(tmp$msg) == 2) }) + + +test_that("read_reftable", { + + ## SNP estim param + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "reftableRF.bin") + file_type <- "application/octet-stream" + res <- read_reftable(file_name, file_type) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal(res$n_rec, 12000) + expect_equal(res$n_scen, 1) + expect_equal(res$n_rec_scen, 12000) + expect_equal(res$n_param, 8) + expect_equal(res$n_stat, 130) + + ## SNP model choice + test_proj <- "IndSeq_SNP_model_choice" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "reftableRF.bin") + file_type <- "application/octet-stream" + res <- read_reftable(file_name, file_type) + expect_equal(length(res$msg), 0) + expect_equal(res$n_rec, 12000) + expect_equal(res$n_scen, 6) + expect_equal(res$n_rec_scen, c(1942L, 1950L, 2028L, 2001L, 1994L, 2085L)) + expect_equal(res$n_param, c(8L, 8L, 8L, 7L, 7L, 7L)) + expect_equal(res$n_stat, 130) + + ## wrong file type + test_proj <- "IndSeq_SNP_model_choice" + test_dir <- file.path(data4test_dir(), test_proj) + file_name <- file.path(test_dir, "reftableRF.bin") + file_type <- "text/plain" + res <- read_reftable(file_name, file_type) + expect_false(res$valid) + expect_true(length(res$msg) == 1) + + ## unexisting file + file_name <- file.path(test_dir, "toto.txt") + file_type <- "application/octet-stream" + res <- read_reftable(file_name, file_type) + expect_false(res$valid) + expect_true(length(res$msg) == 1) + + ## unexisting file and wrong file type + file_name <- file.path(test_dir, "toto.txt") + file_type <- "toto" + res <- read_reftable(file_name, file_type) + expect_false(res$valid) + expect_true(length(res$msg) == 2) +}) From eedcbee450ce056519c70683f524ad480e4d7a10 Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 10:11:28 +0200 Subject: [PATCH 44/82] remove commented lines regarding header reader unused internal function --- R-pkg/R/41_input_read.R | 106 ---------------------- R-pkg/tests/testthat/test-41_input_read.R | 13 --- 2 files changed, 119 deletions(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index cf55f76d..64b1d7d4 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -1,109 +1,3 @@ -#' Internal function to read header file -#' #' @keywords internal -#' #' @author Ghislain Durif -#' #' @description -#' #' Content: see doc -#' #' @param file_name string, (server-side) path to a headersim file. -#' #' @param file_type string, MIME file type. -#' #' @importFrom readr read_file -#' header_reader <- function(file_name) { -#' -#' ## Headers parsing -#' header <- read_file(file_name) # whole file in one string -#' -#' ### First part, scenarios -#' -#' ## extraction of scenarios descriptions -#' regex_scen <- "\\bscenario\\s+(\\d+)\\s+.*\\n((?:(?!(?:scenario|\\n)).*\\n)+)" -#' -#' # split by scenario -#' scen_prematch <- str_extract_all(header,regex_scen)[[1]] -#' -#' # number of scenarios -#' n_scen <- length(scen_prematch) -#' -#' # scenario match -#' scen_match <- str_match_all(scen_prematch,rescen) -#' -#' ## vector of scenario order -#' scen_ord <- unlist(lapply( -#' 1:n_scen, function(ind) return(as.integer(scen_match[[ind]][2])) -#' )) -#' -#' ## vector of scenario descriptions -#' scen_desc <- unlist(lapply( -#' 1:n_scen, function(ind) return(scen_match[[i]][3]) -#' )) -#' scen_desc <- scen_desc[scen_ord] -#' -#' ### Second part, hist params -#' -#' -#' ## extraction of number of parameters -#' regex_paramtot <- "historical parameters priors \\((\\d+)\\D" -#' ## total number of hist params in header -#' n_paramtot <- as.integer(str_match( -#' str_extract(header, regex_paramtot)[[1]], -#' regex_paramtot -#' )[2]) -#' -#' ## extraction of parameters names (and the corresponding law too, -#' ## in order to check if it is a dirac base, aka constant) -#' regex_paramlist <- paste0( -#' "\\bhistorical parameters priors.*\\n((?:\\w+\\W[^\\n]*\\n){", -#' n_paramtot, "})" -#' ) -#' paramlistmatch <- str_match(str_extract_all(header,reparamlist),reparamlist)[2] -#' ## regexp to extract : param name, mini and maximum of the law -#' reparam <- "(\\w+)\\W+\\w\\W+\\w\\w\\[([^,\\]]+),([^,\\]]+)[,\\]][^\\n]*\\n" -#' paramsh <- str_match_all(str_extract_all(paramlistmatch,reparam)[[1]],reparam) -#' paramsdesc <- list() -#' reali <- 1; -#' ## loop on all hist parameters to "filter" all constants -#' ## parameters -#' for(i in 1:nparamtoth) -#' { -#' mini <- as.numeric(paramsh[[i]][3]) -#' maxi <- as.numeric(paramsh[[i]][4]) -#' if(maxi != 0.0) -#' if ((maxi-mini)/maxi > 0.000001) -#' { -#' paramsdesc[paramsh[[i]][2]] <- reali -#' reali <- reali + 1 -#' } -#' } -#' realparamtot <- reali - 1 # this is the real number of non constant -#' # historical parameters -#' -#' ### Third part, extract parameters for each scenario -#' # parambyscenh stores the indices of used parameters for each -#' # scenario -#' parambyscenh <- vector(mode="numeric",length=nscenh) -#' # Loop for all scenarios -#' for(i in 1:nscenh) -#' { -#' templist <- list() -#' # list of terms used in scenario descriptions -#' listterms <- strsplit(scendesc[i],"\\W")[[1]] -#' m <- 1 -#' for(j in 1:length(listterms)) -#' { -#' if (!is.null(paramsdesc[listterms[j]][[1]])) -#' { -#' templist[m] <- paramsdesc[listterms[j]][[1]] -#' m <- m + 1 -#' } -#' } -#' parambyscenh[i] <- list((unique(unlist(templist)))) -#' } -#' -#' # Get all names -#' restatsname <- "\\n\\nscenario\\s+.*" -#' allcolspre <- tail(strsplit(str_extract(header,restatsname),"\\s+")[[1]],-2) -#' -#' } - - #' Read and parse headerRF.txt and header.txt file #' @keywords internal #' @author Ghislain Durif diff --git a/R-pkg/tests/testthat/test-41_input_read.R b/R-pkg/tests/testthat/test-41_input_read.R index 0344d28d..d8fbc1e3 100644 --- a/R-pkg/tests/testthat/test-41_input_read.R +++ b/R-pkg/tests/testthat/test-41_input_read.R @@ -1,18 +1,5 @@ context("41_input_read") -# test_that("header_reader", { -# -# # existing file -# test_proj <- "IndSeq_SNP_model_choice" -# test_dir <- file.path(data4test_dir(), test_proj) -# file_name <- file.path(test_dir, "headerRF.txt") -# file_type <- "text/plain" -# -# tmp <- header_reader(file_name, file_type) -# expect_true(tmp$valid) -# -# }) - test_that("read_header", { ## SNP IndSeq From adee4497b4ec00bc64d25a3e16d3b31dc451b836 Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 10:50:46 +0200 Subject: [PATCH 45/82] fix typo --- R-pkg/R/41_input_read.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index 64b1d7d4..3bda0f0f 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -26,7 +26,7 @@ read_header <- function(file_name, file_type, locus_type = "snp") { tmp <- check_file_name(file_name) if(!tmp) { out$valid <- FALSE - msg <- tagList("Invalid file file.") + msg <- tagList("Invalid file name.") out$msg <- append(out$msg, list(msg)) } @@ -523,7 +523,7 @@ read_statobs <- function(file_name, file_type, n_stat) { tmp <- check_file_name(file_name) if(!tmp) { out$valid <- FALSE - msg <- tagList("Invalid file file.") + msg <- tagList("Invalid file name.") out$msg <- append(out$msg, list(msg)) } @@ -599,7 +599,7 @@ read_reftable <- function(file_name, file_type) { tmp <- check_file_name(file_name) if(!tmp) { out$valid <- FALSE - msg <- tagList("Invalid file file.") + msg <- tagList("Invalid file name.") out$msg <- append(out$msg, list(msg)) } @@ -615,7 +615,6 @@ read_reftable <- function(file_name, file_type) { return(out) } - ## Reftable feed # Stream from reftable file to_read = file(file_name,"rb") From 4959b021d90748d06ed0f3b76d4c61198d98db3a Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 10:51:20 +0200 Subject: [PATCH 46/82] function to test all input files (except the data file) from a project --- R-pkg/R/62_analysis_project_io.R | 95 ++++++++++++++++++- .../testthat/test-62_analysis_project_io.R | 93 ++++++++++++++++++ 2 files changed, 185 insertions(+), 3 deletions(-) diff --git a/R-pkg/R/62_analysis_project_io.R b/R-pkg/R/62_analysis_project_io.R index 7dd2dbe9..528808da 100644 --- a/R-pkg/R/62_analysis_project_io.R +++ b/R-pkg/R/62_analysis_project_io.R @@ -208,8 +208,97 @@ proj_zip_file_input <- function(file_input) { #' Check existing project related files #' @keywords internal #' @author Ghislain Durif -#' @param file_input data.frame with fields name (chr), size (int), #' @param proj_dir character string, path to project directory. -check_proj_file <- function(proj_dir) { - # WRITEME +#' @param locus_type character string, `"snp"` or `"mss"`. +#' @importFrom mime guess_type +check_proj_file <- function(proj_dir, locus_type = "snp") { + + # init output + out <- list( + msg = list(), valid = TRUE, + header_check = NULL, statobs_check = NULL, + reftable_check = NULL + ) + + # check project directory + if(!dir.exists(proj_dir)) { + out$valid <- FALSE + msg <- tagList("Input project directory does not exist.") + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # project files + proj_file_list <- list.files(proj_dir) + + # check header + if(any(c("header.txt", "headerRF.txt") %in% proj_file_list)) { + header_file <- file.path( + proj_dir, + ifelse( + "headerRF.txt" %in% proj_file_list, + "headerRF.txt", "header.txt" + ) + ) + out$header_check <- tryCatch( + read_header( + file_name = header_file, + file_type = mime::guess_type(header_file), + locus_type = locus_type + ), + error = function(e) return(NULL) + ) + if(is.null(out$header_check)) { + out$valid <- FALSE + msg <- tagList("Issue when checking project header file.") + out$msg <- append(out$msg, list(msg)) + } + } + + # check reftable + if("reftableRF.bin" %in% proj_file_list) { + reftable_file <- file.path(proj_dir, "reftableRF.bin") + out$reftable_check <- tryCatch( + read_reftable( + file_name = reftable_file, + file_type = mime::guess_type(reftable_file) + ), + error = function(e) return(NULL) + ) + if(is.null(out$reftable_check)) { + out$valid <- FALSE + msg <- tagList("Issue when checking project reftable file.") + out$msg <- append(out$msg, list(msg)) + } + } + + # check statobs + if("statobsRF.txt" %in% proj_file_list) { + if(!is.null(out$reftable_check) && + !is.null(out$reftable_check$n_stat)) { + statobs_file <- file.path(proj_dir, "statobsRF.txt") + out$statobs_check <- tryCatch( + read_statobs( + file_name = statobs_file, + file_type = mime::guess_type(statobs_file), + n_stat = out$reftable_check$n_stat + ), + error = function(e) return(NULL) + ) + if(is.null(out$statobs_check)) { + out$valid <- FALSE + msg <- tagList("Issue when checking project statobs file.") + out$msg <- append(out$msg, list(msg)) + } + } else { + out$valid <- FALSE + msg <- tagList( + "Impossible to check project statobs file", + "because issue with reftable file.") + out$msg <- append(out$msg, list(msg)) + } + } + + # output + return(out) } \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-62_analysis_project_io.R b/R-pkg/tests/testthat/test-62_analysis_project_io.R index f22da900..f61c1b96 100644 --- a/R-pkg/tests/testthat/test-62_analysis_project_io.R +++ b/R-pkg/tests/testthat/test-62_analysis_project_io.R @@ -206,3 +206,96 @@ test_that("proj_zip_file_input", { expect_false(zip_proj$valid) expect_true(length(zip_proj$msg) == 1) }) + + +test_that("check_proj_file", { + + ## SNP IndSeq + # estim param + test_proj <- "IndSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + res <- check_proj_file(proj_dir = test_dir, locus_type = "snp") + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_true(res$header_check$valid) + expect_true(res$reftable_check$valid) + expect_true(res$statobs_check$valid) + + # bad locus type + res <- check_proj_file(proj_dir = test_dir, locus_type = "mss") + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_false(res$header_check$valid) + expect_true(res$reftable_check$valid) + expect_true(res$statobs_check$valid) + + # model choice + test_proj <- "IndSeq_SNP_model_choice" + test_dir <- file.path(data4test_dir(), test_proj) + res <- check_proj_file(proj_dir = test_dir, locus_type = "snp") + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_true(res$header_check$valid) + expect_true(res$reftable_check$valid) + expect_true(res$statobs_check$valid) + + ## SNP PoolSeq + # estim param + test_proj <- "PoolSeq_SNP_estim_param" + test_dir <- file.path(data4test_dir(), test_proj) + res <- check_proj_file(proj_dir = test_dir, locus_type = "snp") + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_true(res$header_check$valid) + expect_true(res$reftable_check$valid) + expect_true(res$statobs_check$valid) + + # model choice + test_proj <- "PoolSeq_SNP_model_choice" + test_dir <- file.path(data4test_dir(), test_proj) + res <- check_proj_file(proj_dir = test_dir, locus_type = "snp") + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_true(res$header_check$valid) + expect_true(res$reftable_check$valid) + expect_true(res$statobs_check$valid) + + ## MSS + # microsat + test_proj <- "Microsat" + test_dir <- file.path(data4test_dir(), test_proj) + res <- check_proj_file(proj_dir = test_dir, locus_type = "mss") + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_true(res$header_check$valid) + expect_null(res$reftable_check) + expect_null(res$statobs_check) + + # bad locus type + res <- check_proj_file(proj_dir = test_dir, locus_type = "snp") + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_false(res$header_check$valid) + expect_null(res$reftable_check) + expect_null(res$statobs_check) + + # microsat sequence 1 + test_proj <- "Microsat_Sequences" + test_dir <- file.path(data4test_dir(), test_proj) + res <- check_proj_file(proj_dir = test_dir, locus_type = "mss") + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_true(res$header_check$valid) + expect_null(res$reftable_check) + expect_null(res$statobs_check) + + # microsat sequence 2 + test_proj <- "Microsat_Sequences2" + test_dir <- file.path(data4test_dir(), test_proj) + res <- check_proj_file(proj_dir = test_dir, locus_type = "mss") + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_true(res$header_check$valid) + expect_null(res$reftable_check) + expect_null(res$statobs_check) +}) From 8a567349d2c49bacc02079571ab7ace3d75c09ba Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 11:32:23 +0200 Subject: [PATCH 47/82] manage project file input for existing project --- R-pkg/R/61_analysis_module.R | 63 ++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 3cb11c66..494deb89 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -211,7 +211,8 @@ proj_type_ui <- function(id) { ".zip" ) ), - uiOutput(ns("feedbak_existing")) + uiOutput(ns("feedbak_existing")), + uiOutput(ns("proj_file_list")) ), conditionalPanel( condition = "input.proj_type == 'example'", @@ -278,24 +279,68 @@ proj_type_server <- function(input, output, session) { req(input$proj_type == "existing") req(input$file_input) - tmp <- manage_existing_proj_file(input$file_input, env$ap$proj_dir) - - print(tmp) + input_check <- tryCatch( + proj_file_input(input$file_input, env$ap$proj_dir), + error = function(e) return(NULL) + ) output$feedbak_existing <- renderUI({ - if(length(tmp$msg) > 0) { + if(is.null(input_check) || !input_check$valid) { msg <- "Issue(s) with uploaded file(s)." - feedbackWarning("file_input", length(tmp$msg) > 0, msg) - tags$div( - do.call(tags$ul, lapply(tmp$msg, tags$li)), - style = "color: #F89406; margin-top: -15px;" + feedbackWarning( + "file_input", is.null(input_check) || !input_check$valid, + msg ) + if(length(input_check$msg) > 0) { + tags$div( + icon("warning"), "Issue(s) with uploaded file(s):", + do.call(tags$ul, lapply(input_check$msg, tags$li)), + style = "color: #F89406; margin-top: -15px;" + ) + } else { + NULL + } } else { NULL } }) }) + # list of uploaded project files + observeEvent({c(input$proj_type, input$file_input)}, { + req(input$proj_type) + req(input$proj_type == "existing") + + output$proj_file_list <- renderUI({ + proj_file_list <- list.files(env$ap$proj_dir) + + if(length(proj_file_list) > 0) { + helpText( + icon("comment"), "List of upload files:", + tags$div( + do.call(tags$ul, lapply( + list.files(env$ap$proj_dir), + function(item) return(tags$li(tags$code(item))) + )), + style = "column-count:2;" + ) + ) + } else { + tags$div( + icon("warning"), "No file was uploaded.", + style = "color: #F89406;" + ) + } + }) + }) + + # reset file upload when another mode is chosen + observe({ + req(input$proj_type) + req(input$proj_type != "existing") + shinyjs::reset("file_input") + }) + # ## new or existing project # observe({ # req(input$proj_type) From c6a35b47554916e163da06f5c1309e9d1b8c2465 Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 16:54:50 +0200 Subject: [PATCH 48/82] feedback when uploading files to an existing project --- R-pkg/R/61_analysis_module.R | 45 +++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 494deb89..385870b7 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -279,11 +279,13 @@ proj_type_server <- function(input, output, session) { req(input$proj_type == "existing") req(input$file_input) + # upload input_check <- tryCatch( proj_file_input(input$file_input, env$ap$proj_dir), error = function(e) return(NULL) ) + # feedback output$feedbak_existing <- renderUI({ if(is.null(input_check) || !input_check$valid) { msg <- "Issue(s) with uploaded file(s)." @@ -314,21 +316,52 @@ proj_type_server <- function(input, output, session) { output$proj_file_list <- renderUI({ proj_file_list <- list.files(env$ap$proj_dir) + expected_files1 <- c("headerRF.txt", "header.txt") + expected_files2 <- c("statobsRF.txt", "reftableRF.bin") + missing_files <- NULL + + missing_header <- !any(expected_files1 %in% proj_file_list) + if(missing_header) { + missing_files <- c(missing_files, "headerRF.txt") + } + + missing_files2 <- !(expected_files2 %in% proj_file_list) + if(any(missing_files2)) { + missing_files <- c(missing_files, + expected_files2[missing_files2]) + } + if(length(proj_file_list) > 0) { - helpText( - icon("comment"), "List of upload files:", + item1 <- helpText( + icon("comment"), "List of uploaded files:", tags$div( do.call(tags$ul, lapply( - list.files(env$ap$proj_dir), + proj_file_list, function(item) return(tags$li(tags$code(item))) - )), - style = "column-count:2;" + )) ) ) + item2 <- NULL + if(length(missing_files) > 0) { + item2 <- tags$div( + icon("warning"), + "Potentially missing files for an existing project:", + tags$div( + do.call(tags$ul, lapply( + missing_files, + function(item) return(tags$li(tags$code(item))) + )) + ), + tags$b("Note:"), + "you will be able to generate them below.", + style = "color: #F89406;" + ) + } + tagList(item1, item2) } else { tags$div( icon("warning"), "No file was uploaded.", - style = "color: #F89406;" + style = "color: #F89406; margin-top: -15px;" ) } }) From 7a98246793b4d6abfe5ad287c41505b0af490743 Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 17:16:46 +0200 Subject: [PATCH 49/82] function to clean the current project directory --- R-pkg/R/01_directory.R | 23 +++++++++++++++++++++++ R-pkg/tests/testthat/test-01_directory.R | 21 ++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/R-pkg/R/01_directory.R b/R-pkg/R/01_directory.R index b85a4e29..a1a2ebea 100644 --- a/R-pkg/R/01_directory.R +++ b/R-pkg/R/01_directory.R @@ -75,3 +75,26 @@ mk_proj_dir <- function(tag = "diyabc") { # output return(tmp_dir) } + +#' Clean project directory +#' @keywords internal +#' @description Remove all files and sub-folders in a given project directory. +#' @author Ghislain Durif +#' @param proj_dir character string, path to project directory. +clean_proj_dir <- function(proj_dir) { + # check if project directory exists + if(length(proj_dir) != 1 && !dir.exists(proj_dir)) { + stop("Input argument should be a valid path to a project directory") + } + # sub-folders + subdir_list <- list.dirs(proj_dir, full.names = FALSE, recursive = FALSE) + subdir_list <- subdir_list[subdir_list != ""] + if(length(subdir_list) > 0) { + fs::dir_delete(file.path(proj_dir, subdir_list)) + } + # files + file_list <- list.files(proj_dir) + if(length(file_list) > 0) { + fs::file_delete(file.path(proj_dir, file_list)) + } +} diff --git a/R-pkg/tests/testthat/test-01_directory.R b/R-pkg/tests/testthat/test-01_directory.R index 96627880..49b434ad 100644 --- a/R-pkg/tests/testthat/test-01_directory.R +++ b/R-pkg/tests/testthat/test-01_directory.R @@ -34,4 +34,23 @@ test_that("mk_proj_dir", { path <- mk_proj_dir() expect_true(dir.exists(path)) expect_error(fs::dir_delete(path), NA) -}) \ No newline at end of file +}) + +test_that("clean_proj_dir", { + # test dir + tmp_dir <- mk_proj_dir("test_clean_proj_dir") + # setup test + fs::file_create(file.path(tmp_dir, "file1")) + fs::file_create(file.path(tmp_dir, "file2")) + fs::dir_create(file.path(tmp_dir, "subdir1")) + fs::file_create(file.path(tmp_dir, "subdir1", "file1")) + fs::file_create(file.path(tmp_dir, "subdir1", "file1")) + fs::dir_create(file.path(tmp_dir, "subdir1", "subdir2")) + fs::file_create(file.path(tmp_dir, "subdir1", "subdir2", "file1")) + fs::file_create(file.path(tmp_dir, "subdir1", "subdir2", "file1")) + # run + clean_proj_dir(tmp_dir) + # check + expect_equal(length(list.files(tmp_dir)), 0) + +}) From c764cf3edbd78329f635f1bbfd8b88c4bae745f7 Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 17:20:29 +0200 Subject: [PATCH 50/82] move the bin dir cleaning function with other directory related functions --- R-pkg/R/01_directory.R | 18 ++++++++++++++++++ R-pkg/R/03_utils.R | 15 --------------- R-pkg/tests/testthat/test-01_directory.R | 8 ++++++++ R-pkg/tests/testthat/test-03_utils.R | 8 -------- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/R-pkg/R/01_directory.R b/R-pkg/R/01_directory.R index a1a2ebea..954abbe1 100644 --- a/R-pkg/R/01_directory.R +++ b/R-pkg/R/01_directory.R @@ -76,6 +76,7 @@ mk_proj_dir <- function(tag = "diyabc") { return(tmp_dir) } + #' Clean project directory #' @keywords internal #' @description Remove all files and sub-folders in a given project directory. @@ -98,3 +99,20 @@ clean_proj_dir <- function(proj_dir) { fs::file_delete(file.path(proj_dir, file_list)) } } + + +#' Clean binary directory +#' @keywords internal +#' @author Ghislain Durif +clean_bin_dir <- function() { + # bin directory + path <- bin_dir() + # existing binary file + existing_bin_files <- list.files(path) + existing_bin_files <- existing_bin_files[str_detect(existing_bin_files, + "diyabc|abcranger|dll")] + # delete diyabc/abcranger files + if(length(existing_bin_files) > 0) { + fs::file_delete(file.path(path, existing_bin_files)) + } +} diff --git a/R-pkg/R/03_utils.R b/R-pkg/R/03_utils.R index 39c33bc2..8cff1382 100644 --- a/R-pkg/R/03_utils.R +++ b/R-pkg/R/03_utils.R @@ -42,21 +42,6 @@ find_bin <- function(prog = "diyabc") { return(bin_file) } -#' Clean binary directory -#' @keywords internal -#' @author Ghislain Durif -clean_bin_dir <- function() { - # bin directory - path <- bin_dir() - # existing binary file - existing_bin_files <- list.files(path) - existing_bin_files <- existing_bin_files[str_detect(existing_bin_files, - "diyabc|abcranger|dll")] - # delete diyabc/abcranger files - if(length(existing_bin_files) > 0) { - fs::file_delete(file.path(path, existing_bin_files)) - } -} #' Find which OS is running #' @keywords internal diff --git a/R-pkg/tests/testthat/test-01_directory.R b/R-pkg/tests/testthat/test-01_directory.R index 49b434ad..5af11b8d 100644 --- a/R-pkg/tests/testthat/test-01_directory.R +++ b/R-pkg/tests/testthat/test-01_directory.R @@ -54,3 +54,11 @@ test_that("clean_proj_dir", { expect_equal(length(list.files(tmp_dir)), 0) }) + +test_that("clean_bin_dir", { + # test cleaning + clean_bin_dir() + expect_true(all(list.files(bin_dir()) %in% c("LICENSE", "README.md"))) + # dl latest bin + dl_all_latest_bin() +}) diff --git a/R-pkg/tests/testthat/test-03_utils.R b/R-pkg/tests/testthat/test-03_utils.R index 6926674b..d83e2cd8 100644 --- a/R-pkg/tests/testthat/test-03_utils.R +++ b/R-pkg/tests/testthat/test-03_utils.R @@ -22,14 +22,6 @@ test_that("find_bin", { expect_true(file.exists(path)) }) -test_that("clean_bin_dir", { - # test cleaning - clean_bin_dir() - expect_true(all(list.files(bin_dir()) %in% c("LICENSE", "README.md"))) - # dl latest bin - dl_all_latest_bin() -}) - test_that("get_os", { expect_true(is.character(get_os("abcranger"))) }) From 03d846cbac32299946a47396d856d0f2497c87ca Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 17:31:39 +0200 Subject: [PATCH 51/82] additional environment reactive values storing project related metadata --- R-pkg/R/04_environment.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index e3c889a3..5b691fda 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -35,6 +35,10 @@ init_diyabcrf_env <- function() { proj_dir = NULL, # project directory locus_type = "snp", # "snp" or "mss" seq_mode = "indseq", # "indseq" or "poolseq" + proj_file_list = NULL, # content of the project + header_check = NULL, # result of header file check + reftable_check = NULL, # result of reftable file check + statobs_check = NULL, # result of statobs file check ## observed data data_file = NULL # observed data file name ) @@ -125,6 +129,7 @@ init_datagen_env <- function() { proj_dir = NULL, # project directory locus_type = NULL, # "SNP" or "MSS" seq_mode = NULL, # "IndSeq" or "PoolSeq" + proj_file_list = NULL, # content of the project ## data description model = NULL, # historical model param = NULL, # list of parameter values From 3a24d0d57f93517d98fd1f7e9e6d95a7a6331549 Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 18:52:06 +0200 Subject: [PATCH 52/82] add counter for file modification in project directory in environment reactive values --- R-pkg/R/04_environment.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index 5b691fda..f523a213 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -36,6 +36,8 @@ init_diyabcrf_env <- function() { locus_type = "snp", # "snp" or "mss" seq_mode = "indseq", # "indseq" or "poolseq" proj_file_list = NULL, # content of the project + file_modif = NULL, # counter for project file modification + # (upload, new header, ...) header_check = NULL, # result of header file check reftable_check = NULL, # result of reftable file check statobs_check = NULL, # result of statobs file check @@ -130,6 +132,8 @@ init_datagen_env <- function() { locus_type = NULL, # "SNP" or "MSS" seq_mode = NULL, # "IndSeq" or "PoolSeq" proj_file_list = NULL, # content of the project + file_modif = NULL, # counter for project file modification + # (upload, new header, ...) ## data description model = NULL, # historical model param = NULL, # list of parameter values From 110502ed7c13596c4ded661ac7e735835076c17e Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 18:54:56 +0200 Subject: [PATCH 53/82] dedicated module to input files for an existing project (WIP, todo: file check) --- R-pkg/R/61_analysis_module.R | 484 ++++++++++++++++++++++------------- 1 file changed, 299 insertions(+), 185 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 385870b7..08747631 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -136,46 +136,6 @@ proj_type_ui <- function(id) { tags$li("open one of the included examples.") ) ) - # inline help for proj file input - proj_file_help <- tagList( - tags$ul( - tags$li( - "You can", tags$b("either"), "upload:", - "a project", tags$code("zip"), - "file generated in a previous run", - tags$b("or"), - "single project-related files, including", - tags$code("headerRF.txt"), ", ", - tags$code("reftableRF.bin"), ", ", - tags$code("statobsRF.txt"), "and your observed data file." - ), - tags$li( - "You", tags$b("cannot"), "upload both a project", - tags$code("zip"), "file", - "and single project-related files, those will be ignored.", - style = "margin-top: 10px;" - ), - tags$li( - "When uploading", tags$b("single project-related files"), - "you", tags$b("should"), "upload all required files", - "at the same time (use", tags$code("CTRL+click"), - "to select multiple files in the file chooser window).", - style = "margin-top: 10px;" - ), - tags$li( - "If you", tags$b("re-upload"), "a file or a group of files,", - "it will", tags$b("delete"), "and", tags$b("replace"), - "any previous upload.", - style = "margin-top: 10px;" - ), - tags$li( - "If some project files are missing or have formating issues", - "you will be able to (re)configure", - "the corresponding settings in the next panel.", - style = "margin-top: 10px;" - ) - ) - ) # ui tagList( h3("Project type") %>% @@ -192,39 +152,12 @@ proj_type_ui <- function(id) { conditionalPanel( condition = "input.proj_type == 'existing'", ns = ns, - h4(tags$b("Project files")) %>% - helper( - type = "inline", - content = as.character(proj_file_help) - ), - helpText( - "Use", tags$code("CTRL+click"), "to select more than one file." - ), - fileInput( - ns("file_input"), - label = NULL, - buttonLabel = "Select file(s)", - multiple = TRUE, - accept = c( - ".txt", - ".bin", - ".zip" - ) - ), - uiOutput(ns("feedbak_existing")), - uiOutput(ns("proj_file_list")) + existing_proj_ui(ns("existing_proj")) ), conditionalPanel( condition = "input.proj_type == 'example'", ns = ns, - h4(tags$b("Select an example")), - selectInput( - ns("proj_example"), - label = NULL, - choices = c("", "Not available at the moment"), - selected = NULL, - multiple = FALSE - ), + example_proj_ui(ns("example_proj")), ), hr(), h3("Data file"), @@ -257,122 +190,10 @@ proj_type_server <- function(input, output, session) { ) ## Existing project - # file_input = data.frame with fields 'name', 'size', 'type', 'datapath' - observe({ - pprint("file input") - print(input$file_input) - }) - - # Feedback on file upload - observe({ - req(input$proj_type) - req(input$proj_type == "existing") - - # feedback on missing file - feedbackWarning("file_input", !isTruthy(input$file_input), - "Missing file(s).") - }) - - # manage file upload (copy to project directory) - observe({ - req(input$proj_type) - req(input$proj_type == "existing") - req(input$file_input) - - # upload - input_check <- tryCatch( - proj_file_input(input$file_input, env$ap$proj_dir), - error = function(e) return(NULL) - ) - - # feedback - output$feedbak_existing <- renderUI({ - if(is.null(input_check) || !input_check$valid) { - msg <- "Issue(s) with uploaded file(s)." - feedbackWarning( - "file_input", is.null(input_check) || !input_check$valid, - msg - ) - if(length(input_check$msg) > 0) { - tags$div( - icon("warning"), "Issue(s) with uploaded file(s):", - do.call(tags$ul, lapply(input_check$msg, tags$li)), - style = "color: #F89406; margin-top: -15px;" - ) - } else { - NULL - } - } else { - NULL - } - }) - }) - - # list of uploaded project files - observeEvent({c(input$proj_type, input$file_input)}, { - req(input$proj_type) - req(input$proj_type == "existing") - - output$proj_file_list <- renderUI({ - proj_file_list <- list.files(env$ap$proj_dir) - - expected_files1 <- c("headerRF.txt", "header.txt") - expected_files2 <- c("statobsRF.txt", "reftableRF.bin") - missing_files <- NULL - - missing_header <- !any(expected_files1 %in% proj_file_list) - if(missing_header) { - missing_files <- c(missing_files, "headerRF.txt") - } - - missing_files2 <- !(expected_files2 %in% proj_file_list) - if(any(missing_files2)) { - missing_files <- c(missing_files, - expected_files2[missing_files2]) - } - - if(length(proj_file_list) > 0) { - item1 <- helpText( - icon("comment"), "List of uploaded files:", - tags$div( - do.call(tags$ul, lapply( - proj_file_list, - function(item) return(tags$li(tags$code(item))) - )) - ) - ) - item2 <- NULL - if(length(missing_files) > 0) { - item2 <- tags$div( - icon("warning"), - "Potentially missing files for an existing project:", - tags$div( - do.call(tags$ul, lapply( - missing_files, - function(item) return(tags$li(tags$code(item))) - )) - ), - tags$b("Note:"), - "you will be able to generate them below.", - style = "color: #F89406;" - ) - } - tagList(item1, item2) - } else { - tags$div( - icon("warning"), "No file was uploaded.", - style = "color: #F89406; margin-top: -15px;" - ) - } - }) - }) - - # reset file upload when another mode is chosen - observe({ - req(input$proj_type) - req(input$proj_type != "existing") - shinyjs::reset("file_input") - }) + callModule( + existing_proj_server, "existing_proj", + proj_type = reactive(input$proj_type) + ) # ## new or existing project # observe({ @@ -669,6 +490,299 @@ proj_type_server <- function(input, output, session) { # }) } +#' Existing project ui +#' @keywords internal +#' @author Ghislain Durif +existing_proj_ui <- function(id) { + ns <- NS(id) + # inline help for proj file input + proj_file_help <- tagList( + tags$ul( + tags$li( + "You can", tags$b("either"), "upload:", + "a project", tags$code("zip"), + "file generated in a previous run", + tags$b("or"), + "single project-related files, including", + tags$code("headerRF.txt"), ", ", + tags$code("reftableRF.bin"), ", ", + tags$code("statobsRF.txt"), "and your observed data file." + ), + tags$li( + "You", tags$b("cannot"), "upload both a project", + tags$code("zip"), "file", + "and single project-related files, those will be ignored.", + style = "margin-top: 10px;" + ), + tags$li( + "When uploading", tags$b("single project-related files"), + "you", tags$b("should"), "upload all required files", + "at the same time (use", tags$code("CTRL+click"), + "to select multiple files in the file chooser window).", + style = "margin-top: 10px;" + ), + tags$li( + "If you", tags$b("re-upload"), "a file or a group of files,", + "it will", tags$b("delete"), "and", tags$b("replace"), + "any previous upload.", + style = "margin-top: 10px;" + ), + tags$li( + "If some project files are missing or have formating issues", + "you will be able to (re)configure", + "the corresponding settings in the next panel.", + style = "margin-top: 10px;" + ) + ) + ) + # ui + tagList( + h4(tags$b("Project files")) %>% + helper( + type = "inline", + content = as.character(proj_file_help) + ), + helpText( + "Use", tags$code("CTRL+click"), "to select more than one file." + ), + fileInput( + ns("file_input"), + label = NULL, + buttonLabel = "Select file(s)", + multiple = TRUE, + accept = c( + ".txt", + ".bin", + ".zip" + ) + ), + uiOutput(ns("feedback_existing")), + uiOutput(ns("feedback_file_list")), + uiOutput(ns("feedback_header")), + uiOutput(ns("feedback_statobs")), + uiOutput(ns("feedback_reftable")) + ) +} + +#' Existing project server +#' @keywords internal +#' @author Ghislain Durif +existing_proj_server <- function(input, output, session, + proj_type = reactive({NULL})) { + # init local + local <- reactiveValues(proj_type = NULL) + + # get input + observe({ + local$proj_type <- proj_type() + }) + + # file_input = data.frame with fields 'name', 'size', 'type', 'datapath' + # debugging + observe({ + pprint("file input") + print(input$file_input) + }) + + # Feedback on file upload + observe({ + req(local$proj_type) + req(local$proj_type == "existing") + + # feedback on missing file + feedbackWarning("file_input", !isTruthy(input$file_input), + "Missing file(s).") + }) + + + # reset file upload when another mode is chosen + observe({ + req(local$proj_type) + req(local$proj_type != "existing") + shinyjs::reset("file_input") + }) + + # manage file upload (copy to project directory) + observe({ + req(local$proj_type) + req(local$proj_type == "existing") + req(input$file_input) + + # clean before upload + clean_proj_dir(env$ap$proj_dir) + + # upload + input_check <- tryCatch( + proj_file_input(input$file_input, env$ap$proj_dir), + error = function(e) return(NULL) + ) + + # feedback + output$feedback_existing <- renderUI({ + + if(is.null(input_check) || !input_check$valid) { + msg <- "Issue(s) with uploaded file(s)." + feedbackWarning( + "file_input", is.null(input_check) || !input_check$valid, + msg + ) + if(length(input_check$msg) > 0) { + tags$div( + icon("warning"), "Issue(s) with uploaded file(s):", + do.call(tags$ul, lapply(input_check$msg, tags$li)), + style = "color: #F89406; margin-top: -15px;" + ) + } else { + NULL + } + } else { + NULL + } + }) + + # update project file list and check files + if(!is.null(input$check) && input_check$valid) { + # file list + env$ap$proj_file_list <- list.files(proj_dir) + # file check + file_check <- check_proj_file( + env$ap$proj_dir, env$ap$locus_type + ) + # header ? + if(!is.null(file_check$header_check)) { + # FIXME + } + } else { + env$ap$proj_file_list <- NULL + env$ap$header_check <- NULL + env$ap$reftable_check <- NULL + env$ap$statobs_check <- NULL + } + }) + + # list of uploaded project files + observeEvent({c(local$proj_type, input$file_input)}, { + req(local$proj_type) + req(local$proj_type == "existing") + + # list of files + proj_file_list <- list.files(env$ap$proj_dir) + + # expected files + expected_files1 <- c("headerRF.txt", "header.txt") + expected_files2 <- c("statobsRF.txt", "reftableRF.bin") + expected_files <- c(expected_files1, expected_files2) + + # important project files that are present + important_files <- expected_files[expected_files %in% proj_file_list] + + # additional files + additional_files <- proj_file_list[!proj_file_list %in% + important_files] + + # missing files ? + missing_files <- NULL + + missing_header <- !any(expected_files1 %in% proj_file_list) + if(missing_header) { + missing_files <- c(missing_files, "headerRF.txt") + } + + missing_files2 <- !(expected_files2 %in% proj_file_list) + if(any(missing_files2)) { + missing_files <- c(missing_files, + expected_files2[missing_files2]) + } + + # output + output$feedback_file_list <- renderUI({ + if(length(proj_file_list) > 0) { + # project core files + subitem1 <- NULL + if(length(important_files) > 0) { + subitem1 <- tags$div( + do.call(tags$ul, lapply( + important_files, + function(item) return(tags$li(tags$code(item))) + )) + ) + } else { + subitem1 <- tags$b("none") + } + # additional files + subitem2 <- NULL + if(length(additional_files) > 0) { + subitem2 <- tags$div( + do.call(tags$ul, lapply( + additional_files, + function(item) return(tags$li(tags$code(item))) + )) + ) + } else { + subitem2 <- tags$b("none") + } + item1 <- helpText( + h5(icon("comment"), "Uploaded files"), + fluidRow( + column( + width = 6, + tagList( + "Project core files:", subitem1 + ) + ), + column( + width = 6, + tagList( + "Additional files:", subitem2 + ) + ) + ) + ) + # missing files + item2 <- NULL + if(length(missing_files) > 0) { + item2 <- tags$div( + icon("warning"), + "Potentially missing files for an existing project:", + tags$div( + do.call(tags$ul, lapply( + missing_files, + function(item) return(tags$li(tags$code(item))) + )) + ), + tags$b("Note:"), + "you will be able to generate them below.", + style = "color: #F89406;" + ) + } + tagList(item1, item2) + } else { + tags$div( + icon("warning"), "No file was uploaded.", + style = "color: #F89406; margin-top: -15px;" + ) + } + }) + }) +} + +#' Example project ui +#' @keywords internal +#' @author Ghislain Durif +example_proj_ui <- function(id) { + ns <- NS(id) + tagList( + h4(tags$b("Select an example")), + selectInput( + ns("proj_example"), + label = NULL, + choices = c("", "Not available at the moment"), + selected = NULL, + multiple = FALSE + ) + ) +} + #' Input data ui #' @keywords internal #' @author Ghislain Durif From 71336a82d520c62d44e39491e373143444427692 Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 19:39:41 +0200 Subject: [PATCH 54/82] function to update project file list and modification counter --- R-pkg/R/04_environment.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index f523a213..782ed1e6 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -262,3 +262,14 @@ getter <- function(var1, var2, env = diyabc_env) { setter <- function(val, var1, var2, env = diyabc_env) { env[[ as.character(substitute(var1)) ]][[ as.character(substitute(var2)) ]] <<- val } + +#' Update project files +#' @keywords internal +#' @author Ghislain Durif +update_proj_file <- function(tag = "ap") { + # increment file modification counter + env[[tag]]$file_modif <- + ifelse(!is.null(env[[tag]]$file_modif), env[[tag]]$file_modif, 0) + 1 + # file list + env[[tag]]$proj_file_list <- list.files(env[[tag]]$proj_dir) +} From 718549b98443906b743e289589de3622b4fbcc45 Mon Sep 17 00:00:00 2001 From: GD Date: Mon, 3 May 2021 19:40:14 +0200 Subject: [PATCH 55/82] additional dependencies --- R-pkg/DESCRIPTION | 2 ++ R-pkg/NAMESPACE | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R-pkg/DESCRIPTION b/R-pkg/DESCRIPTION index a225e458..17bf70aa 100644 --- a/R-pkg/DESCRIPTION +++ b/R-pkg/DESCRIPTION @@ -59,7 +59,9 @@ Imports: lubridate, jsonlite, magrittr, + mime, parallel, pbapply, processx, + readr, rlang, shiny, shinybusy (>= 0.2.2), diff --git a/R-pkg/NAMESPACE b/R-pkg/NAMESPACE index e879737c..bca99a4a 100644 --- a/R-pkg/NAMESPACE +++ b/R-pkg/NAMESPACE @@ -53,11 +53,13 @@ importFrom(ggplot2,ylim) importFrom(jsonlite,fromJSON) importFrom(lubridate,now) importFrom(magrittr,"%>%") +importFrom(mime,guess_type) importFrom(parallel,detectCores) importFrom(parallel,makeCluster) importFrom(parallel,stopCluster) importFrom(pbapply,pblapply) importFrom(processx,process) +importFrom(readr,read_file) importFrom(rlang,duplicate) importFrom(shinyFeedback,feedbackWarning) importFrom(shinyFeedback,useShinyFeedback) From 3935028a692bef2a519ddcd70796c2df0979ad49 Mon Sep 17 00:00:00 2001 From: GD Date: Tue, 4 May 2021 15:11:39 +0200 Subject: [PATCH 56/82] init global env sooner at startup + fix issue with datagen env init --- R-pkg/R/04_environment.R | 2 +- R-pkg/R/app_index.R | 6 ++---- R-pkg/R/app_server.R | 6 ++++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index 782ed1e6..1e512ba3 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -125,7 +125,7 @@ reset_diyabcrf_env <- function() { init_datagen_env <- function() { # clean environment - tmp_dp <- reactiveValues( + tmp_dp <- list( ## project setup proj_name = NULL, # project name proj_dir = NULL, # project directory diff --git a/R-pkg/R/app_index.R b/R-pkg/R/app_index.R index a9482cb0..dd49f84c 100644 --- a/R-pkg/R/app_index.R +++ b/R-pkg/R/app_index.R @@ -98,8 +98,7 @@ index_server <- function(input, output, session) { # update tab item updateTabItems(session, "app_menu", selected = "analysis_tab") - # init env - init_diyabcrf_env() + # reset env reset_diyabcrf_env() # verbosity @@ -127,8 +126,7 @@ index_server <- function(input, output, session) { # update tab item updateTabItems(session, "app_menu", selected = "datagen_tab") - # init env - init_datagen_env() + # reset env reset_datagen_env() # verbosity diff --git a/R-pkg/R/app_server.R b/R-pkg/R/app_server.R index c88152e2..3423ea5a 100644 --- a/R-pkg/R/app_server.R +++ b/R-pkg/R/app_server.R @@ -14,6 +14,12 @@ diyabc_server <- function(input, output, session) { ## help observe_helpers(session, help_dir = help_dir(), withMathJax = TRUE) + ## init + local <- reactiveValues(init = NULL) + observeEvent(local$init, { + init_diyabcrf_env() + init_datagen_env() + }, ignoreNULL = FALSE) ## index server function index_server(input, output, session) } \ No newline at end of file From 3468f012fb84613a586fdf45c5f708134f2b3520 Mon Sep 17 00:00:00 2001 From: GD Date: Tue, 4 May 2021 15:31:22 +0200 Subject: [PATCH 57/82] ui to list project files (WIP: ui to check project files and ui for example projects) --- R-pkg/R/61_analysis_module.R | 354 ++++++++++++++++++++++------------- 1 file changed, 220 insertions(+), 134 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 08747631..13de48c8 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -63,7 +63,9 @@ analysis_module_server <- function(input, output, session) { ## clean on exit session$onSessionEnded(function() { - isolate(tryCatch(fs::dir_delete(env$ap$proj_dir))) + isolate(tryCatch(function() { + if(isTruthy(env$ap$proj_dir)) fs::dir_delete(env$ap$proj_dir) + })) }) } @@ -159,6 +161,12 @@ proj_type_ui <- function(id) { ns = ns, example_proj_ui(ns("example_proj")), ), + conditionalPanel( + condition = "input.proj_type !== 'new'", + ns = ns, + proj_file_list_ui(ns("proj_file_list")), + proj_file_check_ui(ns("proj_file_check")) + ), hr(), h3("Data file"), helpText( @@ -195,6 +203,15 @@ proj_type_server <- function(input, output, session) { proj_type = reactive(input$proj_type) ) + ## Example project + # TODO + + ## File list for existing or example project + callModule( + proj_file_list_server, "proj_file_list", + proj_type = reactive(input$proj_type) + ) + # ## new or existing project # observe({ # req(input$proj_type) @@ -490,6 +507,184 @@ proj_type_server <- function(input, output, session) { # }) } +#' Feedback on project file list ui +#' @keywords internal +#' @author Ghislain Durif +proj_file_list_ui <- function(id) { + ns <- NS(id) + tagList( + helpText( + uiOutput(ns("feedback_proj_file")) + ) + ) +} + +#' Feedback on project file list server +#' @keywords internal +#' @author Ghislain Durif +proj_file_list_server <- function(input, output, session, + proj_type = reactive({NULL})) { + # init local + local <- reactiveValues(proj_type = NULL) + + # get input + observe({ + local$proj_type <- proj_type() + }) + + # # debugging + # observe({ + # req(env$ap$file_modif) + # req(env$ap$proj_file_list) + # pprint("file modif") + # pprint(env$ap$file_modif) + # pprint("file list") + # pprint(env$ap$proj_file_list) + # }) + + # feedback on list of uploaded files + observeEvent( + {c(env$ap$file_modif, env$ap$proj_file_list, local$proj_type)}, { + req(local$proj_type %in% c("existing", "example")) + # output + output$feedback_proj_file <- renderUI({ + # default + tag_list <- tags$div( + icon("warning"), "No file was uploaded.", + style = "color: #F89406; margin-top: -15px;" + ) + # else + if(isTruthy(env$ap) && isTruthy(env$ap$proj_file_list)) { + # project files + proj_file_list <- env$ap$proj_file_list + + if(length(proj_file_list) > 0) { + # expected files + expected_files1 <- c("headerRF.txt", "header.txt") + expected_files2 <- c("statobsRF.txt", "reftableRF.bin") + expected_files <- c(expected_files1, expected_files2) + + # important project files that are present + important_files <- expected_files[expected_files %in% + proj_file_list] + + # additional files + additional_files <- proj_file_list[!proj_file_list %in% + important_files] + + # missing files ? + missing_files <- NULL + + missing_header <- !any(expected_files1 %in% proj_file_list) + if(missing_header) { + missing_files <- c(missing_files, "headerRF.txt") + } + + missing_files2 <- !(expected_files2 %in% proj_file_list) + if(any(missing_files2)) { + missing_files <- c(missing_files, + expected_files2[missing_files2]) + } + # project core files + subitem1 <- NULL + if(length(important_files) > 0) { + subitem1 <- tags$div( + do.call(tags$ul, lapply( + important_files, + function(item) return(tags$li(tags$code(item))) + )) + ) + } else { + subitem1 <- tags$b("none") + } + # additional files + subitem2 <- NULL + if(length(additional_files) > 0) { + subitem2 <- tags$div( + do.call(tags$ul, lapply( + additional_files, + function(item) return(tags$li(tags$code(item))) + )) + ) + } else { + subitem2 <- tags$b("none") + } + item1 <- helpText( + h5(icon("comment"), tags$b("Uploaded files")), + fluidRow( + column( + width = 6, + tagList( + tags$p("Project core files:", subitem1) + ) + ), + column( + width = 6, + tagList( + tags$p("Additional files:", subitem2) + ) + ) + ) + ) + # missing files + item2 <- NULL + if(length(missing_files) > 0) { + item2 <- tags$div( + tags$p( + icon("warning"), + "Potentially missing files", + "for an existing project:", + tags$div( + do.call(tags$ul, lapply( + missing_files, + function(item) + return(tags$li(tags$code(item))) + )) + ), + tags$b("Note:"), + "you will be able to generate them below." + ), + style = "color: #F89406;" + ) + } + tag_list <- tagList(item1, item2) + } + } + # output + tag_list + }) + }) +} + +#' Project file check ui +#' @keywords internal +#' @author Ghislain Durif +proj_file_check_ui <- function(id) { + ns <- NS(id) + tagList() +} + +#' Project file check server +#' @keywords internal +#' @author Ghislain Durif +proj_file_check_server <- function(input, output, session) { + # TODO + + ## file check + observe({c(env$ap$file_modif, env$ap$proj_file_list)}, { + req(env$ap$proj_dir) + req(env$ap$locus_type) + # file check + file_check <- check_proj_file( + env$ap$proj_dir, env$ap$locus_type + ) + # header ? + if(!is.null(file_check$header_check)) { + # FIXME + } + }) +} + #' Existing project ui #' @keywords internal #' @author Ghislain Durif @@ -556,11 +751,7 @@ existing_proj_ui <- function(id) { ".zip" ) ), - uiOutput(ns("feedback_existing")), - uiOutput(ns("feedback_file_list")), - uiOutput(ns("feedback_header")), - uiOutput(ns("feedback_statobs")), - uiOutput(ns("feedback_reftable")) + uiOutput(ns("feedback_existing")) ) } @@ -577,12 +768,12 @@ existing_proj_server <- function(input, output, session, local$proj_type <- proj_type() }) - # file_input = data.frame with fields 'name', 'size', 'type', 'datapath' - # debugging - observe({ - pprint("file input") - print(input$file_input) - }) + # # file_input = data.frame with fields 'name', 'size', 'type', 'datapath' + # # debugging + # observe({ + # pprint("file input") + # print(input$file_input) + # }) # Feedback on file upload observe({ @@ -603,10 +794,11 @@ existing_proj_server <- function(input, output, session, }) # manage file upload (copy to project directory) - observe({ + observeEvent({c(local$proj_type, input$file_input)}, { req(local$proj_type) req(local$proj_type == "existing") req(input$file_input) + req(env$ap$proj_dir) # clean before upload clean_proj_dir(env$ap$proj_dir) @@ -619,7 +811,6 @@ existing_proj_server <- function(input, output, session, # feedback output$feedback_existing <- renderUI({ - if(is.null(input_check) || !input_check$valid) { msg <- "Issue(s) with uploaded file(s)." feedbackWarning( @@ -641,128 +832,15 @@ existing_proj_server <- function(input, output, session, }) # update project file list and check files - if(!is.null(input$check) && input_check$valid) { - # file list - env$ap$proj_file_list <- list.files(proj_dir) - # file check - file_check <- check_proj_file( - env$ap$proj_dir, env$ap$locus_type - ) - # header ? - if(!is.null(file_check$header_check)) { - # FIXME - } + if(!is.null(input_check) && input_check$valid) { + # file modification + update_proj_file("ap") } else { - env$ap$proj_file_list <- NULL - env$ap$header_check <- NULL - env$ap$reftable_check <- NULL - env$ap$statobs_check <- NULL - } - }) - - # list of uploaded project files - observeEvent({c(local$proj_type, input$file_input)}, { - req(local$proj_type) - req(local$proj_type == "existing") - - # list of files - proj_file_list <- list.files(env$ap$proj_dir) - - # expected files - expected_files1 <- c("headerRF.txt", "header.txt") - expected_files2 <- c("statobsRF.txt", "reftableRF.bin") - expected_files <- c(expected_files1, expected_files2) - - # important project files that are present - important_files <- expected_files[expected_files %in% proj_file_list] - - # additional files - additional_files <- proj_file_list[!proj_file_list %in% - important_files] - - # missing files ? - missing_files <- NULL - - missing_header <- !any(expected_files1 %in% proj_file_list) - if(missing_header) { - missing_files <- c(missing_files, "headerRF.txt") + # clean after failed upload + clean_proj_dir(env$ap$proj_dir) + # file modification + update_proj_file("ap") } - - missing_files2 <- !(expected_files2 %in% proj_file_list) - if(any(missing_files2)) { - missing_files <- c(missing_files, - expected_files2[missing_files2]) - } - - # output - output$feedback_file_list <- renderUI({ - if(length(proj_file_list) > 0) { - # project core files - subitem1 <- NULL - if(length(important_files) > 0) { - subitem1 <- tags$div( - do.call(tags$ul, lapply( - important_files, - function(item) return(tags$li(tags$code(item))) - )) - ) - } else { - subitem1 <- tags$b("none") - } - # additional files - subitem2 <- NULL - if(length(additional_files) > 0) { - subitem2 <- tags$div( - do.call(tags$ul, lapply( - additional_files, - function(item) return(tags$li(tags$code(item))) - )) - ) - } else { - subitem2 <- tags$b("none") - } - item1 <- helpText( - h5(icon("comment"), "Uploaded files"), - fluidRow( - column( - width = 6, - tagList( - "Project core files:", subitem1 - ) - ), - column( - width = 6, - tagList( - "Additional files:", subitem2 - ) - ) - ) - ) - # missing files - item2 <- NULL - if(length(missing_files) > 0) { - item2 <- tags$div( - icon("warning"), - "Potentially missing files for an existing project:", - tags$div( - do.call(tags$ul, lapply( - missing_files, - function(item) return(tags$li(tags$code(item))) - )) - ), - tags$b("Note:"), - "you will be able to generate them below.", - style = "color: #F89406;" - ) - } - tagList(item1, item2) - } else { - tags$div( - icon("warning"), "No file was uploaded.", - style = "color: #F89406; margin-top: -15px;" - ) - } - }) }) } @@ -783,6 +861,14 @@ example_proj_ui <- function(id) { ) } +#' Example project server +#' @keywords internal +#' @author Ghislain Durif +example_proj_server <- function(input, output, session) { + # TODO + # update_proj_file("ap") +} + #' Input data ui #' @keywords internal #' @author Ghislain Durif From 44284ccb3e2ea23a9210012ce5013b1499c900b2 Mon Sep 17 00:00:00 2001 From: GD Date: Tue, 4 May 2021 15:55:32 +0200 Subject: [PATCH 58/82] example proj ui + fix proj cleaning in existing proj ui --- R-pkg/R/61_analysis_module.R | 355 +++++++++++++++++++++++------------ 1 file changed, 233 insertions(+), 122 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 13de48c8..03772cc2 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -42,25 +42,25 @@ analysis_module_ui <- function(id) { #' @keywords internal #' @author Ghislain Durif analysis_module_server <- function(input, output, session) { - + ## project setting proj_set <- callModule(analysis_proj_set_server, "proj_set") - + ## Training set sub-module # training_set <- callModule(training_set_server, "train_set") - + ## random forest sub-module # rf <- callModule(rf_module_server, "rf") - + ## action proj_admin <- callModule(proj_admin_server, "proj_admin", tag = "ap") - + ## reset observeEvent(proj_admin$reset, { req(proj_admin$reset) session$reload() }) - + ## clean on exit session$onSessionEnded(function() { isolate(tryCatch(function() { @@ -92,34 +92,34 @@ analysis_proj_set_ui <- function(id) { #' @importFrom dplyr distinct #' @importFrom fs file_copy file_delete analysis_proj_set_server <- function(input, output, session) { - - + + # init local local <- reactiveValues() - + # init output out <- reactiveValues() - + ## project name proj_name <- callModule(proj_name_server, "proj_name", tag = "ap") - + ## data type data_type <- callModule(data_type_server, "data_type", tag = "ap") - + ## project type proj_type <- callModule(proj_type_server, "proj_type") - + # output$proj_is_ready <- renderUI({ # if(!(out$valid_proj & out$valid_data_file)) { - # h3(icon("warning"), "Project set up is not ready.", + # h3(icon("warning"), "Project set up is not ready.", # style="color:red;text-align:center;") # } else { # h4(icon("check"), "Project set up is ok.", # style="text-align:center;") # } # }) - # - + # + ## output return(out) } @@ -140,13 +140,13 @@ proj_type_ui <- function(id) { ) # ui tagList( - h3("Project type") %>% + h3("Project type") %>% helper(type = "inline", content = as.character(proj_type_help)), radioGroupButtons( ns("proj_type"), label = NULL, - choices = c("New project" = "new", - "Existing project" = "existing", + choices = c("New project" = "new", + "Existing project" = "existing", "Example" = "example"), selected = "new", justified = TRUE @@ -170,7 +170,7 @@ proj_type_ui <- function(id) { hr(), h3("Data file"), helpText( - icon("clock"), + icon("clock"), "Loading and checking the data file may take some time." ), conditionalPanel( @@ -190,28 +190,31 @@ proj_type_ui <- function(id) { #' @importFrom dplyr distinct #' @importFrom fs file_copy file_delete proj_type_server <- function(input, output, session) { - + # init local local <- reactiveValues( file_list = NULL, valid_files = FALSE ) - + ## Existing project callModule( - existing_proj_server, "existing_proj", + existing_proj_server, "existing_proj", proj_type = reactive(input$proj_type) ) - + ## Example project - # TODO - + callModule( + example_proj_server, "example_proj", + proj_type = reactive(input$proj_type) + ) + ## File list for existing or example project callModule( - proj_file_list_server, "proj_file_list", + proj_file_list_server, "proj_file_list", proj_type = reactive(input$proj_type) ) - + # ## new or existing project # observe({ # req(input$proj_type) @@ -229,7 +232,7 @@ proj_type_server <- function(input, output, session) { # out$new_proj <- FALSE # } # }) - # + # # ## Manage existing project # possible_files <- c("headerRF.txt", "reftableRF.bin", "statobsRF.txt") # # copy uploaded files to project working directory (server-side) @@ -240,18 +243,18 @@ proj_type_server <- function(input, output, session) { # # name (chr), size (int), type (chr), datapath (chr) # req(is.data.frame(input$file_input)) # req(nrow(input$file_input) > 0) - # + # # ## extraction # new_file_input <- input$file_input - # + # # tmp_proj_check <- existing_proj_file_check( # new_file_input, possible_files, out$proj_dir, local$file_input # ) - # + # # local$file_input <- tmp_proj_check$file_input # local$existing_proj_zip <- tmp_proj_check$existing_proj_zip # }) - # + # # # print possible files when uploading existing projects # output$file_check <- renderUI({ # helpText( @@ -260,7 +263,7 @@ proj_type_server <- function(input, output, session) { # tags$div( # style = "column-count:2;", # do.call( - # tags$ul, + # tags$ul, # lapply( # possible_files, # function(item) tags$li( @@ -287,21 +290,21 @@ proj_type_server <- function(input, output, session) { # ) # ) # }) - # + # # ## Manage example project # # update possible input # observe({ # req(!is.null(data_type$locus_type)) # req(!is.null(data_type$seq_mode)) - # + # # if(data_type$locus_type == "mss") { # updateSelectInput( - # session, - # "proj_example", + # session, + # "proj_example", # choices = c("", "Not available at the moment"), # selected = NULL # ) - # } else if(data_type$locus_type == "snp" & + # } else if(data_type$locus_type == "snp" & # data_type$seq_mode == "indseq") { # possible_choices <- basename( # list.dirs( @@ -311,12 +314,12 @@ proj_type_server <- function(input, output, session) { # possible_choices <- possible_choices[str_detect(possible_choices, # "IndSeq")] # updateSelectInput( - # session, - # "proj_example", + # session, + # "proj_example", # choices = c("", possible_choices), # selected = NULL # ) - # } else if(data_type$locus_type == "snp" & + # } else if(data_type$locus_type == "snp" & # data_type$seq_mode == "poolseq") { # possible_choices <- basename( # list.dirs( @@ -326,8 +329,8 @@ proj_type_server <- function(input, output, session) { # possible_choices <- possible_choices[str_detect(possible_choices, # "PoolSeq")] # updateSelectInput( - # session, - # "proj_example", + # session, + # "proj_example", # choices = c("", possible_choices), # selected = NULL # ) @@ -335,10 +338,10 @@ proj_type_server <- function(input, output, session) { # }) # # copy files if required # observeEvent(input$proj_example, { - # + # # req(input$proj_type == "example") # req(input$proj_example) - # + # # # copy files # proj_files <- list.files( # file.path( @@ -358,7 +361,7 @@ proj_type_server <- function(input, output, session) { # ), # overwrite = TRUE # ) - # + # # # update file input # # data.frame with 4 columns: # # name (chr), size (int), type (chr), datapath (chr) @@ -372,7 +375,7 @@ proj_type_server <- function(input, output, session) { # ), # valid = rep(TRUE, length(proj_files)) # ) - # + # # ## file type # ind <- which(local$file_input$name == "headerRF.txt") # local$file_input$type[ind] <- "text/plain" @@ -381,18 +384,18 @@ proj_type_server <- function(input, output, session) { # ind <- which(local$file_input$name == "statObsRF.txt") # local$file_input$type[ind] <- "text/plain" # }) - # + # # ## check current project header file # observeEvent(local$file_input, { - # + # # req(is.data.frame(local$file_input)) # req(nrow(local$file_input) > 0) # req(!is.null(input$proj_type)) - # + # # # # debugging # # pprint("file input") # # pprint(local$file_input) - # + # # ## header check # if("headerRF.txt" %in% local$file_input$name) { # ind <- which(local$file_input$name == "headerRF.txt") @@ -412,11 +415,11 @@ proj_type_server <- function(input, output, session) { # local$data_file <- header_file_content$data_file # } # } - # + # # # # debugging # # pprint("file input") # # pprint(local$file_input) - # + # # ## delete non valid files # lapply( # split(local$file_input, seq(nrow(local$file_input))), @@ -429,38 +432,38 @@ proj_type_server <- function(input, output, session) { # } # } # ) - # + # # ## project file list # out$proj_file_list <- local$file_input$name[local$file_input$valid] - # + # # # # debugging # # pprint("file_input") # # pprint(local$file_input) # }) - # + # # ## Data file file # data_file <- callModule( - # input_data_server, "input_data_file", + # input_data_server, "input_data_file", # proj_dir = reactive(out$proj_dir), # existing_proj_zip = reactive(local$existing_proj_zip) # ) - # + # # # update local if data file upload # observe({ # req(!is.null(data_file$name)) # local$data_file <- data_file$name # }) - # + # # # data file extracted from existing project zip file # observe({ # req(!is.null(local$existing_proj_zip)) # req(!is.null(local$header_data_file)) - # + # # if(local$existing_proj_zip) { # local$data_file <- local$header_data_file # } # }) - # + # # ## Data file check # check_data <- callModule( # check_data_server, "check_data_file", @@ -470,35 +473,35 @@ proj_type_server <- function(input, output, session) { # seq_mode = reactive(out$seq_mode), # proj_dir = reactive(out$proj_dir) # ) - # + # # # update output # observe({ # out$data_file <- data_file$name # out$data_info <- check_data$info # out$valid_data_file <- check_data$valid # }) - # + # # # valid set up ? # observe({ - # + # # req(!is.null(out$valid_data_file)) # req(!is.null(local$valid_proj_name)) - # + # # # check header if required # valid_header <- TRUE # if(!is.null(out$proj_header_content$valid)) { # valid_header <- out$proj_header_content$valid # } - # + # # out$valid_proj <- local$valid_proj_name & valid_header & out$valid_data_file - # + # # # # debugging # # logging("valid proj?", out$valid_proj) # }) - # + # # output$proj_is_ready <- renderUI({ # if(!(out$valid_proj & out$valid_data_file)) { - # h3(icon("warning"), "Project set up is not ready.", + # h3(icon("warning"), "Project set up is not ready.", # style="color:red;text-align:center;") # } else { # h4(icon("check"), "Project set up is ok.", @@ -522,16 +525,17 @@ proj_file_list_ui <- function(id) { #' Feedback on project file list server #' @keywords internal #' @author Ghislain Durif +#' @param proj_type character string, `"new"`, `"existing"` or `"example"`. proj_file_list_server <- function(input, output, session, proj_type = reactive({NULL})) { # init local local <- reactiveValues(proj_type = NULL) - + # get input observe({ local$proj_type <- proj_type() }) - + # # debugging # observe({ # req(env$ap$file_modif) @@ -541,7 +545,7 @@ proj_file_list_server <- function(input, output, session, # pprint("file list") # pprint(env$ap$proj_file_list) # }) - + # feedback on list of uploaded files observeEvent( {c(env$ap$file_modif, env$ap$proj_file_list, local$proj_type)}, { @@ -557,29 +561,29 @@ proj_file_list_server <- function(input, output, session, if(isTruthy(env$ap) && isTruthy(env$ap$proj_file_list)) { # project files proj_file_list <- env$ap$proj_file_list - + if(length(proj_file_list) > 0) { # expected files expected_files1 <- c("headerRF.txt", "header.txt") expected_files2 <- c("statobsRF.txt", "reftableRF.bin") expected_files <- c(expected_files1, expected_files2) - + # important project files that are present important_files <- expected_files[expected_files %in% proj_file_list] - + # additional files additional_files <- proj_file_list[!proj_file_list %in% important_files] - + # missing files ? missing_files <- NULL - + missing_header <- !any(expected_files1 %in% proj_file_list) if(missing_header) { missing_files <- c(missing_files, "headerRF.txt") } - + missing_files2 <- !(expected_files2 %in% proj_file_list) if(any(missing_files2)) { missing_files <- c(missing_files, @@ -632,12 +636,12 @@ proj_file_list_server <- function(input, output, session, item2 <- tags$div( tags$p( icon("warning"), - "Potentially missing files", + "Potentially missing files", "for an existing project:", tags$div( do.call(tags$ul, lapply( missing_files, - function(item) + function(item) return(tags$li(tags$code(item))) )) ), @@ -668,8 +672,8 @@ proj_file_check_ui <- function(id) { #' @keywords internal #' @author Ghislain Durif proj_file_check_server <- function(input, output, session) { - # TODO - + # TODO + ## file check observe({c(env$ap$file_modif, env$ap$proj_file_list)}, { req(env$ap$proj_dir) @@ -695,35 +699,35 @@ existing_proj_ui <- function(id) { tags$ul( tags$li( "You can", tags$b("either"), "upload:", - "a project", tags$code("zip"), + "a project", tags$code("zip"), "file generated in a previous run", tags$b("or"), "single project-related files, including", tags$code("headerRF.txt"), ", ", - tags$code("reftableRF.bin"), ", ", + tags$code("reftableRF.bin"), ", ", tags$code("statobsRF.txt"), "and your observed data file." ), tags$li( - "You", tags$b("cannot"), "upload both a project", + "You", tags$b("cannot"), "upload both a project", tags$code("zip"), "file", "and single project-related files, those will be ignored.", style = "margin-top: 10px;" ), tags$li( "When uploading", tags$b("single project-related files"), - "you", tags$b("should"), "upload all required files", - "at the same time (use", tags$code("CTRL+click"), + "you", tags$b("should"), "upload all required files", + "at the same time (use", tags$code("CTRL+click"), "to select multiple files in the file chooser window).", style = "margin-top: 10px;" ), tags$li( "If you", tags$b("re-upload"), "a file or a group of files,", - "it will", tags$b("delete"), "and", tags$b("replace"), + "it will", tags$b("delete"), "and", tags$b("replace"), "any previous upload.", style = "margin-top: 10px;" ), tags$li( - "If some project files are missing or have formating issues", + "If some project files are missing or have formating issues", "you will be able to (re)configure", "the corresponding settings in the next panel.", style = "margin-top: 10px;" @@ -743,7 +747,7 @@ existing_proj_ui <- function(id) { fileInput( ns("file_input"), label = NULL, - buttonLabel = "Select file(s)", + buttonLabel = "Select file(s)", multiple = TRUE, accept = c( ".txt", @@ -758,34 +762,34 @@ existing_proj_ui <- function(id) { #' Existing project server #' @keywords internal #' @author Ghislain Durif +#' @param proj_type character string, `"new"`, `"existing"` or `"example"`. existing_proj_server <- function(input, output, session, proj_type = reactive({NULL})) { # init local local <- reactiveValues(proj_type = NULL) - + # get input observe({ local$proj_type <- proj_type() }) - + # # file_input = data.frame with fields 'name', 'size', 'type', 'datapath' # # debugging # observe({ # pprint("file input") # print(input$file_input) # }) - + # Feedback on file upload observe({ req(local$proj_type) req(local$proj_type == "existing") - + # feedback on missing file feedbackWarning("file_input", !isTruthy(input$file_input), "Missing file(s).") }) - - + # reset file upload when another mode is chosen observe({ req(local$proj_type) @@ -793,34 +797,41 @@ existing_proj_server <- function(input, output, session, shinyjs::reset("file_input") }) - # manage file upload (copy to project directory) - observeEvent({c(local$proj_type, input$file_input)}, { + # clean project directory when choosing this mode + observeEvent(local$proj_type, { req(local$proj_type) req(local$proj_type == "existing") - req(input$file_input) req(env$ap$proj_dir) # clean before upload clean_proj_dir(env$ap$proj_dir) - + # file modification + update_proj_file("ap") + }) + + # manage file upload (copy to project directory) + observeEvent(input$file_input, { + req(input$file_input) + req(env$ap$proj_dir) + # upload input_check <- tryCatch( proj_file_input(input$file_input, env$ap$proj_dir), error = function(e) return(NULL) ) - + # feedback output$feedback_existing <- renderUI({ if(is.null(input_check) || !input_check$valid) { msg <- "Issue(s) with uploaded file(s)." feedbackWarning( - "file_input", is.null(input_check) || !input_check$valid, + "file_input", is.null(input_check) || !input_check$valid, msg ) if(length(input_check$msg) > 0) { tags$div( icon("warning"), "Issue(s) with uploaded file(s):", - do.call(tags$ul, lapply(input_check$msg, tags$li)), + do.call(tags$ul, lapply(input_check$msg, tags$li)), style = "color: #F89406; margin-top: -15px;" ) } else { @@ -830,7 +841,7 @@ existing_proj_server <- function(input, output, session, NULL } }) - + # update project file list and check files if(!is.null(input_check) && input_check$valid) { # file modification @@ -864,9 +875,109 @@ example_proj_ui <- function(id) { #' Example project server #' @keywords internal #' @author Ghislain Durif -example_proj_server <- function(input, output, session) { - # TODO - # update_proj_file("ap") +#' @param proj_type character string, `"new"`, `"existing"` or `"example"`. +example_proj_server <- function(input, output, session, + proj_type = reactive({NULL})) { + + # init local + local <- reactiveValues(proj_type = NULL) + + # get input + observe({ + local$proj_type <- proj_type() + }) + + # clean project directory when choosing this mode + observeEvent(local$proj_type, { + req(local$proj_type) + req(local$proj_type == "example") + req(env$ap$proj_dir) + + # clean before upload + clean_proj_dir(env$ap$proj_dir) + # file modification + update_proj_file("ap") + }) + + # update possible input + observeEvent(local$proj_type, { + req(local$proj_type) + req(local$proj_type == "example") + req(env$ap$locus_type) + + ## MSS + if(env$ap$locus_type == "mss") { + updateSelectInput( + session, + "proj_example", + choices = c("", "Not available at the moment"), + selected = NULL + ) + ## SNP + } else if(env$ap$locus_type == "snp") { + req(env$ap$seq_mode) + ## IndSeq + if(env$ap$seq_mode == "indseq") { + possible_choices <- basename( + list.dirs( + example_dir() + ) + ) + possible_choices <- + possible_choices[str_detect(possible_choices, "IndSeq")] + updateSelectInput( + session, + "proj_example", + choices = c("", possible_choices), + selected = NULL + ) + ## PoolSeq + } else if(env$ap$seq_mode == "poolseq") { + possible_choices <- basename( + list.dirs( + example_dir() + ) + ) + possible_choices <- + possible_choices[str_detect(possible_choices, "PoolSeq")] + updateSelectInput( + session, + "proj_example", + choices = c("", possible_choices), + selected = NULL + ) + } + } + }) + + # manage file upload (copy to project directory) + observeEvent(input$proj_example, { + req(input$proj_example) + req(env$ap$proj_dir) + + # copy files + proj_files <- list.files( + file.path( + example_dir(), + input$proj_example + ) + ) + fs::file_copy( + path = file.path( + example_dir(), + input$proj_example, + proj_files + ), + new_path = file.path( + env$ap$proj_dir, + proj_files + ), + overwrite = TRUE + ) + + # file modification + update_proj_file("ap") + }) } #' Input data ui @@ -878,7 +989,7 @@ input_data_ui <- function(id) { uiOutput(ns("feedback")), fileInput( ns("data_file"), - label = NULL, + label = NULL, buttonLabel = "Select file", multiple = FALSE ) @@ -909,13 +1020,13 @@ input_data_server <- function(input, output, session, out <- reactiveValues( name = NULL ) - + ## feedback output$feedback <- renderUI({ if(!is.null(local$existing_proj_zip)) { if(local$existing_proj_zip) { helpText( - icon("comment"), + icon("comment"), "Data file was already extracted from project zip file." ) } else { @@ -925,7 +1036,7 @@ input_data_server <- function(input, output, session, NULL } }) - + ## get data file observeEvent(input$data_file, { # input$data_file = data.frame with 4 columns: @@ -940,7 +1051,7 @@ input_data_server <- function(input, output, session, fs::file_copy(input$data_file$datapath, file.path(local$proj_dir, out$name), overwrite = TRUE) - + if(file.exists(input$data_file$datapath)) { # logging("deleting:", input$data_file$datapath) fs::file_delete(input$data_file$datapath) @@ -950,7 +1061,7 @@ input_data_server <- function(input, output, session, # observe({ # logging("data file = ", out$file) # }) - + ## output return(out) } @@ -970,7 +1081,7 @@ check_data_ui <- function(id) { #' @keywords internal #' @author Ghislain Durif #' @param data_file string as a `reactive`, data file uploaded by the user. -#' @param expected_data_file string as a `reactive`, expected data file if a +#' @param expected_data_file string as a `reactive`, expected data file if a #' header file is provided (NULL otherwise). #' @param locus_type string as a `reactive`, `"mss"` or `"snp"`. #' @param seq_mode string as a `reactive`, `"indseq"` or `"poolseq"`. @@ -999,7 +1110,7 @@ check_data_server <- function(input, output, session, local$locus_type <- locus_type() local$seq_mode <- seq_mode() local$proj_dir <- proj_dir() - + # # debugging # pprint(paste0("input data file = ", local$data_file)) # pprint(paste0("expected data file = ", local$exp_data_file)) @@ -1009,7 +1120,7 @@ check_data_server <- function(input, output, session, }) # init output out <- reactiveValues( - data_file = NULL, + data_file = NULL, data_info = NULL, valid = FALSE ) @@ -1017,7 +1128,7 @@ check_data_server <- function(input, output, session, # observe({ # logging("data file = ", out$file) # }) - + ## message if missing file output$missing_file <- renderUI({ if(is.null(local$data_file)) { @@ -1028,7 +1139,7 @@ check_data_server <- function(input, output, session, NULL } }) - + # data check observe({ req(!is.null(local$data_file)) @@ -1037,7 +1148,7 @@ check_data_server <- function(input, output, session, req(!is.null(local$seq_mode)) # check local$file_check <- check_data_file( - local$data_file, local$proj_dir, + local$data_file, local$proj_dir, local$locus_type, local$seq_mode, local$exp_data_file ) @@ -1050,7 +1161,7 @@ check_data_server <- function(input, output, session, req(!is.null(local$file_check$spec)) out$info <- local$file_check$spec }) - + # user feedback output$data_info <- renderUI({ req(!is.null(local$file_check)) @@ -1082,7 +1193,7 @@ check_data_server <- function(input, output, session, ) } }) - + # output return(out) } From a41ffa9e86fb55bb90a6eee95a954386d1c3c489 Mon Sep 17 00:00:00 2001 From: GD Date: Tue, 4 May 2021 17:00:58 +0200 Subject: [PATCH 59/82] change naming convention for number of summary stats --- R-pkg/R/41_input_read.R | 8 +++--- R-pkg/tests/testthat/test-41_input_read.R | 30 +++++++++++------------ 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index 3bda0f0f..b8bb3ea4 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -13,7 +13,7 @@ read_header <- function(file_name, file_type, locus_type = "snp") { out <- list( msg = list(), valid = TRUE, data_file = NULL, loci_desc = NULL, - n_param = NULL, n_prior = NULL, n_sumstat = NULL, + n_param = NULL, n_prior = NULL, n_stat = NULL, cond_list = NULL, prior_list = NULL, n_group = NULL, group_prior_list = NULL, n_scenario = NULL, scenario_list = NULL, @@ -69,7 +69,7 @@ read_header <- function(file_name, file_type, locus_type = "snp") { pttrn <- "[0-9]+(?= parameters)" out$n_param <- as.integer(str_extract(strng, pttrn)) pttrn <- "[0-9]+(?= summary statistics)" - out$n_sumstat <- as.integer(str_extract(strng, pttrn)) + out$n_stat <- as.integer(str_extract(strng, pttrn)) ## empty line strng <- header[1] @@ -390,9 +390,9 @@ read_header <- function(file_name, file_type, locus_type = "snp") { # number of summary stats pttrn <- "(?<=^group summary statistics \\()[0-9]+" - tmp_n_sumstat <- sum(as.integer(str_extract(strng, pttrn)), na.rm = TRUE) + tmp_n_stat <- sum(as.integer(str_extract(strng, pttrn)), na.rm = TRUE) # check - if(out$n_sumstat != tmp_n_sumstat) { + if(out$n_stat != tmp_n_stat) { out$valid <- FALSE msg <- tagList( "Issue with line", tags$b(as.character(current_line)), diff --git a/R-pkg/tests/testthat/test-41_input_read.R b/R-pkg/tests/testthat/test-41_input_read.R index d8fbc1e3..1dd36bc5 100644 --- a/R-pkg/tests/testthat/test-41_input_read.R +++ b/R-pkg/tests/testthat/test-41_input_read.R @@ -8,13 +8,13 @@ test_that("read_header", { test_dir <- file.path(data4test_dir(), test_proj) file_name <- file.path(test_dir, "headerRF.txt") file_type <- "text/plain" - locus_type = "snp" + locus_type <- "snp" res <- read_header(file_name, file_type, locus_type) expect_true(res$valid) expect_equal(length(res$msg), 0) expect_equal(res$data_file, "indseq_SNP_sim_dataset_4POP_001.snp") expect_equal(res$n_param, 8) - expect_equal(res$n_sumstat, 130) + expect_equal(res$n_stat, 130) expect_equal(res$n_scenario, 1) expect_equal(res$n_scenario, length(res$scenario_list)) expect_equal(res$n_cond, 2) @@ -46,7 +46,7 @@ test_that("read_header", { expect_equal(length(res$msg), 0) expect_equal(res$data_file, "indseq_SNP_sim_dataset_4POP_001.snp") expect_equal(res$n_param, 13) - expect_equal(res$n_sumstat, 130) + expect_equal(res$n_stat, 130) expect_equal(res$n_scenario, 6) expect_equal(res$n_scenario, length(res$scenario_list)) expect_equal(res$n_cond, 6) @@ -69,7 +69,7 @@ test_that("read_header", { expect_equal(length(res$msg), 0) expect_equal(res$data_file, "poolseq_SNP_sim_dataset_4POP_cov100_001.snp") expect_equal(res$n_param, 8) - expect_equal(res$n_sumstat, 130) + expect_equal(res$n_stat, 130) expect_equal(res$n_scenario, 1) expect_equal(res$n_scenario, length(res$scenario_list)) expect_equal(res$n_cond, 2) @@ -91,7 +91,7 @@ test_that("read_header", { expect_equal(length(res$msg), 0) expect_equal(res$data_file, "poolseq_SNP_sim_dataset_4POP_cov100_001.snp") expect_equal(res$n_param, 13) - expect_equal(res$n_sumstat, 130) + expect_equal(res$n_stat, 130) expect_equal(res$n_scenario, 6) expect_equal(res$n_scenario, length(res$scenario_list)) expect_equal(res$n_cond, 6) @@ -117,7 +117,7 @@ test_that("read_header", { "simu_dataset_microsat_one_pop_bottleneck_multisamples_001.mss" ) expect_equal(res$n_param, 6) - expect_equal(res$n_sumstat, 40) + expect_equal(res$n_stat, 40) expect_equal(res$n_scenario, 2) expect_equal(res$n_scenario, length(res$scenario_list)) expect_equal(res$n_cond, 0) @@ -141,7 +141,7 @@ test_that("read_header", { expect_equal(length(res$msg), 0) expect_equal(res$data_file, "mss_example_001.mss") expect_equal(res$n_param, 9) - expect_equal(res$n_sumstat, 2) + expect_equal(res$n_stat, 2) expect_equal(res$n_scenario, 1) expect_equal(res$n_scenario, length(res$scenario_list)) expect_equal(res$n_cond, 2) @@ -175,7 +175,7 @@ test_that("read_header", { expect_equal(length(res$msg), 0) expect_equal(res$data_file, "toytest2_micro_seq_complexe_001.mss") expect_equal(res$n_param, 22) - expect_equal(res$n_sumstat, 85) + expect_equal(res$n_stat, 85) expect_equal(res$n_scenario, 2) expect_equal(res$n_scenario, length(res$scenario_list)) expect_equal(res$n_cond, 2) @@ -199,14 +199,11 @@ test_that("parse_header_scenario", { content <- header[5:13] res <- parse_header_scenario(content) - - expect_equal( - res, - list( - valid=TRUE, id=1, n_param=8, prior=0.16667, - scenario=str_c(content[6:13], collapse = "\n") - ) - ) + expect_true(res$valid) + expect_equal(res$id, 1) + expect_equal(res$n_param, 8) + expect_equal(res$prior, 0.16667) + expect_equal(res$scenario, str_c(header[6:13], collapse = "\n")) content <- header[6:13] res <- parse_header_scenario(content) @@ -320,3 +317,4 @@ test_that("read_reftable", { expect_false(res$valid) expect_true(length(res$msg) == 2) }) + From 3a1ae8bde7e3fbdb4c9dba7345025837d02fc52d Mon Sep 17 00:00:00 2001 From: GD Date: Tue, 4 May 2021 17:05:15 +0200 Subject: [PATCH 60/82] change naming convention for number of scenarii --- R-pkg/R/41_input_read.R | 4 ++-- R-pkg/tests/testthat/test-41_input_read.R | 28 +++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index b8bb3ea4..f746b0e3 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -16,7 +16,7 @@ read_header <- function(file_name, file_type, locus_type = "snp") { n_param = NULL, n_prior = NULL, n_stat = NULL, cond_list = NULL, prior_list = NULL, n_group = NULL, group_prior_list = NULL, - n_scenario = NULL, scenario_list = NULL, + n_scen = NULL, scenario_list = NULL, simu_mode = NULL ) @@ -103,7 +103,7 @@ read_header <- function(file_name, file_type, locus_type = "snp") { ## scenario config pttrn <- "[0-9]+(?= scenarios:)" - out$n_scenario <- as.integer(str_extract(strng, pttrn)) + out$n_scen <- as.integer(str_extract(strng, pttrn)) pttrn <- "(?<= )[0-9]+" nrow_per_scenario <- as.integer(unlist(str_extract_all(strng, pttrn))) ## extract scenarii diff --git a/R-pkg/tests/testthat/test-41_input_read.R b/R-pkg/tests/testthat/test-41_input_read.R index 1dd36bc5..106b861e 100644 --- a/R-pkg/tests/testthat/test-41_input_read.R +++ b/R-pkg/tests/testthat/test-41_input_read.R @@ -15,8 +15,8 @@ test_that("read_header", { expect_equal(res$data_file, "indseq_SNP_sim_dataset_4POP_001.snp") expect_equal(res$n_param, 8) expect_equal(res$n_stat, 130) - expect_equal(res$n_scenario, 1) - expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_scen, 1) + expect_equal(res$n_scen, length(res$scenario_list)) expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -47,8 +47,8 @@ test_that("read_header", { expect_equal(res$data_file, "indseq_SNP_sim_dataset_4POP_001.snp") expect_equal(res$n_param, 13) expect_equal(res$n_stat, 130) - expect_equal(res$n_scenario, 6) - expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_scen, 6) + expect_equal(res$n_scen, length(res$scenario_list)) expect_equal(res$n_cond, 6) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -70,8 +70,8 @@ test_that("read_header", { expect_equal(res$data_file, "poolseq_SNP_sim_dataset_4POP_cov100_001.snp") expect_equal(res$n_param, 8) expect_equal(res$n_stat, 130) - expect_equal(res$n_scenario, 1) - expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_scen, 1) + expect_equal(res$n_scen, length(res$scenario_list)) expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -92,8 +92,8 @@ test_that("read_header", { expect_equal(res$data_file, "poolseq_SNP_sim_dataset_4POP_cov100_001.snp") expect_equal(res$n_param, 13) expect_equal(res$n_stat, 130) - expect_equal(res$n_scenario, 6) - expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_scen, 6) + expect_equal(res$n_scen, length(res$scenario_list)) expect_equal(res$n_cond, 6) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -118,8 +118,8 @@ test_that("read_header", { ) expect_equal(res$n_param, 6) expect_equal(res$n_stat, 40) - expect_equal(res$n_scenario, 2) - expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_scen, 2) + expect_equal(res$n_scen, length(res$scenario_list)) expect_equal(res$n_cond, 0) expect_equal(res$n_cond, length(res$cond_list)) expect_null(res$simu_mode) @@ -142,8 +142,8 @@ test_that("read_header", { expect_equal(res$data_file, "mss_example_001.mss") expect_equal(res$n_param, 9) expect_equal(res$n_stat, 2) - expect_equal(res$n_scenario, 1) - expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_scen, 1) + expect_equal(res$n_scen, length(res$scenario_list)) expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -176,8 +176,8 @@ test_that("read_header", { expect_equal(res$data_file, "toytest2_micro_seq_complexe_001.mss") expect_equal(res$n_param, 22) expect_equal(res$n_stat, 85) - expect_equal(res$n_scenario, 2) - expect_equal(res$n_scenario, length(res$scenario_list)) + expect_equal(res$n_scen, 2) + expect_equal(res$n_scen, length(res$scenario_list)) expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") From 42115a7f3fcab3d048d1a6f57792d4e395025333 Mon Sep 17 00:00:00 2001 From: GD Date: Tue, 4 May 2021 17:18:20 +0200 Subject: [PATCH 61/82] save header file name --- R-pkg/R/41_input_read.R | 6 +++++- R-pkg/tests/testthat/test-41_input_read.R | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index f746b0e3..4dc132e2 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -17,7 +17,8 @@ read_header <- function(file_name, file_type, locus_type = "snp") { cond_list = NULL, prior_list = NULL, n_group = NULL, group_prior_list = NULL, n_scen = NULL, scenario_list = NULL, - simu_mode = NULL + simu_mode = NULL, + header_file = NULL ) current_line <- 0 @@ -42,6 +43,9 @@ read_header <- function(file_name, file_type, locus_type = "snp") { return(out) } + ## HEADER FILE NAME + out$header_file <- basename(file_name) + ## HEADER FILE CONTENT # read whole file in one string and split it by new line header <- str_split(read_file(file_name), "\n", simplify = TRUE) diff --git a/R-pkg/tests/testthat/test-41_input_read.R b/R-pkg/tests/testthat/test-41_input_read.R index 106b861e..5816b653 100644 --- a/R-pkg/tests/testthat/test-41_input_read.R +++ b/R-pkg/tests/testthat/test-41_input_read.R @@ -24,6 +24,7 @@ test_that("read_header", { expect_equal(res$n_loci_desc, length(res$loci_desc)) expect_equal(res$n_prior, 8) expect_equal(res$n_prior, length(res$prior_list)) + expect_equal(res$header_file, basename(file_name)) # indeseq -> bad header file test_proj <- "bad_files" @@ -56,6 +57,7 @@ test_that("read_header", { expect_equal(res$n_loci_desc, length(res$loci_desc)) expect_equal(res$n_prior, 13) expect_equal(res$n_prior, length(res$prior_list)) + expect_equal(res$header_file, basename(file_name)) ## SNP PoolSeq # estim param @@ -79,6 +81,7 @@ test_that("read_header", { expect_equal(res$n_loci_desc, length(res$loci_desc)) expect_equal(res$n_prior, 8) expect_equal(res$n_prior, length(res$prior_list)) + expect_equal(res$header_file, basename(file_name)) # model choice test_proj <- "PoolSeq_SNP_model_choice" @@ -101,6 +104,7 @@ test_that("read_header", { expect_equal(res$n_loci_desc, length(res$loci_desc)) expect_equal(res$n_prior, 13) expect_equal(res$n_prior, length(res$prior_list)) + expect_equal(res$header_file, basename(file_name)) ## MSS # microsat @@ -129,6 +133,7 @@ test_that("read_header", { expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$n_group, 1) expect_equal(res$n_group, length(res$group_prior_list)) + expect_equal(res$header_file, basename(file_name)) # microsat sequence 1 test_proj <- "Microsat_Sequences" @@ -153,6 +158,7 @@ test_that("read_header", { expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$n_group, 2) expect_equal(res$n_group, length(res$group_prior_list)) + expect_equal(res$header_file, basename(file_name)) # microsat sequence 1 -> bad header file test_proj <- "bad_files" @@ -187,6 +193,7 @@ test_that("read_header", { expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$n_group, 5) expect_equal(res$n_group, length(res$group_prior_list)) + expect_equal(res$header_file, basename(file_name)) }) From 803293635fe0c8ac0f103a9ef9fb26c78ecf28a3 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 5 May 2021 10:38:19 +0200 Subject: [PATCH 62/82] get number of parameters per scenario when reading header file --- R-pkg/R/41_input_read.R | 23 ++++++++++++--------- R-pkg/tests/testthat/test-41_input_read.R | 25 +++++++++++++++++++++-- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index 4dc132e2..e9eb7e08 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -16,7 +16,7 @@ read_header <- function(file_name, file_type, locus_type = "snp") { n_param = NULL, n_prior = NULL, n_stat = NULL, cond_list = NULL, prior_list = NULL, n_group = NULL, group_prior_list = NULL, - n_scen = NULL, scenario_list = NULL, + n_scen = NULL, scenario_list = NULL, n_param_scen = NULL, simu_mode = NULL, header_file = NULL ) @@ -138,10 +138,13 @@ read_header <- function(file_name, file_type, locus_type = "snp") { out$msg <- append(out$msg, list(msg)) return(out) } + # number of parameters per scenario + out$n_param_scen <- unlist(unname(lapply( + parsed_scenario_list, function(item) item$n_param + ))) # extract raw scenario list out$scenario_list <- unlist(unname(lapply( - parsed_scenario_list, - function(item) item$scenario + parsed_scenario_list, function(item) item$scenario ))) ## empty line @@ -595,7 +598,7 @@ read_reftable <- function(file_name, file_type) { # init output out <- list( msg = list(), valid = TRUE, - n_rec = NULL, n_scen = NULL, n_rec_scen = NULL, n_param = NULL, + n_rec = NULL, n_scen = NULL, n_rec_scen = NULL, n_param_scen = NULL, n_stat = NULL ) @@ -625,15 +628,15 @@ read_reftable <- function(file_name, file_type) { # number of records out$n_rec <- readBin(to_read, integer(), endian = "little") # number of scenarios - out$n_scen = readBin(to_read, integer(), endian = "little") + out$n_scen <- readBin(to_read, integer(), endian = "little") # number of records for each scenario - out$n_rec_scen = readBin(to_read, integer(), n = out$n_scen, - endian = "little") + out$n_rec_scen <- readBin(to_read, integer(), n = out$n_scen, + endian = "little") # number of used parameters (non constant) - out$n_param = readBin(to_read, integer(), n = out$n_scen, - endian = "little") + out$n_param_scen <- readBin(to_read, integer(), n = out$n_scen, + endian = "little") # number of stats - out$n_stat = readBin(to_read, integer(), endian = "little") + out$n_stat <- readBin(to_read, integer(), endian = "little") # close stream close(to_read) diff --git a/R-pkg/tests/testthat/test-41_input_read.R b/R-pkg/tests/testthat/test-41_input_read.R index 5816b653..b26e6d8f 100644 --- a/R-pkg/tests/testthat/test-41_input_read.R +++ b/R-pkg/tests/testthat/test-41_input_read.R @@ -17,6 +17,9 @@ test_that("read_header", { expect_equal(res$n_stat, 130) expect_equal(res$n_scen, 1) expect_equal(res$n_scen, length(res$scenario_list)) + expect_equal(length(res$n_param_scen), 1) + expect_equal(length(res$n_param_scen), length(res$scenario_list)) + expect_equal(res$n_param_scen, 8) expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -50,6 +53,9 @@ test_that("read_header", { expect_equal(res$n_stat, 130) expect_equal(res$n_scen, 6) expect_equal(res$n_scen, length(res$scenario_list)) + expect_equal(length(res$n_param_scen), 6) + expect_equal(length(res$n_param_scen), length(res$scenario_list)) + expect_equal(res$n_param_scen, c(8L, 8L, 8L, 7L, 7L, 7L)) expect_equal(res$n_cond, 6) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -74,6 +80,9 @@ test_that("read_header", { expect_equal(res$n_stat, 130) expect_equal(res$n_scen, 1) expect_equal(res$n_scen, length(res$scenario_list)) + expect_equal(length(res$n_param_scen), 1) + expect_equal(length(res$n_param_scen), length(res$scenario_list)) + expect_equal(res$n_param_scen, 8) expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -97,6 +106,9 @@ test_that("read_header", { expect_equal(res$n_stat, 130) expect_equal(res$n_scen, 6) expect_equal(res$n_scen, length(res$scenario_list)) + expect_equal(length(res$n_param_scen), 6) + expect_equal(length(res$n_param_scen), length(res$scenario_list)) + expect_equal(res$n_param_scen, c(8L, 8L, 8L, 7L, 7L, 7L)) expect_equal(res$n_cond, 6) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -124,6 +136,9 @@ test_that("read_header", { expect_equal(res$n_stat, 40) expect_equal(res$n_scen, 2) expect_equal(res$n_scen, length(res$scenario_list)) + expect_equal(length(res$n_param_scen), 2) + expect_equal(length(res$n_param_scen), length(res$scenario_list)) + expect_equal(res$n_param_scen, c(3L, 1L)) expect_equal(res$n_cond, 0) expect_equal(res$n_cond, length(res$cond_list)) expect_null(res$simu_mode) @@ -149,6 +164,9 @@ test_that("read_header", { expect_equal(res$n_stat, 2) expect_equal(res$n_scen, 1) expect_equal(res$n_scen, length(res$scenario_list)) + expect_equal(length(res$n_param_scen), 1) + expect_equal(length(res$n_param_scen), length(res$scenario_list)) + expect_equal(res$n_param_scen, 9) expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -184,6 +202,9 @@ test_that("read_header", { expect_equal(res$n_stat, 85) expect_equal(res$n_scen, 2) expect_equal(res$n_scen, length(res$scenario_list)) + expect_equal(length(res$n_param_scen), 2) + expect_equal(length(res$n_param_scen), length(res$scenario_list)) + expect_equal(res$n_param_scen, c(9L, 5L)) expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") @@ -285,7 +306,7 @@ test_that("read_reftable", { expect_equal(res$n_rec, 12000) expect_equal(res$n_scen, 1) expect_equal(res$n_rec_scen, 12000) - expect_equal(res$n_param, 8) + expect_equal(res$n_param_scen, 8) expect_equal(res$n_stat, 130) ## SNP model choice @@ -298,7 +319,7 @@ test_that("read_reftable", { expect_equal(res$n_rec, 12000) expect_equal(res$n_scen, 6) expect_equal(res$n_rec_scen, c(1942L, 1950L, 2028L, 2001L, 1994L, 2085L)) - expect_equal(res$n_param, c(8L, 8L, 8L, 7L, 7L, 7L)) + expect_equal(res$n_param_scen, c(8L, 8L, 8L, 7L, 7L, 7L)) expect_equal(res$n_stat, 130) ## wrong file type From 22246998793a866a827fbc58e2c1f21fb53e3a02 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 5 May 2021 10:42:19 +0200 Subject: [PATCH 63/82] verify consistency between header and reftable --- R-pkg/R/62_analysis_project_io.R | 38 ++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/R-pkg/R/62_analysis_project_io.R b/R-pkg/R/62_analysis_project_io.R index 528808da..8adcdf46 100644 --- a/R-pkg/R/62_analysis_project_io.R +++ b/R-pkg/R/62_analysis_project_io.R @@ -270,12 +270,46 @@ check_proj_file <- function(proj_dir, locus_type = "snp") { msg <- tagList("Issue when checking project reftable file.") out$msg <- append(out$msg, list(msg)) } + + # check agreement between header file (if any and reftable file) + if(isTruthy(out$header_check$valid) && + isTruthy(out$reftable_check$valid)) { + if(out$header_check$n_scen != out$reftable_check$n_scen) { + out$reftable_check$valid <- FALSE + msg <- tagList( + "Different number of scenarii configured in files", + tags$code(out$header_check$header_file), "and", + tags$code("reftableRF.bin"), "." + ) + out$reftable_check$msg <- append(out$msg, list(msg)) + } + if(any(out$header_check$n_param_scen != + out$reftable_check$n_param_scen)) { + out$reftable_check$valid <- FALSE + msg <- tagList( + "Different number of parameters per scenario", + "configured in files", + tags$code(out$header_check$header_file), "and", + tags$code("reftableRF.bin"), "." + ) + out$reftable_check$msg <- append(out$msg, list(msg)) + } + if(out$header_check$n_stat != out$reftable_check$n_stat) { + out$reftable_check$valid <- FALSE + msg <- tagList( + "Different number of summary statistics", + "configured in files", + tags$code(out$header_check$header_file), "and", + tags$code("reftableRF.bin"), "." + ) + out$reftable_check$msg <- append(out$msg, list(msg)) + } + } } # check statobs if("statobsRF.txt" %in% proj_file_list) { - if(!is.null(out$reftable_check) && - !is.null(out$reftable_check$n_stat)) { + if(isTruthy(out$reftable_check$n_stat)) { statobs_file <- file.path(proj_dir, "statobsRF.txt") out$statobs_check <- tryCatch( read_statobs( From e2988e28fcf6843fdbc51874e0b116648472d025 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 5 May 2021 11:19:49 +0200 Subject: [PATCH 64/82] project configuration setup check (TODO: manage data file) --- R-pkg/R/61_analysis_module.R | 617 ++++++++++++++++------------------- 1 file changed, 285 insertions(+), 332 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 03772cc2..227a23bc 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -82,6 +82,10 @@ analysis_proj_set_ui <- function(id) { hr(), proj_type_ui(ns("proj_type")), hr(), + proj_config_ui(ns("proj_config")), + hr(), + data_file_ui(ns("data_file")), + hr(), uiOutput(ns("feedback")) ) } @@ -93,21 +97,20 @@ analysis_proj_set_ui <- function(id) { #' @importFrom fs file_copy file_delete analysis_proj_set_server <- function(input, output, session) { - - # init local - local <- reactiveValues() - - # init output - out <- reactiveValues() - ## project name - proj_name <- callModule(proj_name_server, "proj_name", tag = "ap") + callModule(proj_name_server, "proj_name", tag = "ap") ## data type - data_type <- callModule(data_type_server, "data_type", tag = "ap") + callModule(data_type_server, "data_type", tag = "ap") ## project type - proj_type <- callModule(proj_type_server, "proj_type") + callModule(proj_type_server, "proj_type") + + ## project status + callModule(proj_config_server, "proj_config") + + ## data file + callModule(data_file_server, "data_file") # output$proj_is_ready <- renderUI({ # if(!(out$valid_proj & out$valid_data_file)) { @@ -151,6 +154,11 @@ proj_type_ui <- function(id) { selected = "new", justified = TRUE ), + conditionalPanel( + condition = "input.proj_type == 'new'", + ns = ns, + new_proj_ui(ns("new_proj")) + ), conditionalPanel( condition = "input.proj_type == 'existing'", ns = ns, @@ -165,22 +173,7 @@ proj_type_ui <- function(id) { condition = "input.proj_type !== 'new'", ns = ns, proj_file_list_ui(ns("proj_file_list")), - proj_file_check_ui(ns("proj_file_check")) - ), - hr(), - h3("Data file"), - helpText( - icon("clock"), - "Loading and checking the data file may take some time." - ), - conditionalPanel( - condition = "input.proj_type !== 'example'", - ns = ns, - input_data_ui(ns("input_data_file")), - ), - check_data_ui(ns("check_data_file")), - hr(), - uiOutput(ns("feedback_data")) + ) ) } @@ -190,11 +183,11 @@ proj_type_ui <- function(id) { #' @importFrom dplyr distinct #' @importFrom fs file_copy file_delete proj_type_server <- function(input, output, session) { - - # init local - local <- reactiveValues( - file_list = NULL, - valid_files = FALSE + + ## New project + callModule( + new_proj_server, "new_proj", + proj_type = reactive(input$proj_type) ) ## Existing project @@ -214,300 +207,6 @@ proj_type_server <- function(input, output, session) { proj_file_list_server, "proj_file_list", proj_type = reactive(input$proj_type) ) - - # ## new or existing project - # observe({ - # req(input$proj_type) - # if(input$proj_type == "new") { - # out$new_proj <- TRUE - # } else if(input$proj_type == "existing") { - # out$new_proj <- FALSE - # req(!is.null(local$local$proj_file_list)) - # if("headerRF.txt" %in% local$proj_file_list) { - # out$new_proj <- FALSE - # } else { - # out$new_proj <- TRUE - # } - # } else if(input$proj_type == "example") { - # out$new_proj <- FALSE - # } - # }) - # - # ## Manage existing project - # possible_files <- c("headerRF.txt", "reftableRF.bin", "statobsRF.txt") - # # copy uploaded files to project working directory (server-side) - # observeEvent(input$file_input, { - # ## user file input - # req(input$file_input) - # # data.frame with 4 columns: - # # name (chr), size (int), type (chr), datapath (chr) - # req(is.data.frame(input$file_input)) - # req(nrow(input$file_input) > 0) - # - # ## extraction - # new_file_input <- input$file_input - # - # tmp_proj_check <- existing_proj_file_check( - # new_file_input, possible_files, out$proj_dir, local$file_input - # ) - # - # local$file_input <- tmp_proj_check$file_input - # local$existing_proj_zip <- tmp_proj_check$existing_proj_zip - # }) - # - # # print possible files when uploading existing projects - # output$file_check <- renderUI({ - # helpText( - # icon("comment"), "Project-related files check", - # tags$p( - # tags$div( - # style = "column-count:2;", - # do.call( - # tags$ul, - # lapply( - # possible_files, - # function(item) tags$li( - # if(item %in% local$file_input$name) { - # ind <- which(item == local$file_input$name) - # if(local$file_input$valid[ind]) { - # tags$div( - # tags$code(item), - # icon("check") - # ) - # } else { - # tags$div( - # tags$code(item), - # icon("times") - # ) - # } - # } else { - # tags$code(item) - # } - # ) - # ) - # ) - # ) - # ) - # ) - # }) - # - # ## Manage example project - # # update possible input - # observe({ - # req(!is.null(data_type$locus_type)) - # req(!is.null(data_type$seq_mode)) - # - # if(data_type$locus_type == "mss") { - # updateSelectInput( - # session, - # "proj_example", - # choices = c("", "Not available at the moment"), - # selected = NULL - # ) - # } else if(data_type$locus_type == "snp" & - # data_type$seq_mode == "indseq") { - # possible_choices <- basename( - # list.dirs( - # example_dir() - # ) - # ) - # possible_choices <- possible_choices[str_detect(possible_choices, - # "IndSeq")] - # updateSelectInput( - # session, - # "proj_example", - # choices = c("", possible_choices), - # selected = NULL - # ) - # } else if(data_type$locus_type == "snp" & - # data_type$seq_mode == "poolseq") { - # possible_choices <- basename( - # list.dirs( - # example_dir() - # ) - # ) - # possible_choices <- possible_choices[str_detect(possible_choices, - # "PoolSeq")] - # updateSelectInput( - # session, - # "proj_example", - # choices = c("", possible_choices), - # selected = NULL - # ) - # } - # }) - # # copy files if required - # observeEvent(input$proj_example, { - # - # req(input$proj_type == "example") - # req(input$proj_example) - # - # # copy files - # proj_files <- list.files( - # file.path( - # example_dir(), - # input$proj_example - # ) - # ) - # fs::file_copy( - # path = file.path( - # example_dir(), - # input$proj_example, - # proj_files - # ), - # new_path = file.path( - # out$proj_dir, - # proj_files - # ), - # overwrite = TRUE - # ) - # - # # update file input - # # data.frame with 4 columns: - # # name (chr), size (int), type (chr), datapath (chr) - # local$file_input <- data.frame( - # name = proj_files, - # size = rep(NA, length(proj_files)), - # type = rep(NA, length(proj_files)), - # datapath = file.path( - # out$proj_dir, - # proj_files - # ), - # valid = rep(TRUE, length(proj_files)) - # ) - # - # ## file type - # ind <- which(local$file_input$name == "headerRF.txt") - # local$file_input$type[ind] <- "text/plain" - # ind <- which(local$file_input$name == "reftableRF.bin") - # local$file_input$type[ind] <- "application/octet-stream" - # ind <- which(local$file_input$name == "statObsRF.txt") - # local$file_input$type[ind] <- "text/plain" - # }) - # - # ## check current project header file - # observeEvent(local$file_input, { - # - # req(is.data.frame(local$file_input)) - # req(nrow(local$file_input) > 0) - # req(!is.null(input$proj_type)) - # - # # # debugging - # # pprint("file input") - # # pprint(local$file_input) - # - # ## header check - # if("headerRF.txt" %in% local$file_input$name) { - # ind <- which(local$file_input$name == "headerRF.txt") - # header_file_content <- parse_diyabc_header( - # file_name = local$file_input$datapath[ind], - # file_type = local$file_input$type[ind], - # locus_type = data_type$locus_type - # ) - # # valid header file - # local$file_input$valid[ind] <- header_file_content$valid - # # header data file name - # local$header_data_file <- header_file_content$data_file - # # header data file content - # out$proj_header_content <- header_file_content - # # data from example ? - # if(input$proj_type == "example") { - # local$data_file <- header_file_content$data_file - # } - # } - # - # # # debugging - # # pprint("file input") - # # pprint(local$file_input) - # - # ## delete non valid files - # lapply( - # split(local$file_input, seq(nrow(local$file_input))), - # function(item) { - # if(!item$valid) { - # if(file.exists(item$datapath)) { - # logging("deleting:", item$datapath) - # fs::file_delete(item$datapath) - # } - # } - # } - # ) - # - # ## project file list - # out$proj_file_list <- local$file_input$name[local$file_input$valid] - # - # # # debugging - # # pprint("file_input") - # # pprint(local$file_input) - # }) - # - # ## Data file file - # data_file <- callModule( - # input_data_server, "input_data_file", - # proj_dir = reactive(out$proj_dir), - # existing_proj_zip = reactive(local$existing_proj_zip) - # ) - # - # # update local if data file upload - # observe({ - # req(!is.null(data_file$name)) - # local$data_file <- data_file$name - # }) - # - # # data file extracted from existing project zip file - # observe({ - # req(!is.null(local$existing_proj_zip)) - # req(!is.null(local$header_data_file)) - # - # if(local$existing_proj_zip) { - # local$data_file <- local$header_data_file - # } - # }) - # - # ## Data file check - # check_data <- callModule( - # check_data_server, "check_data_file", - # data_file = reactive(local$data_file), - # expected_data_file = reactive(local$header_data_file), - # locus_type = reactive(out$locus_type), - # seq_mode = reactive(out$seq_mode), - # proj_dir = reactive(out$proj_dir) - # ) - # - # # update output - # observe({ - # out$data_file <- data_file$name - # out$data_info <- check_data$info - # out$valid_data_file <- check_data$valid - # }) - # - # # valid set up ? - # observe({ - # - # req(!is.null(out$valid_data_file)) - # req(!is.null(local$valid_proj_name)) - # - # # check header if required - # valid_header <- TRUE - # if(!is.null(out$proj_header_content$valid)) { - # valid_header <- out$proj_header_content$valid - # } - # - # out$valid_proj <- local$valid_proj_name & valid_header & out$valid_data_file - # - # # # debugging - # # logging("valid proj?", out$valid_proj) - # }) - # - # output$proj_is_ready <- renderUI({ - # if(!(out$valid_proj & out$valid_data_file)) { - # h3(icon("warning"), "Project set up is not ready.", - # style="color:red;text-align:center;") - # } else { - # h4(icon("check"), "Project set up is ok.", - # style="text-align:center;") - # } - # }) } #' Feedback on project file list ui @@ -665,30 +364,205 @@ proj_file_list_server <- function(input, output, session, #' @author Ghislain Durif proj_file_check_ui <- function(id) { ns <- NS(id) - tagList() + tagList( + uiOutput(ns("global_feedback")), + uiOutput(ns("feedback_header")), + uiOutput(ns("feedback_reftable")), + uiOutput(ns("feedback_statobs")) + ) } #' Project file check server #' @keywords internal #' @author Ghislain Durif proj_file_check_server <- function(input, output, session) { - # TODO ## file check - observe({c(env$ap$file_modif, env$ap$proj_file_list)}, { + observeEvent({c(env$ap$file_modif, env$ap$proj_file_list)}, { req(env$ap$proj_dir) req(env$ap$locus_type) # file check file_check <- check_proj_file( env$ap$proj_dir, env$ap$locus_type ) - # header ? - if(!is.null(file_check$header_check)) { - # FIXME + # update env + env$ap$header_check <- file_check$header_check + env$ap$reftable_check <- file_check$reftable_check + env$ap$statobs_check <- file_check$statobs_check + }) + + ## OUTPUT + # global + output$global_feedback <- renderUI({ + if(is.null(env$ap$header_check) && is.null(env$ap$reftable_check) && + is.null(env$ap$statobs_check)) { + helpText( + "Project is not configured yet.", + tags$p(tags$ul(tags$li( + "For a new project, you will be able to configure it", + "in the panel below." + ))), + tags$p(tags$ul(tags$li( + "For an existing or an example project,", + "you will be able to check the configuration", + "or modify it in the panel below." + ))) + ) + } else { + helpText(h5(icon("comment"), "Current setup")) + } + }) + # header + output$feedback_header <- renderUI({ + if(isTruthy(env$ap$header_check)) { + if(isTruthy(env$ap$header_check$valid)) { + # data file + data_file <- NULL + if(env$ap$header_check$data_file %in% env$ap$proj_file_list) { + data_file <- tagList( + "Data file:", + tags$code(env$ap$header_check$data_file) + ) + } else { + data_file <- tagList( + "Expected data file:", + tags$code(env$ap$header_check$data_file), + "(it can be uploaded below)." + ) + } + # output + helpText( + tags$p(tags$ul(tags$li(data_file))), + tags$p(tags$ul(tags$li( + tags$code(env$ap$header_check$header_file), + "file is ok", + "with", + tags$b(as.character(env$ap$header_check$n_scen)), + ifelse( + env$ap$header_check$n_scen > 1, + "scenarii", + "scenario"), + "." + ))) + ) + } else { + tags$div( + tags$p( + icon("warning"), + "Issue with provided", + tags$code(env$ap$header_check$header_file), + "file:", + do.call( + tags$ul, + lapply(env$ap$header_check$msg, tags$li) + ) + ), + style = "color: #F89406;" + ) + } + } else { + NULL + } + }) + # reftable + output$feedback_reftable <- renderUI({ + if(isTruthy(env$ap$reftable_check)) { + if(isTruthy(env$ap$reftable_check$valid)) { + helpText(tags$p(tags$ul(tags$li( + tags$code("reftableRF.bin"), "file is ok", + "with", + tags$b(as.character(env$ap$reftable_check$n_stat)), + "summary statistics over", + tags$b(as.character(env$ap$reftable_check$n_rec)), + "simulations in the training set." + )))) + } else { + tags$div( + tags$p( + icon("warning"), + "Issue with provided", tags$code("reftableRF.bin"), + "file:", + do.call( + tags$ul, + lapply(env$ap$header_check$msg, tags$li) + ) + ), + style = "color: #F89406;" + ) + } + } else { + NULL + } + }) + # statobs + output$feedback_statobs <- renderUI({ + if(isTruthy(env$ap$statobs_check)) { + if(isTruthy(env$ap$statobs_check$valid)) { + helpText(tags$p(tags$ul(tags$li( + tags$code("statobsRF.txt"), + "file is ok." + )))) + } else { + tags$div( + tags$p( + icon("warning"), + "Issue with provided", tags$code("statobsRF.txt"), + "file:", + do.call( + tags$ul, + lapply(env$ap$header_check$msg, tags$li) + ) + ), + style = "color: #F89406;" + ) + } + } else { + NULL } }) } +#' New project ui +#' @keywords internal +#' @author Ghislain Durif +new_proj_ui <- function(id) { + ns <- NS(id) + tagList( + helpText( + icon("comment"), "You will be able to upload your data file", + "and configure your project below." + ) + ) +} + +#' New project server +#' @keywords internal +#' @author Ghislain Durif +#' @param proj_type character string, `"new"`, `"existing"` or `"example"`. +new_proj_server <- function(input, output, session, + proj_type = reactive({NULL})) { + + # init local + local <- reactiveValues(proj_type = NULL) + + # get input + observe({ + local$proj_type <- proj_type() + }) + + # clean project directory when choosing this mode + observeEvent(local$proj_type, { + req(local$proj_type) + req(local$proj_type == "new") + req(env$ap$proj_dir) + + # clean before upload + clean_proj_dir(env$ap$proj_dir) + # file modification + update_proj_file("ap") + }) +} + #' Existing project ui #' @keywords internal #' @author Ghislain Durif @@ -843,7 +717,7 @@ existing_proj_server <- function(input, output, session, }) # update project file list and check files - if(!is.null(input_check) && input_check$valid) { + if(!is.null(input_check) && isTruthy(input_check$valid)) { # file modification update_proj_file("ap") } else { @@ -980,6 +854,85 @@ example_proj_server <- function(input, output, session, }) } +#' Project configuration feedback ui +#' @keywords internal +#' @author Ghislain Durif +proj_config_ui <- function(id) { + ns <- NS(id) + tagList( + h3("Project configuration"), + proj_file_check_ui(ns("proj_file_check")) + ) +} + +#' Project configuration feedback server +#' @keywords internal +#' @author Ghislain Durif +proj_config_server <- function(input, output, session) { + callModule(proj_file_check_server, "proj_file_check") +} + +#' Data file ui +#' @keywords internal +#' @author Ghislain Durif +data_file_ui <- function(id) { + ns <- NS(id) + tagList( + h3("Data file"), + helpText( + icon("clock"), + "Loading and checking the data file may take some time." + ), + conditionalPanel( + condition = "input.proj_type !== 'example'", + ns = ns, + input_data_ui(ns("input_data_file")), + ), + check_data_ui(ns("check_data_file")), + hr(), + uiOutput(ns("feedback_data")) + ) +} + +#' Data file server +#' @keywords internal +#' @author Ghislain Durif +data_file_server <- function(input, output, session) { + + # ## Data file file + # data_file <- callModule( + # input_data_server, "input_data_file", + # proj_dir = reactive(out$proj_dir), + # existing_proj_zip = reactive(local$existing_proj_zip) + # ) + # + # # update local if data file upload + # observe({ + # req(!is.null(data_file$name)) + # local$data_file <- data_file$name + # }) + # + # # data file extracted from existing project zip file + # observe({ + # req(!is.null(local$existing_proj_zip)) + # req(!is.null(local$header_data_file)) + # + # if(local$existing_proj_zip) { + # local$data_file <- local$header_data_file + # } + # }) + # + # ## Data file check + # check_data <- callModule( + # check_data_server, "check_data_file", + # data_file = reactive(local$data_file), + # expected_data_file = reactive(local$header_data_file), + # locus_type = reactive(out$locus_type), + # seq_mode = reactive(out$seq_mode), + # proj_dir = reactive(out$proj_dir) + # ) +} + #' Input data ui #' @keywords internal #' @author Ghislain Durif From 7333605933f28e09bafa61d38f7f7ae9f3a0a9ed Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 5 May 2021 12:03:39 +0200 Subject: [PATCH 65/82] data file input (TODO: data file check) --- R-pkg/R/61_analysis_module.R | 137 +++++++++++++++++++++-------------- 1 file changed, 83 insertions(+), 54 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 227a23bc..74c1428a 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -883,11 +883,7 @@ data_file_ui <- function(id) { icon("clock"), "Loading and checking the data file may take some time." ), - conditionalPanel( - condition = "input.proj_type !== 'example'", - ns = ns, - input_data_ui(ns("input_data_file")), - ), + uiOutput(ns("input_data")), check_data_ui(ns("check_data_file")), hr(), uiOutput(ns("feedback_data")) @@ -899,6 +895,43 @@ data_file_ui <- function(id) { #' @author Ghislain Durif data_file_server <- function(input, output, session) { + ns <- session$ns + + # input data file + output$input_data <- renderUI({ + if(isTruthy(env$ap$header_check$valid) && + isTruthy(env$ap$proj_file_list) && + (env$ap$header_check$data_file %in% env$ap$proj_file_list)) { + helpText( + icon("comment"), + "Data file was already provided." + ) + } else { + input_data_file_ui(ns("input_data_file")) + } + }) + # observeEvent({c(env$ap$proj_file_list, env$ap$file_modif)}, { + # + # pprint("toto") + # + # output$input_data <- renderUI({ + # if(isTruthy(env$ap$header_check$valid) && + # isTruth(env$ap$proj_file_list) && + # (env$ap$header_check$data_file %in% env$ap$proj_file_list)) { + # helpText( + # icon("comment"), + # "Data file was already provided." + # ) + # } else { + # input_data_file_ui(ns("input_data_file")) + # } + # }) + # }, ignoreNULL = TRUE) + + callModule(input_data_file_server, "input_data_file") + + + # ## Data file file # data_file <- callModule( # input_data_server, "input_data_file", @@ -933,73 +966,46 @@ data_file_server <- function(input, output, session) { # ) } -#' Input data ui +#' Input data file ui #' @keywords internal #' @author Ghislain Durif -input_data_ui <- function(id) { +input_data_file_ui <- function(id) { ns <- NS(id) tagList( - uiOutput(ns("feedback")), fileInput( ns("data_file"), label = NULL, buttonLabel = "Select file", multiple = FALSE - ) + ), + uiOutput(ns("feedback")) ) } -#' Input data server +#' Input data file server #' @keywords internal #' @author Ghislain Durif #' @param proj_dir string as a `reactive`, project directory. -input_data_server <- function(input, output, session, - proj_dir = reactive({NULL}), - existing_proj_zip = reactive({NULL})) { - ## init local - local <- reactiveValues( - # input - proj_dir = NULL, - existing_proj_zip = NULL - ) - ## get input +input_data_file_server <- function(input, output, session) { + + # Feedback on file upload observe({ - local$proj_dir <- proj_dir() - local$existing_proj_zip <- existing_proj_zip() - # # debugging - # pprint(paste0("input proj dir = ", local$proj_dir)) - }) - ## init output - out <- reactiveValues( - name = NULL - ) - - ## feedback - output$feedback <- renderUI({ - if(!is.null(local$existing_proj_zip)) { - if(local$existing_proj_zip) { - helpText( - icon("comment"), - "Data file was already extracted from project zip file." - ) - } else { - NULL - } - } else { - NULL - } + # feedback on missing file + feedbackWarning("file_input", !isTruthy(input$data_file), + "Missing data file.") }) ## get data file observeEvent(input$data_file, { # input$data_file = data.frame with 4 columns: # name (chr), size (int), type (chr), datapath (chr) - req(local$proj_dir) - req(input$data_file) - req(is.data.frame(input$data_file)) - req(nrow(input$data_file) > 0) - # data file - out$name <- input$data_file$name + req(env$ap$proj_dir) + req(nrow(input$data_file) == 1) + # check data file name (if header exists) + if(isTruthy(env$ap$header_check$valid) && + isTruthy(env$ap$header_check$data_file)) { + req(input$data_file$name == env$ap$header_check$data_file) + } # copy to project directory fs::file_copy(input$data_file$datapath, file.path(local$proj_dir, out$name), @@ -1010,10 +1016,33 @@ input_data_server <- function(input, output, session, fs::file_delete(input$data_file$datapath) } }) - # # debugging - # observe({ - # logging("data file = ", out$file) - # }) + + ## feedback + observeEvent(input$data_file, { + output$feedback <- renderUI({ + if(isTruthy(nrow(input$data_file) == 1)) { + if(isTruthy(env$ap$header_check$valid) && + isTruthy(env$ap$header_check$data_file) && + input$data_file$name != env$ap$header_check$data_file) { + tags$div( + icon("warning"), + "Provided data file name does not match", + "expected data file name from", + tags$code(env$ap$header_check$header_file), "file.", + style = "color: #F89406;" + ) + } else { + NULL + } + } else { + tags$div( + icon("warning"), + "Missing data file.", + style = "color: #F89406;" + ) + } + }) + }) ## output return(out) From 7f567f90c22c9058a9e6586b330cdeb7eac7975b Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 5 May 2021 14:37:28 +0200 Subject: [PATCH 66/82] WIP: preparation for data file check refactoring --- R-pkg/R/04_environment.R | 3 +- R-pkg/R/61_analysis_module.R | 208 +++++++++++++---------------------- 2 files changed, 77 insertions(+), 134 deletions(-) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index 1e512ba3..bd835334 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -42,7 +42,8 @@ init_diyabcrf_env <- function() { reftable_check = NULL, # result of reftable file check statobs_check = NULL, # result of statobs file check ## observed data - data_file = NULL # observed data file name + data_file = NULL, # observed data file name + data_check = NULL # result of data file check ) # dataset metadata diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 74c1428a..bb6ab8ed 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -472,7 +472,7 @@ proj_file_check_server <- function(input, output, session) { tags$code("reftableRF.bin"), "file is ok", "with", tags$b(as.character(env$ap$reftable_check$n_stat)), - "summary statistics over", + "summary statistics computed over", tags$b(as.character(env$ap$reftable_check$n_rec)), "simulations in the training set." )))) @@ -991,7 +991,7 @@ input_data_file_server <- function(input, output, session) { # Feedback on file upload observe({ # feedback on missing file - feedbackWarning("file_input", !isTruthy(input$data_file), + feedbackWarning("data_file", !isTruthy(input$data_file), "Missing data file.") }) @@ -1006,6 +1006,8 @@ input_data_file_server <- function(input, output, session) { isTruthy(env$ap$header_check$data_file)) { req(input$data_file$name == env$ap$header_check$data_file) } + # save data file name + env$ap$data_file <- input$data_file$name # copy to project directory fs::file_copy(input$data_file$datapath, file.path(local$proj_dir, out$name), @@ -1018,30 +1020,28 @@ input_data_file_server <- function(input, output, session) { }) ## feedback - observeEvent(input$data_file, { - output$feedback <- renderUI({ - if(isTruthy(nrow(input$data_file) == 1)) { - if(isTruthy(env$ap$header_check$valid) && - isTruthy(env$ap$header_check$data_file) && - input$data_file$name != env$ap$header_check$data_file) { - tags$div( - icon("warning"), - "Provided data file name does not match", - "expected data file name from", - tags$code(env$ap$header_check$header_file), "file.", - style = "color: #F89406;" - ) - } else { - NULL - } - } else { + output$feedback <- renderUI({ + if(isTruthy(nrow(input$data_file) == 1)) { + if(isTruthy(env$ap$header_check$valid) && + isTruthy(env$ap$header_check$data_file) && + input$data_file$name != env$ap$header_check$data_file) { tags$div( icon("warning"), - "Missing data file.", + "Provided data file name does not match", + "expected data file name from", + tags$code(env$ap$header_check$header_file), "file.", style = "color: #F89406;" ) + } else { + NULL } - }) + } else { + tags$div( + icon("warning"), + "Missing data file.", + style = "color: #F89406;" + ) + } }) ## output @@ -1054,7 +1054,7 @@ input_data_file_server <- function(input, output, session) { check_data_ui <- function(id) { ns <- NS(id) tagList( - uiOutput(ns("missing_file")), + uiOutput(ns("feedback")), uiOutput(ns("data_info")) ) } @@ -1062,119 +1062,61 @@ check_data_ui <- function(id) { #' check data server #' @keywords internal #' @author Ghislain Durif -#' @param data_file string as a `reactive`, data file uploaded by the user. -#' @param expected_data_file string as a `reactive`, expected data file if a -#' header file is provided (NULL otherwise). -#' @param locus_type string as a `reactive`, `"mss"` or `"snp"`. -#' @param seq_mode string as a `reactive`, `"indseq"` or `"poolseq"`. -#' @param proj_dir string as a `reactive`, project directory. -check_data_server <- function(input, output, session, - data_file = reactive({NULL}), - expected_data_file = reactive({NULL}), - locus_type = reactive({"snp"}), - seq_mode = reactive({"indseq"}), - proj_dir = reactive({NULL})) { - # init local - local <- reactiveValues( - file_check = NULL, - data_info = NULL, - # input - data_file = NULL, - exp_data_file = NULL, - locus_type = NULL, - seq_mode = NULL, - proj_dir = NULL - ) - # get input - observe({ - local$data_file <- data_file() - local$exp_data_file <- expected_data_file() - local$locus_type <- locus_type() - local$seq_mode <- seq_mode() - local$proj_dir <- proj_dir() - - # # debugging - # pprint(paste0("input data file = ", local$data_file)) - # pprint(paste0("expected data file = ", local$exp_data_file)) - # pprint(paste0("input locus type = ", local$locus_type)) - # pprint(paste0("input seq mode = ", local$seq_mode)) - # pprint(paste0("input proj dir = ", local$proj_dir)) - }) - # init output - out <- reactiveValues( - data_file = NULL, - data_info = NULL, - valid = FALSE - ) - # # debugging +check_data_server <- function(input, output, session) { + + # # data check # observe({ - # logging("data file = ", out$file) + # req(!is.null(local$data_file)) + # req(!is.null(local$proj_dir)) + # req(!is.null(local$locus_type)) + # req(!is.null(local$seq_mode)) + # # check + # local$file_check <- check_data_file( + # local$data_file, local$proj_dir, + # local$locus_type, local$seq_mode, + # local$exp_data_file + # ) + # # data info + # req(!is.null(local$file_check)) + # req(!is.null(local$file_check$valid)) + # # valid data + # out$valid <- local$file_check$valid + # # data spec + # req(!is.null(local$file_check$spec)) + # out$info <- local$file_check$spec + # }) + # + # # user feedback + # output$data_info <- renderUI({ + # req(!is.null(local$file_check)) + # # show data info + # if(local$file_check$valid) { + # req(local$file_check$msg) + # helpText( + # icon("comment"), "Data file info", + # do.call( + # tags$ul, + # lapply(local$file_check$msg, function(item) { + # return(tags$li(item)) + # }) + # ) + # ) + # } else { + # tmp_msg <- NULL + # if(!is.null(local$file_check$err)) { + # tmp_msg <- do.call( + # tags$ul, + # lapply(local$file_check$err, function(item) { + # return(tags$li(item)) + # }) + # ) + # } + # helpText( + # icon("warning"), "Issue with data file.", + # tmp_msg + # ) + # } # }) - - ## message if missing file - output$missing_file <- renderUI({ - if(is.null(local$data_file)) { - helpText( - icon("warning"), "Missing data file" - ) - } else { - NULL - } - }) - - # data check - observe({ - req(!is.null(local$data_file)) - req(!is.null(local$proj_dir)) - req(!is.null(local$locus_type)) - req(!is.null(local$seq_mode)) - # check - local$file_check <- check_data_file( - local$data_file, local$proj_dir, - local$locus_type, local$seq_mode, - local$exp_data_file - ) - # data info - req(!is.null(local$file_check)) - req(!is.null(local$file_check$valid)) - # valid data - out$valid <- local$file_check$valid - # data spec - req(!is.null(local$file_check$spec)) - out$info <- local$file_check$spec - }) - - # user feedback - output$data_info <- renderUI({ - req(!is.null(local$file_check)) - # show data info - if(local$file_check$valid) { - req(local$file_check$msg) - helpText( - icon("comment"), "Data file info", - do.call( - tags$ul, - lapply(local$file_check$msg, function(item) { - return(tags$li(item)) - }) - ) - ) - } else { - tmp_msg <- NULL - if(!is.null(local$file_check$err)) { - tmp_msg <- do.call( - tags$ul, - lapply(local$file_check$err, function(item) { - return(tags$li(item)) - }) - ) - } - helpText( - icon("warning"), "Issue with data file.", - tmp_msg - ) - } - }) # output return(out) From ad9de24e287d850a0b6a8657a753da0476233f02 Mon Sep 17 00:00:00 2001 From: GD Date: Wed, 5 May 2021 15:21:12 +0200 Subject: [PATCH 67/82] fix typo --- R-pkg/R/41_input_read.R | 20 ++++++++++---------- R-pkg/R/42_input_check.R | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index e9eb7e08..628776e8 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -12,7 +12,7 @@ read_header <- function(file_name, file_type, locus_type = "snp") { # init output out <- list( msg = list(), valid = TRUE, - data_file = NULL, loci_desc = NULL, + data_file = NULL, locus_desc = NULL, n_param = NULL, n_prior = NULL, n_stat = NULL, cond_list = NULL, prior_list = NULL, n_group = NULL, group_prior_list = NULL, @@ -280,22 +280,22 @@ read_header <- function(file_name, file_type, locus_type = "snp") { # number of loci description pttrn <- "(?<=^loci description \\()[0-9]+" - out$n_loci_desc <- as.integer(str_extract(strng, pttrn)) + out$n_locus_desc <- as.integer(str_extract(strng, pttrn)) # extract loci description - out$loci_desc <- header[1:out$n_loci_desc] - header <- header[-(1:out$n_loci_desc)] - current_line <- current_line + out$n_loci_desc + out$locus_desc <- header[1:out$n_locus_desc] + header <- header[-(1:out$n_locus_desc)] + current_line <- current_line + out$n_locus_desc # check extracted loci description - loci_desc_check <- unlist(lapply( - out$loci_desc, - check_header_loci_desc, type = locus_type + locus_desc_check <- unlist(lapply( + out$locus_desc, + check_header_locus_desc, type = locus_type )) - if(!all(loci_desc_check)) { + if(!all(locus_desc_check)) { out$valid <- FALSE msg <- tagList( "Issue with format of locus description at lines:", - str_c(which(!loci_desc_check) + current_line, collapse = ", "), + str_c(which(!locus_desc_check) + current_line, collapse = ", "), "." ) out$msg <- append(out$msg, list(msg)) diff --git a/R-pkg/R/42_input_check.R b/R-pkg/R/42_input_check.R index caf9a2d4..0c1f024a 100644 --- a/R-pkg/R/42_input_check.R +++ b/R-pkg/R/42_input_check.R @@ -48,7 +48,7 @@ check_header_cond <- function(strng) { #' Content: see doc #' @param strng string, prior description. #' @param type string, `"mss"` or `"snp"` -check_header_loci_desc <- function(strng, type = "mss") { +check_header_locus_desc <- function(strng, type = "mss") { # init output valid <- FALSE # Microsat/Sequence From 8fa4d220f818d151f014c7df4b4b830f52c7c5f5 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 09:57:52 +0200 Subject: [PATCH 68/82] WIP: function to read data files (DONE: function to process and check a SNP from IndSeq SNP file) --- R-pkg/R/43_data_read.R | 568 +++++++++++++++++++++++ R-pkg/tests/testthat/test-43_data_read.R | 211 +++++++++ 2 files changed, 779 insertions(+) create mode 100644 R-pkg/R/43_data_read.R create mode 100644 R-pkg/tests/testthat/test-43_data_read.R diff --git a/R-pkg/R/43_data_read.R b/R-pkg/R/43_data_read.R new file mode 100644 index 00000000..78370a5b --- /dev/null +++ b/R-pkg/R/43_data_read.R @@ -0,0 +1,568 @@ +#' Read and parse IndSeq SNP data file +#' @keywords internal +#' @author Ghislain Durif +#' @param data_file string, data file name. +#' @param data_dir string, path to directory where data file is stored. +#' @importFrom tools file_ext +#' @importFrom parallel makeCluster stopCluster +#' @importFrom pbapply pblapply +read_indseq_snp_data <- function(data_file, data_dir) { + + # init output + out <- list( + msg = list(), valid = TRUE, + data_file = NULL, n_loci = NULL, locus_count = NULL, locus_desc = NULL, + n_pop = NULL, n_indiv = NULL, + sex_ratio = NULL, maf = NULL + ) + + current_line <- 0 + + # full path + file_name <- file.path(data_dir, data_file) + + # check file_name + tmp <- check_file_name(file_name) + if(!tmp) { + out$valid <- FALSE + msg <- tagList("Invalid data file name.") + out$msg <- append(out$msg, list(msg)) + } + + # check file content + if(file.info(file_name)$size == 0) { + out$valid <- FALSE + msg <- tagList("Data file is empty.") + out$msg <- append(out$msg, list(msg)) + } + + # check file_type + if(tools::file_ext(file_name) != "snp") { + out$valid <- FALSE + msg <- tagList( + "IndSeq SNP files should have an extension", + tags$code(".snp"), "." + ) + out$msg <- append(out$msg, list(msg)) + } + + # continue ? + if(!out$valid) { + return(out) + } + + # data file name + out$data_file <- data_file + + ## DATA FILE CONTENT + + ## HEADER 1 + header1 <- readLines(file_name, n = 1, warn = FALSE) + + # sex ratio + pttrn <- "(?i)NM=[0-9]+\\.?[0-9]*NF(?-i)" + if(!str_detect(header1, pttrn)) { + out$valid <- FALSE + msg <- tagList( + "Missing", tags$b("sex ratio"), "in header1 first line." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + out$sex_ratio <- str_extract(header1, pttrn) + + # MAF + pttrn <- "(?i)(?<=MAF=)([0-9]+\\.?[0-9]*|hudson)(?-i)" + if(!str_detect(header1, pttrn)) { + out$valid <- FALSE + msg <- tagList( + "Missing", tags$b("Minimum Allele Frequency"), + "(MAF) in header1 first line.", + "MAF should be a real number between 0 and 1", + "or the keyword", tags$code("hudson"), + ", see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + out$maf <- str_extract(header1, pttrn) + + if(str_detect(out$maf, "^[0-9]+\\.?[0-9]*$")) { + out$maf <- as.numeric(out$maf) + if(out$maf < 0 || out$maf > 1) { + out$valid <- FALSE + msg <- tagList( + tags$b("Minimum Allele Frequency"), + "(MAF) should be a real number between 0 and 1", + "or the keyword", tags$code("hudson"), + ", see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + } + + ## HEADER 2 + header2 <- tryCatch( + unname(unlist(read.table(file = file_name, skip = 1, nrows = 1))), + error = function(e) return(e) + ) + if("error" %in% class(header2)) { + out$valid <- FALSE + msg <- tagList( + "Formatting issue with second line header, ", + "impossible to read it, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # upper case + header2 <- str_to_upper(header2) + + # header 2 content + if(header2 != "IND" && header2[2] != "SEX" && header2[3] != "POP" && + !all(header2[-(1:3)] %in% c("A", "H", "X", "Y", "M"))) { + out$valid <- FALSE + msg <- tagList( + "Formatting issue with header second line, see manual.", + "Required column titles are", tags$code("IND SEX POP"), + "followed by a letter indicator among", + tags$code("A"), tags$code("H"), tags$code("X"), + tags$code("Y"), tags$code("M"), + "for each SNP locus in the file (see manual)." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # nb of locus + out$n_loci <- length(header2) - 3 + + # locus type + candidate_locus <- c("A", "H", "X", "Y", "M") + locus_encoding <- str_c(header2[-(1:3)], collapse = " ") + out$locus_count <- Reduce("rbind", lapply( + candidate_locus, + function(pttrn) { + count <- str_count(locus_encoding, pttrn) + return(data.frame( + count = count, + type = pttrn, + stringsAsFactors = FALSE + )) + } + )) + + # save snp type for filtering + snp_type <- header2[-(1:3)] + + ## DATA FILE CONTENT + content <- tryCatch( + read.table(file_name, skip = 2), error = function(e) return(e) + ) + if("error" %in% class(content)) { + out$valid <- FALSE + msg <- tagList( + "Formatting issue with data, ", + "impossible to read them, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # check for SEX column with only F (interpreted as FALSE) + if(is.logical(content[,2])) { + content[,2] <- ifelse(content[,2], "T", "F") + } + + # check number of locus + if(out$n_loci != (ncol(content) - 3)) { + out$valid <- FALSE + msg <- tagList( + "Number of loci not consistent between", + "file header and file content." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # check sex content + if(!all(str_to_upper(as.character(content[,2])) %in% + c("F", "M", "9"))) { + out$valid <- FALSE + msg <- tagList( + tags$code("SEX"), "column should only contain", + tags$code("F"), "for female,", + tags$code("M"), "for male or", + tags$code("9"), "for missing values (see manual)." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # number of pop + out$n_pop <- length(unique(content[,3])) + + # number of individuals + out$n_indiv <- nrow(content) + + ## REFORMAT DATA (to speed up the checks) + + # individual information + indiv_info <- content[,1:3] + colnames(indiv_info) <- c("IND", "SEX", "POP") + + # check for any missing values + if(any(is.na(indiv_info))) { + out$valid <- FALSE + msg <- tagList( + tags$code("NA"), "values were found", + "in one (or more) of the columns", + tags$code("IND"), tags$code("SEX"), "or", tags$code("POP"), "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # SNP info + content <- t(as.matrix(content[,-(1:3)])) + + # check for any missing values + if(any(is.na(content))) { + out$valid <- FALSE + msg <- tagList( + tags$code("NA"), "values were found", + "in one (or more) of the columns encoding the SNPs." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # check data type + if(!is.integer(content)) { + out$valid <- FALSE + msg <- tagList( + "SNP encoding should be only contain integer values." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # check data encoding + if(!all(content %in% c(0,1,2,9))) { + out$valid <- FALSE + msg <- tagList( + "SNP encoding should only be", + tags$code("0"), tags$code("1"), tags$code("2"), + "or", tags$code("9"), "for missing data (see manual)." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + + + + + + + + # ## filtering locus + # if(valid) { + # filtered_locus <- filter_snp_indseq( + # content, indiv_info, snp_type, locus_details, maf + # ) + # if(is.null(filtered_locus)) { + # err <- append( + # err, + # str_c( + # "Issue with IndSeq SNP file data content:", + # "error during SNP filtering, ", + # "probable issue with SNP data encoding (see manual).", + # sep = " " + # ) + # ) + # valid <- FALSE + # } else { + # locus <- unname(unlist(lapply( + # split(filtered_locus, seq(nrow(filtered_locus))), + # function(item) { + # if(item$count > 0) + # return(str_c(item$count - item$filter, + # " <", item$type, ">")) + # else + # return(NULL) + # } + # ))) + # locus_msg <- unname(unlist(lapply( + # split(filtered_locus, seq(nrow(filtered_locus))), + # function(item) { + # if(item$count > 0) { + # item_type <- str_c("<", item$type, ">") + # txt <- str_c( + # item$count - item$filter, item_type, sep = " " + # ) + # if(item$filter > 0) { + # txt <- str_c( + # item$count - item$filter, item_type, + # "(note:", item$filter, item_type, + # "loci are filtered out", + # "based on MAF criterion)", sep = " " + # ) + # } + # if(item$mono > 0) { + # str_c( + # item$count - item$filter, item_type, + # "(note:", item$filter, item_type, + # "loci, including ", + # item$mono, item_type, + # "monomorphic loci, are filtered out", + # "based on MAF criterion)", sep = " " + # ) + # } + # return(txt) + # } else + # return(NULL) + # } + # ))) + # msg <- append( + # msg, + # str_c("Total number of loci =", n_loci, sep = " ") + # ) + # msg <- append( + # msg, + # str_c("Available loci:", str_c(locus_msg, collapse = "; "), + # sep = " ") + # ) + # } + # } + # ## output + # spec <- lst(locus, n_indiv, n_loci, n_pop) + # } + + ## output + return(out) +} + +#' Processing individual locus specific in IndSeq SNP data file based on MAF +#' @keywords internal +#' @author Ghislain Durif +#' @param snp_data integer vector encoding for each individual the +#' number of ancestral allele for the loci, i.e. `0`, `1` and `2` for +#' autosome (`A`) and X-chromosome (`X`) in female, or `0` and `1` for +#' haploid locus (`H`), Y-chromosome (`Y`) in male and +#' mitochondrial locus (`M`). +#' Note : missing values are encoded by a `9`. +#' @param sex_data character vector encoding for each individual the sex, i.e. +#' `"F"` for female and `"M"` for male. +#' Note : missing values are encoded by a `"9"`. +#' @param pop_data character vector encoding each individual population. +#' @param snp_type character encoding the type of the locus +#' (among `A`, `H`, `X`, `Y`, `M`). +#' @param maf double between 0 and 1 +process_indseq_locus <- function(snp_data, sex_data, pop_data, snp_type, maf) { + + # init local + local <- list( + valid = TRUE, filt = FALSE, mono = FALSE, + missing_pop = NA, issue_X = NA, issue_Y = NA, af = 0, + hudson = FALSE + ) + + ## identify excessive missing data for a single pop + is_missing_pop <- tapply( + snp_data, as.factor(pop_data), function(x) return(x == 9) + ) + check_missing_pop <- unlist(lapply( + 1:length(is_missing_pop), + function(ind) + return(sum(is_missing_pop[[ind]]) == length(is_missing_pop[[ind]])) + )) + if(any(check_missing_pop)) { + local$valid <- FALSE + local$missing_pop <- str_c( + names(is_missing_pop)[check_missing_pop], collapse = "," + ) + } + + ## identify missing data and compute data size without missing data + non_missing_data <- (snp_data != 9) + true_data_size <- sum(non_missing_data) + ## identify female, male, and missing sex + female_ind <- (sex_data == "F") + male_ind <- (sex_data == "M") + missing_sex <- (sex_data == "9") + + if(snp_type == "A") { + ## autosome + if(true_data_size > 0) { + # reference allele frequence + local$af <- sum(snp_data[non_missing_data]) / (2 * true_data_size) + } + } else if(snp_type %in% c("H", "M")) { + ## haploid & mitochondrial + if(true_data_size > 0) { + # reference allele frequence + local$af <- sum(snp_data[non_missing_data]) / true_data_size + } + } else if(snp_type == "X") { + ## X-chromosome + check_X <- (missing_sex & non_missing_data) | + (male_ind & (snp_data == 2)) + if(any(check_X)) { + local$valid <- FALSE + local$issue_X <- str_c(which(check_X), collapse = ",") + } + + specific_ind <- non_missing_data & !missing_sex + specific_data_size <- sum(specific_ind) + + if(specific_data_size > 0) { + weighted_data_size <- 2 * sum(non_missing_data & female_ind) + + sum(non_missing_data & male_ind) + # reference allele frequence + local$af <- sum(snp_data[specific_ind]) / weighted_data_size + } + } else if(snp_type == "Y") { + ## Y-chromosome + check_Y <- (missing_sex & non_missing_data) | + (male_ind & (snp_data == 2)) | (female_ind & (snp_data != 9)) + if(any(check_Y)) { + local$valid <- FALSE + local$issue_Y <- str_c(which(check_Y), collapse = ",") + } + + specific_ind <- non_missing_data & male_ind + specific_data_size <- sum(specific_ind) + + if(specific_data_size > 0) { + # reference allele frequence + local$af <- sum(snp_data[specific_ind]) / specific_data_size + } + } + + # filtering + if(maf == "hudson") { + maf <- 0 + local$hudson <- TRUE + } + # MAF filter + local$filter <- (local$af < maf) || (1 - local$af < maf) + # mono + local$mono <- (local$af == 0) || (1 - local$af == 0) + + if(local$hudson) { + local$filter <- local$mono + } + + # filter + return(data.frame( + valid = local$valid, filt = local$filt, mono = local$mono, + missing_pop = local$missing_pop, + issue_X = local$issue_X, issue_Y = local$issue_Y, af = local$af, + maf = maf, hudson = local$hudson, + stringsAsFactors = FALSE + )) +} + +#' Filter IndSeq SNP data based on MAF +#' @keywords internal +#' @author Ghislain Durif +#' @param content data.frame with data file content, with columns +#' `IND` (individual id), `SEX` (female or male), `POP` (population id), +#' and remaining columns corresponding to SNPs where each entry encode the +#' number of ancestral allele for the loci, i.e. `0`, `1` and `2` for +#' autosome (`A`) and X-chromosome (`X`) in female, or `0` and `1` for +#' haploid locus (`H`), Y-chromosome (`Y`) in male and +#' mitochondrial locus (`M`). +#' Note : missing values are encoded by a `9`. +#' @param snp_type vector of column header, being `IND`, `SEX`, `POP` followed +#' by each locus type (among `A`, `H`, `X`, `Y`, `M`). +#' @param locus_details data.frame with two columns, `count` being the number +#' of locus for each type in the data, and `type` being the corresponding locus +#' type (among `A`, `H`, `X`, `Y`, `M`). +#' @importFrom dplyr bind_rows +filter_snp_indseq <- function(content, indiv_info, snp_type, locus_details, + maf=0.05) { + + ncore <- getOption("diyabcGUI")$ncore + snp_filter <- NULL + + if(is.null(maf)) maf <- 0 + + snp_filter <- pblapply( + 1:nrow(content), + function(ind) { + out <- indseq_locus_filter( + snp_data = content[ind,], + sex_data = indiv_info$SEX, + locus_type = snp_type[ind], + maf = maf + ) + }, + cl = ncore + ) + + seek_error <- unlist(lapply( + snp_filter, + function(item) "try-error" %in% class(item) + )) + if(any(seek_error)) { + error_msg <- attributes( + snp_filter[[ which(seek_error[1]) ]] + )$condition$message + err <- str_c( + "Issue when checking IndSeq SNP data file", + "content:", + error_msg, + sep = " " + ) + pprint(err) + return(NULL) + } else { + # no error + snp_filter <- Reduce("bind_rows", snp_filter) + + # extract number of filtered loci by type + tmp_filter <- tapply( + snp_filter$filter, + snp_type, + sum + ) + tmp_filter <- data.frame( + filter=tmp_filter, type=names(tmp_filter), + row.names=NULL, stringsAsFactors = FALSE + ) + + # extract number of monomorphic loci by type + tmp_mono <- tapply( + snp_filter$mono, + snp_type, + sum + ) + tmp_mono <- data.frame( + mono=tmp_mono, type=names(tmp_mono), + row.names=NULL, stringsAsFactors = FALSE + ) + + # extract number of loci with issue regarding sex by type + tmp_issue <- tapply( + snp_filter$issue, + snp_type, + sum + ) + tmp_issue <- data.frame( + issue=tmp_issue, type=names(tmp_issue), + row.names=NULL, stringsAsFactors = FALSE + ) + + # merge all results into locos_details table + locus_details <- merge(locus_details, tmp_filter) + locus_details <- merge(locus_details, tmp_mono) + locus_details <- merge(locus_details, tmp_issue) + } + + return(locus_details) +} \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-43_data_read.R b/R-pkg/tests/testthat/test-43_data_read.R new file mode 100644 index 00000000..ba18fc99 --- /dev/null +++ b/R-pkg/tests/testthat/test-43_data_read.R @@ -0,0 +1,211 @@ +context("43_data_read") + +test_that("read_indseq_snp_data", { + + data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" + data_dir <- file.path(data4test_dir(), "IndSeq_SNP_estim_param") + out <- read_indseq_snp_data( + data_file, data_dir, expected_data_file = NULL + ) + expect_true(out$valid) + + data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" + data_dir <- file.path(example_dir(), + "PoolSeq_SNP_estim_param") + out <- read_indseq_snp_data( + data_file, data_dir, expected_data_file = NULL + ) + expect_false(out$valid) +}) + +test_that("process_indseq_locus", { + # generate test data + n_indiv <- 100 + n_pop <- 5 + indiv_id <- str_c("ind", 1:n_indiv) + indiv_sex <- sample(c("F", "M"), size = n_indiv, replace = TRUE) + indiv_pop <- sample(1:n_pop, size = n_indiv, replace = TRUE) + content <- data.frame( + IND = str_c("ind", 1:n_indiv), + SEX = indiv_sex, + POP = as.character(indiv_pop), + A = sample(0:2, size = n_indiv, replace = TRUE), + H = sample(0:1, size = n_indiv, replace = TRUE), + X = ifelse( + indiv_sex == "F", + sample(0:2, size = n_indiv, replace = TRUE), + sample(0:1, size = n_indiv, replace = TRUE) + ), + Y = ifelse( + indiv_sex == "F", + rep(9, n_indiv), + sample(0:1, size = n_indiv, replace = TRUE) + ), + M = sample(0:1, size = n_indiv, replace = TRUE), + stringsAsFactors = FALSE + ) + + col_type <- colnames(content) + + locus_count <- data.frame( + count = rep(1, 5), + type = tail(col_type, 5), + stringsAsFactors = FALSE + ) + + maf <- 0.05 + + # test on a single SNP + ind <- 4 + snp_data <- content[,ind] + sex_data <- content[,2] + pop_data <- content[,3] + snp_type <- col_type[ind] + + res <- process_indseq_locus(snp_data, sex_data, pop_data, snp_type, maf) + expect_true(res$valid) + expect_false(res$filt) + expect_false(res$mono) + expect_true(is.na(res$missing_pop)) + expect_true(is.na(res$issue_X)) + expect_true(is.na(res$issue_Y)) + expect_true(is.numeric(res$af)) + expect_true(is.numeric(res$maf)) + expect_false(res$hudson) + + # test on multiple SNP without missing values at random + res <- Reduce("rbind", lapply( + 1:(ncol(content) - 3) + 3, + function(ind) { + out <- process_indseq_locus( + snp_data = content[,ind], + sex_data = content[,2], + pop_data = content[,3], + snp_type = col_type[ind], + maf = maf + ) + } + )) + + expect_true(is.data.frame(res)) + + ## test missing pop and missing values for X and Y chromosome + # generate test data + n_indiv <- 100 + n_pop <- 5 + indiv_id <- str_c("ind", 1:n_indiv) + indiv_sex <- sort(rep(c("F", "M"), length.out = n_indiv)) + indiv_pop <- sort(rep(str_c("pop", 1:n_pop), length.out = n_indiv)) + content <- data.frame( + IND = str_c("ind", 1:n_indiv), + SEX = indiv_sex, + POP = as.character(indiv_pop), + A = c(rep(9, sum(indiv_pop == "pop1")), + sample(0:2, size = n_indiv - sum(indiv_pop == "pop1"), + replace = TRUE)), + H = sample(0:1, size = n_indiv, replace = TRUE), + X = sample(0:2, size = n_indiv, replace = TRUE), + Y = sample(0:1, size = n_indiv, replace = TRUE), + M = sample(0:1, size = n_indiv, replace = TRUE), + stringsAsFactors = FALSE + ) + col_type <- colnames(content) + # run + res <- Reduce("rbind", lapply( + 1:(ncol(content) - 3) + 3, + function(ind) { + out <- process_indseq_locus( + snp_data = content[,ind], + sex_data = content[,2], + pop_data = content[,3], + snp_type = col_type[ind], + maf = maf + ) + } + )) + + expect_equal(res$valid, c(FALSE, TRUE, FALSE, FALSE, TRUE)) +}) + +test_that("filter_snp_indseq", { + ## generate simulated test data + n_indiv <- 100 + n_pop <- 5 + indiv_id <- str_c("ind", 1:n_indiv) + indiv_sex <- sample(c("F", "M"), size = n_indiv, replace = TRUE) + indiv_pop <- sample(1:n_pop, size = n_indiv, replace = TRUE) + content <- data.frame( + IND = str_c("ind", 1:n_indiv), + SEX = indiv_sex, + POP = indiv_pop, + A = sample(0:2, size = n_indiv, replace = TRUE), + H = sample(0:1, size = n_indiv, replace = TRUE), + X = ifelse( + indiv_sex == "F", + sample(0:2, size = n_indiv, replace = TRUE), + sample(0:1, size = n_indiv, replace = TRUE) + ), + Y = ifelse( + indiv_sex == "F", + rep(9, n_indiv), + sample(0:1, size = n_indiv, replace = TRUE) + ), + M = sample(0:1, size = n_indiv, replace = TRUE), + stringsAsFactors = FALSE + ) + + col_type <- colnames(content) + + locus_details <- data.frame( + count = rep(1, 5), + type = tail(col_type, 5), + stringsAsFactors = FALSE + ) + + maf <- 0.05 + + # test on simulated data + out <- filter_snp_indseq(content, col_type, locus_details, maf) + + expect_true(is.data.frame(out)) + expect_true(all(colnames(out) %in% c("type", "mono", "count", "filter", "issue"))) + + ## test on SNP data file + data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" + data_dir <- file.path(example_dir(), + "IndSeq_SNP_estim_param") + data_path <- file.path(data_dir, data_file) + + # header + col_type <- unname(unlist( + read.table(file = data_path, skip = 1, nrows = 1) + )) + + # locus type + candidate_locus <- c("A", "H", "X", "Y", "M") + locus_encoding <- str_c(header[-(1:3)], collapse = " ") + locus_details <- Reduce("rbind", lapply( + candidate_locus, + function(pttrn) { + count <- str_count(locus_encoding, pttrn) + return(data.frame( + count = count, + type = pttrn, + stringsAsFactors = FALSE + )) + } + )) + + # data + content <- read.table(file = data_path, skip = 2) + + # test on data + out <- filter_snp_indseq(content, col_type, locus_details, maf) + + expect_true(is.data.frame(out)) + expect_true(all(colnames(out) %in% c("type", "mono", "count", "filter", "issue"))) + + +}) + + From 356f151880f708ae24204974afbd3a6bf61d4e38 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 11:19:28 +0200 Subject: [PATCH 69/82] WIP: function to read data files (DONE: function to check all SNPs from IndSeq SNP file) --- R-pkg/R/43_data_read.R | 219 +++++++++++++++-------- R-pkg/tests/testthat/test-43_data_read.R | 79 ++++++-- 2 files changed, 210 insertions(+), 88 deletions(-) diff --git a/R-pkg/R/43_data_read.R b/R-pkg/R/43_data_read.R index 78370a5b..13e68c61 100644 --- a/R-pkg/R/43_data_read.R +++ b/R-pkg/R/43_data_read.R @@ -459,7 +459,7 @@ process_indseq_locus <- function(snp_data, sex_data, pop_data, snp_type, maf) { # filter return(data.frame( - valid = local$valid, filt = local$filt, mono = local$mono, + valid = local$valid, filter = local$filter, mono = local$mono, missing_pop = local$missing_pop, issue_X = local$issue_X, issue_Y = local$issue_Y, af = local$af, maf = maf, hudson = local$hudson, @@ -467,38 +467,39 @@ process_indseq_locus <- function(snp_data, sex_data, pop_data, snp_type, maf) { )) } -#' Filter IndSeq SNP data based on MAF +#' Check (for missing values) and filter (based on MAF) IndSeq SNP data #' @keywords internal #' @author Ghislain Durif -#' @param content data.frame with data file content, with columns -#' `IND` (individual id), `SEX` (female or male), `POP` (population id), -#' and remaining columns corresponding to SNPs where each entry encode the -#' number of ancestral allele for the loci, i.e. `0`, `1` and `2` for -#' autosome (`A`) and X-chromosome (`X`) in female, or `0` and `1` for -#' haploid locus (`H`), Y-chromosome (`Y`) in male and +#' @param content data.frame with columns corresponding to SNPs +#' where each entry encode the number of ancestral allele for the loci, +#' i.e. `0`, `1` and `2` for autosome (`A`) and X-chromosome (`X`) in female, +#' or `0` and `1` for haploid locus (`H`), Y-chromosome (`Y`) in male and #' mitochondrial locus (`M`). #' Note : missing values are encoded by a `9`. -#' @param snp_type vector of column header, being `IND`, `SEX`, `POP` followed -#' by each locus type (among `A`, `H`, `X`, `Y`, `M`). -#' @param locus_details data.frame with two columns, `count` being the number +#' @param indiv_info data.frame with columns `IND` (individual id), +#' `SEX` (female or male), `POP` (population id). +#' Note : missing values are encoded by a `9`. +#' @param snp_type vector of locus type (among `A`, `H`, `X`, `Y`, `M`). +#' @param locus_cout data.frame with two columns, `count` being the number #' of locus for each type in the data, and `type` being the corresponding locus #' type (among `A`, `H`, `X`, `Y`, `M`). #' @importFrom dplyr bind_rows -filter_snp_indseq <- function(content, indiv_info, snp_type, locus_details, - maf=0.05) { +check_snp_indseq <- function(content, indiv_info, snp_type, locus_count, + maf=0.05) { - ncore <- getOption("diyabcGUI")$ncore - snp_filter <- NULL + # init output + out <- list( + valid = TRUE, locus_count = NULL, msg = list() + ) - if(is.null(maf)) maf <- 0 + ncore <- getOption("diyabcGUI")$ncore - snp_filter <- pblapply( + snp_list <- pblapply( 1:nrow(content), function(ind) { - out <- indseq_locus_filter( - snp_data = content[ind,], - sex_data = indiv_info$SEX, - locus_type = snp_type[ind], + out <- process_indseq_locus( + snp_data = content[ind,], sex_data = indiv_info$SEX, + pop_data = indiv_info$POP, snp_type = snp_type[ind], maf = maf ) }, @@ -506,63 +507,141 @@ filter_snp_indseq <- function(content, indiv_info, snp_type, locus_details, ) seek_error <- unlist(lapply( - snp_filter, - function(item) "try-error" %in% class(item) + snp_list, function(item) "try-error" %in% class(item) )) if(any(seek_error)) { error_msg <- attributes( - snp_filter[[ which(seek_error[1]) ]] + snp_list[[ which(seek_error)[1] ]] )$condition$message err <- str_c( - "Issue when checking IndSeq SNP data file", - "content:", - error_msg, - sep = " " + "Issue when checking IndSeq SNP data file content:", + error_msg, sep = " " ) + out$valid <- FALSE + msg <- tagList("Error when checking data file content.") + out$msg <- append(out$msg, list(msg)) pprint(err) - return(NULL) - } else { - # no error - snp_filter <- Reduce("bind_rows", snp_filter) - - # extract number of filtered loci by type - tmp_filter <- tapply( - snp_filter$filter, - snp_type, - sum - ) - tmp_filter <- data.frame( - filter=tmp_filter, type=names(tmp_filter), - row.names=NULL, stringsAsFactors = FALSE - ) - - # extract number of monomorphic loci by type - tmp_mono <- tapply( - snp_filter$mono, - snp_type, - sum - ) - tmp_mono <- data.frame( - mono=tmp_mono, type=names(tmp_mono), - row.names=NULL, stringsAsFactors = FALSE - ) - - # extract number of loci with issue regarding sex by type - tmp_issue <- tapply( - snp_filter$issue, - snp_type, - sum - ) - tmp_issue <- data.frame( - issue=tmp_issue, type=names(tmp_issue), - row.names=NULL, stringsAsFactors = FALSE - ) + return(out) + } + + # no error + snp_tab <- Reduce("bind_rows", snp_list) + rm("snp_list") + + # check for unvalid locus + if(any(!snp_tab$valid)) { + # missing pop + is_missing_pop <- !is.na(snp_tab$missing_pop) + if(any(is_missing_pop)) { + out$valid <- FALSE + missing_pop <- snp_tab$missing_pop[is_missing_pop] + snp_issue <- which(is_missing_pop) + msg <- tagList( + "Issue with missing data for entire population(s)", + "regarding SNP:", + tags$div( + do.call( + tags$ul, + lapply( + 1:length(snp_issue), + function(ind) { + tags$li( + tags$b(snp_issue[ind]), + "(for population(s)", + tags$code(missing_pop[ind]), ")" + ) + } + ) + ), + style = "column-count:2;" + ) + ) + out$msg <- append(out$msg, list(msg)) + } - # merge all results into locos_details table - locus_details <- merge(locus_details, tmp_filter) - locus_details <- merge(locus_details, tmp_mono) - locus_details <- merge(locus_details, tmp_issue) + # X chromosome + is_issue_X <- !is.na(snp_tab$issue_X) + if(any(is_issue_X)) { + out$valid <- FALSE + issue_X <- snp_tab$issue_X[is_issue_X] + snp_issue <- which(is_issue_X) + msg <- tagList( + "Issue with data for SNP on X chromosome (see manual)", + "regarding SNP:", + tags$div( + do.call( + tags$ul, + lapply( + 1:length(snp_issue), + function(ind) { + tags$li( + tags$b(snp_issue[ind]), + "(for individuals", + tags$code(issue_X[ind]), ")" + ) + } + ) + ) + ) + ) + out$msg <- append(out$msg, list(msg)) + } + # Y chromosome + is_issue_Y <- !is.na(snp_tab$issue_Y) + if(any(is_issue_Y)) { + out$valid <- FALSE + issue_Y <- snp_tab$issue_Y[is_issue_Y] + snp_issue <- which(is_issue_Y) + msg <- tagList( + "Issue with data for SNP on Y chromosome (see manual)", + "regarding SNP:", + tags$div( + do.call( + tags$ul, + lapply( + 1:length(snp_issue), + function(ind) { + tags$li( + tags$b(snp_issue[ind]), + "(for individuals", + tags$code(issue_Y[ind]), ")" + ) + } + ) + ) + ) + ) + out$msg <- append(out$msg, list(msg)) + } } - return(locus_details) + # continue ? + if(!out$valid) { + return(out) + } + + # extract number of filtered loci by type + tmp_filter <- tapply( + snp_tab$filter, snp_type, sum + ) + tmp_filter <- data.frame( + filt=tmp_filter, type=names(tmp_filter), + row.names=NULL, stringsAsFactors = FALSE + ) + + # extract number of monomorphic loci by type + tmp_mono <- tapply( + snp_tab$mono, snp_type, sum + ) + tmp_mono <- data.frame( + mono=tmp_mono, type=names(tmp_mono), + row.names=NULL, stringsAsFactors = FALSE + ) + + # merge all results into locos_details table + out$locus_count <- merge(locus_count, tmp_filter) + out$locus_count <- merge(out$locus_count, tmp_mono) + + # output + return(out) } \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-43_data_read.R b/R-pkg/tests/testthat/test-43_data_read.R index ba18fc99..410ac026 100644 --- a/R-pkg/tests/testthat/test-43_data_read.R +++ b/R-pkg/tests/testthat/test-43_data_read.R @@ -127,14 +127,14 @@ test_that("process_indseq_locus", { expect_equal(res$valid, c(FALSE, TRUE, FALSE, FALSE, TRUE)) }) -test_that("filter_snp_indseq", { +test_that("check_snp_indseq", { ## generate simulated test data n_indiv <- 100 n_pop <- 5 indiv_id <- str_c("ind", 1:n_indiv) indiv_sex <- sample(c("F", "M"), size = n_indiv, replace = TRUE) indiv_pop <- sample(1:n_pop, size = n_indiv, replace = TRUE) - content <- data.frame( + data_tab <- data.frame( IND = str_c("ind", 1:n_indiv), SEX = indiv_sex, POP = indiv_pop, @@ -154,37 +154,75 @@ test_that("filter_snp_indseq", { stringsAsFactors = FALSE ) - col_type <- colnames(content) + indiv_info <- data_tab[,1:3] + content <- t(data_tab[,-(1:3)]) + snp_type <- rownames(content) - locus_details <- data.frame( - count = rep(1, 5), - type = tail(col_type, 5), + locus_count <- data.frame( + count = rep(1, 5), type = snp_type, stringsAsFactors = FALSE ) maf <- 0.05 # test on simulated data - out <- filter_snp_indseq(content, col_type, locus_details, maf) + res <- check_snp_indseq(content, indiv_info, snp_type, locus_count, maf) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_true(is.data.frame(res$locus_count)) + + ## test missing pop and missing values for X and Y chromosome + # generate test data + n_indiv <- 100 + n_pop <- 5 + indiv_id <- str_c("ind", 1:n_indiv) + indiv_sex <- sort(rep(c("F", "M"), length.out = n_indiv)) + indiv_pop <- sort(rep(str_c("pop", 1:n_pop), length.out = n_indiv)) + data_tab <- data.frame( + IND = str_c("ind", 1:n_indiv), + SEX = indiv_sex, + POP = as.character(indiv_pop), + A = c(rep(9, sum(indiv_pop == "pop1")), + sample(0:2, size = n_indiv - sum(indiv_pop == "pop1"), + replace = TRUE)), + H = sample(0:1, size = n_indiv, replace = TRUE), + X = sample(0:2, size = n_indiv, replace = TRUE), + Y = sample(0:1, size = n_indiv, replace = TRUE), + M = sample(0:1, size = n_indiv, replace = TRUE), + stringsAsFactors = FALSE + ) + + indiv_info <- data_tab[,1:3] + content <- t(data_tab[,-(1:3)]) + snp_type <- rownames(content) + + locus_count <- data.frame( + count = rep(1, 5), type = snp_type, + stringsAsFactors = FALSE + ) - expect_true(is.data.frame(out)) - expect_true(all(colnames(out) %in% c("type", "mono", "count", "filter", "issue"))) + maf <- 0.05 + + # test on simulated data + res <- check_snp_indseq(content, indiv_info, snp_type, locus_count, maf) + expect_false(res$valid) + expect_equal(length(res$msg), 3) ## test on SNP data file data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" - data_dir <- file.path(example_dir(), + data_dir <- file.path(data4test_dir(), "IndSeq_SNP_estim_param") data_path <- file.path(data_dir, data_file) # header - col_type <- unname(unlist( + header <- unname(unlist( read.table(file = data_path, skip = 1, nrows = 1) )) # locus type candidate_locus <- c("A", "H", "X", "Y", "M") locus_encoding <- str_c(header[-(1:3)], collapse = " ") - locus_details <- Reduce("rbind", lapply( + locus_count <- Reduce("rbind", lapply( candidate_locus, function(pttrn) { count <- str_count(locus_encoding, pttrn) @@ -199,13 +237,18 @@ test_that("filter_snp_indseq", { # data content <- read.table(file = data_path, skip = 2) - # test on data - out <- filter_snp_indseq(content, col_type, locus_details, maf) - - expect_true(is.data.frame(out)) - expect_true(all(colnames(out) %in% c("type", "mono", "count", "filter", "issue"))) - + snp_type <- header[-(1:3)] + indiv_info <- content[,1:3] + colnames(indiv_info) <- header[1:3] + content <- t(content[,-(1:3)]) + # test on data + res <- check_snp_indseq(content, indiv_info, snp_type, locus_count, maf) + expect_true(res$valid) + expect_true(is.data.frame(res$locus_count)) + expect_equal(res$locus_count$count, 30000) + expect_equal(res$locus_count$filt, 13046) + expect_equal(res$locus_count$mono, 0) }) From 3ee60275891fe08b3072316f6602eba73dfb72f9 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 11:39:47 +0200 Subject: [PATCH 70/82] function to read IndSeq SNP data file is ok --- R-pkg/R/43_data_read.R | 102 +++++------------------ R-pkg/tests/testthat/test-43_data_read.R | 29 ++++--- 2 files changed, 37 insertions(+), 94 deletions(-) diff --git a/R-pkg/R/43_data_read.R b/R-pkg/R/43_data_read.R index 13e68c61..d3d841ac 100644 --- a/R-pkg/R/43_data_read.R +++ b/R-pkg/R/43_data_read.R @@ -11,7 +11,7 @@ read_indseq_snp_data <- function(data_file, data_dir) { # init output out <- list( msg = list(), valid = TRUE, - data_file = NULL, n_loci = NULL, locus_count = NULL, locus_desc = NULL, + data_file = NULL, n_loci = NULL, locus_count = NULL, n_pop = NULL, n_indiv = NULL, sex_ratio = NULL, maf = NULL ) @@ -143,7 +143,7 @@ read_indseq_snp_data <- function(data_file, data_dir) { # locus type candidate_locus <- c("A", "H", "X", "Y", "M") locus_encoding <- str_c(header2[-(1:3)], collapse = " ") - out$locus_count <- Reduce("rbind", lapply( + locus_count <- Reduce("rbind", lapply( candidate_locus, function(pttrn) { count <- str_count(locus_encoding, pttrn) @@ -262,85 +262,19 @@ read_indseq_snp_data <- function(data_file, data_dir) { return(out) } - - - - - - - - # ## filtering locus - # if(valid) { - # filtered_locus <- filter_snp_indseq( - # content, indiv_info, snp_type, locus_details, maf - # ) - # if(is.null(filtered_locus)) { - # err <- append( - # err, - # str_c( - # "Issue with IndSeq SNP file data content:", - # "error during SNP filtering, ", - # "probable issue with SNP data encoding (see manual).", - # sep = " " - # ) - # ) - # valid <- FALSE - # } else { - # locus <- unname(unlist(lapply( - # split(filtered_locus, seq(nrow(filtered_locus))), - # function(item) { - # if(item$count > 0) - # return(str_c(item$count - item$filter, - # " <", item$type, ">")) - # else - # return(NULL) - # } - # ))) - # locus_msg <- unname(unlist(lapply( - # split(filtered_locus, seq(nrow(filtered_locus))), - # function(item) { - # if(item$count > 0) { - # item_type <- str_c("<", item$type, ">") - # txt <- str_c( - # item$count - item$filter, item_type, sep = " " - # ) - # if(item$filter > 0) { - # txt <- str_c( - # item$count - item$filter, item_type, - # "(note:", item$filter, item_type, - # "loci are filtered out", - # "based on MAF criterion)", sep = " " - # ) - # } - # if(item$mono > 0) { - # str_c( - # item$count - item$filter, item_type, - # "(note:", item$filter, item_type, - # "loci, including ", - # item$mono, item_type, - # "monomorphic loci, are filtered out", - # "based on MAF criterion)", sep = " " - # ) - # } - # return(txt) - # } else - # return(NULL) - # } - # ))) - # msg <- append( - # msg, - # str_c("Total number of loci =", n_loci, sep = " ") - # ) - # msg <- append( - # msg, - # str_c("Available loci:", str_c(locus_msg, collapse = "; "), - # sep = " ") - # ) - # } - # } - # ## output - # spec <- lst(locus, n_indiv, n_loci, n_pop) - # } + ## LOCUS FILTERING + # run + snp_check <- check_snp_indseq( + content, indiv_info, snp_type, locus_count, out$maf + ) + # check + if(!snp_check$valid) { + out$valid <- FALSE + out$msg <- append(out$msg, snp_check$msg) + return(out) + } + # results + out$locus_count <- snp_check$locus_count ## output return(out) @@ -554,7 +488,8 @@ check_snp_indseq <- function(content, indiv_info, snp_type, locus_count, ) ), style = "column-count:2;" - ) + ), + "Remove this locus (these loci) from your dataset." ) out$msg <- append(out$msg, list(msg)) } @@ -582,7 +517,8 @@ check_snp_indseq <- function(content, indiv_info, snp_type, locus_count, } ) ) - ) + ), + "Remove this locus (these loci) from your dataset." ) out$msg <- append(out$msg, list(msg)) } diff --git a/R-pkg/tests/testthat/test-43_data_read.R b/R-pkg/tests/testthat/test-43_data_read.R index 410ac026..d51250c5 100644 --- a/R-pkg/tests/testthat/test-43_data_read.R +++ b/R-pkg/tests/testthat/test-43_data_read.R @@ -4,18 +4,26 @@ test_that("read_indseq_snp_data", { data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" data_dir <- file.path(data4test_dir(), "IndSeq_SNP_estim_param") - out <- read_indseq_snp_data( - data_file, data_dir, expected_data_file = NULL - ) - expect_true(out$valid) + res <- read_indseq_snp_data(data_file, data_dir) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal(res$data_file, data_file) + expect_equal(res$n_loci, 30000) + expect_true(is.data.frame(res$locus_count)) + expect_equal(res$locus_count$count, 30000) + expect_equal(res$locus_count$filt, 13046) + expect_equal(res$locus_count$mono, 0) + expect_equal(res$n_pop, 4) + expect_equal(res$n_indiv, 40) + expect_equal(res$sex_ratio, "NM=1NF") + expect_equal(res$maf, 0.05) data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" - data_dir <- file.path(example_dir(), + data_dir <- file.path(data4test_dir(), "PoolSeq_SNP_estim_param") - out <- read_indseq_snp_data( - data_file, data_dir, expected_data_file = NULL - ) - expect_false(out$valid) + res <- read_indseq_snp_data(data_file, data_dir) + expect_false(res$valid) + expect_true(length(res$msg) > 0) }) test_that("process_indseq_locus", { @@ -210,8 +218,7 @@ test_that("check_snp_indseq", { ## test on SNP data file data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" - data_dir <- file.path(data4test_dir(), - "IndSeq_SNP_estim_param") + data_dir <- file.path(data4test_dir(), "IndSeq_SNP_estim_param") data_path <- file.path(data_dir, data_file) # header From e85e35630fc75cd60c6bf02767a86cc94fc42cd5 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 13:47:20 +0200 Subject: [PATCH 71/82] function to read poolseq file (+ implementation of monomorphic locus filter) --- R-pkg/R/43_data_read.R | 262 +++++++++++++++++++++-- R-pkg/tests/testthat/test-43_data_read.R | 62 +++++- 2 files changed, 307 insertions(+), 17 deletions(-) diff --git a/R-pkg/R/43_data_read.R b/R-pkg/R/43_data_read.R index d3d841ac..230aa138 100644 --- a/R-pkg/R/43_data_read.R +++ b/R-pkg/R/43_data_read.R @@ -16,8 +16,6 @@ read_indseq_snp_data <- function(data_file, data_dir) { sex_ratio = NULL, maf = NULL ) - current_line <- 0 - # full path file_name <- file.path(data_dir, data_file) @@ -64,7 +62,7 @@ read_indseq_snp_data <- function(data_file, data_dir) { if(!str_detect(header1, pttrn)) { out$valid <- FALSE msg <- tagList( - "Missing", tags$b("sex ratio"), "in header1 first line." + "Missing", tags$b("sex ratio"), "in first line header." ) out$msg <- append(out$msg, list(msg)) return(out) @@ -77,7 +75,7 @@ read_indseq_snp_data <- function(data_file, data_dir) { out$valid <- FALSE msg <- tagList( "Missing", tags$b("Minimum Allele Frequency"), - "(MAF) in header1 first line.", + "(MAF) in first line header.", "MAF should be a real number between 0 and 1", "or the keyword", tags$code("hudson"), ", see manual." @@ -126,8 +124,9 @@ read_indseq_snp_data <- function(data_file, data_dir) { !all(header2[-(1:3)] %in% c("A", "H", "X", "Y", "M"))) { out$valid <- FALSE msg <- tagList( - "Formatting issue with header second line, see manual.", - "Required column titles are", tags$code("IND SEX POP"), + "Formatting issue with header second line.", + "Required column titles are", tags$code("IND"), + tags$code("SEX"), tags$code("POP"), "followed by a letter indicator among", tags$code("A"), tags$code("H"), tags$code("X"), tags$code("Y"), tags$code("M"), @@ -166,7 +165,7 @@ read_indseq_snp_data <- function(data_file, data_dir) { out$valid <- FALSE msg <- tagList( "Formatting issue with data, ", - "impossible to read them, see manual." + "impossible to read the file, see manual." ) out$msg <- append(out$msg, list(msg)) return(out) @@ -300,7 +299,7 @@ process_indseq_locus <- function(snp_data, sex_data, pop_data, snp_type, maf) { # init local local <- list( - valid = TRUE, filt = FALSE, mono = FALSE, + valid = TRUE, filter = FALSE, mono = FALSE, missing_pop = NA, issue_X = NA, issue_Y = NA, af = 0, hudson = FALSE ) @@ -426,8 +425,8 @@ check_snp_indseq <- function(content, indiv_info, snp_type, locus_count, valid = TRUE, locus_count = NULL, msg = list() ) + # process SNPs ncore <- getOption("diyabcGUI")$ncore - snp_list <- pblapply( 1:nrow(content), function(ind) { @@ -561,7 +560,7 @@ check_snp_indseq <- function(content, indiv_info, snp_type, locus_count, snp_tab$filter, snp_type, sum ) tmp_filter <- data.frame( - filt=tmp_filter, type=names(tmp_filter), + filter=tmp_filter, type=names(tmp_filter), row.names=NULL, stringsAsFactors = FALSE ) @@ -574,10 +573,249 @@ check_snp_indseq <- function(content, indiv_info, snp_type, locus_count, row.names=NULL, stringsAsFactors = FALSE ) - # merge all results into locos_details table + # merge all results into locus_count table out$locus_count <- merge(locus_count, tmp_filter) out$locus_count <- merge(out$locus_count, tmp_mono) # output return(out) -} \ No newline at end of file +} + +#' Read and parse PoolSeq SNP data file +#' @keywords internal +#' @author Ghislain Durif +#' @param data_file string, data file name. +#' @param data_dir string, path to directory where data file is stored. +#' @importFrom tools file_ext +#' @importFrom parallel makeCluster stopCluster +#' @importFrom pbapply pblapply +read_poolseq_snp_data <- function(data_file, data_dir) { + + # init output + out <- list( + msg = list(), valid = TRUE, + data_file = NULL, n_loci = NULL, locus_count = NULL, + n_pop = NULL, sex_ratio = NULL, mrc = NULL + ) + + # full path + file_name <- file.path(data_dir, data_file) + + # check file_name + tmp <- check_file_name(file_name) + if(!tmp) { + out$valid <- FALSE + msg <- tagList("Invalid data file name.") + out$msg <- append(out$msg, list(msg)) + } + + # check file content + if(file.info(file_name)$size == 0) { + out$valid <- FALSE + msg <- tagList("Data file is empty.") + out$msg <- append(out$msg, list(msg)) + } + + # check file_type + if(tools::file_ext(file_name) != "snp") { + out$valid <- FALSE + msg <- tagList( + "IndSeq SNP files should have an extension", + tags$code(".snp"), "." + ) + out$msg <- append(out$msg, list(msg)) + } + + # continue ? + if(!out$valid) { + return(out) + } + + # data file name + out$data_file <- data_file + + ## DATA FILE CONTENT + + ## HEADER 1 + header1 <- readLines(file_name, n = 1, warn = FALSE) + + # sex ratio + pttrn <- "(?i)NM=[0-9]+\\.?[0-9]*NF(?-i)" + if(!str_detect(header1, pttrn)) { + out$valid <- FALSE + msg <- tagList( + "Missing", tags$b("sex ratio"), "in first line header." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + out$sex_ratio <- str_extract(header1, pttrn) + + # MRC + pttrn <- "(?i)(?<=MRC=)[0-9]+(?-i)" + if(!str_detect(header1, pttrn)) { + out$valid <- FALSE + msg <- tagList( + "Missing", tags$b("Minimum Read Count"), + "(MRC) in first line header.", + "MRC should be a positive or null integer,", + "see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + out$mrc <- as.integer(str_extract(header1, pttrn)) + + ## HEADER 2 + header2 <- tryCatch( + unname(unlist(read.table(file = file_name, skip = 1, nrows = 1))), + error = function(e) return(e) + ) + if("error" %in% class(header2)) { + out$valid <- FALSE + msg <- tagList( + "Formatting issue with second line header, ", + "impossible to read it, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # upper case + header2 <- str_to_upper(header2) + + # header 2 content + if(header2[1] != "POOL" & + header2[2] != "POP_NAME:HAPLOID_SAMPLE_SIZE" & + !all(str_detect(header2[-(1:2)], "POP[0-9]+:[0-9]+"))) { + out$valid <- FALSE + msg <- tagList( + "Formatting issue with header second line.", + "Required column titles are", tags$code("POOL"), + tags$code("POP_NAME:HAPLOID_SAMPLE_SIZE"), + "followed by a character string", + tags$code("POP:"), + "for each population in the file (see manual)." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## number of population + out$n_pop <- length(header2) - 2 + + ## DATA FILE CONTENT + content <- tryCatch( + read.table(file_name, skip = 2), error = function(e) return(e) + ) + if("error" %in% class(content)) { + out$valid <- FALSE + msg <- tagList( + "Formatting issue with data, ", + "impossible to read the file, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # check number of population + if(ncol(content) %% 2 != 0) { + out$valid <- FALSE + msg <- tagList( + "Formatting issue with data:", + "number of column should be even,", + "providing pair of counts for reference", + "and alternate alleles at each locus", + "in each popultation, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + if(ncol(content) != 2*out$n_pop) { + out$valid <- FALSE + msg <- tagList( + "Formatting issue with data:", + "number of population indicated in file second-line header", + "does not correspond to number of columns in file content,", + "see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # nb of locus + out$n_loci <- nrow(content) + + # check data encoding + check_snp_encoding <- apply(content, 1, is.integer) + if(!all(unlist(check_snp_encoding))) { + out$valid <- FALSE + msg <- tagList( + "Issue with data encoding at lines", + tags$code(str_c(which(check_snp_encoding, collapse = ", "))), ".", + "Expecting read counts, i.e. positive or null integer", + "(see manual)." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + if(any(is.na(content))) { + out$valid <- FALSE + msg <- tagList( + "Missing values (i.e.", tags$code("NA"), ")", + "are not allowed (see manual)." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## FILTERING LOCUS + snp_check <- check_snp_poolseq(content, mrc = out$mrc) + out$locus_count <- snp_check$locus_count + + ## output + return(out) +} + +#' Check (for missing values) and filter (based on MRC) IndSeq SNP data +#' @keywords internal +#' @author Ghislain Durif +#' @param content data.frame with columns corresponding to couples of +#' PoolSeq encoding. +check_snp_poolseq <- function(content, mrc = 1) { + + # init output + out <- list(locus_count = NULL) + + # number of pop + n_pop <- ncol(content) / 2 + + # count per allele + allele1_count <- apply( + content[,rep(c(TRUE,FALSE), n_pop)], 1, sum + ) + allele2_count <- apply( + content[,rep(c(FALSE,TRUE), n_pop)], 1, sum + ) + + # extract number of filtered loci by type + tmp_filter <- (allele1_count < mrc) | (allele2_count < mrc) + + # extract number of monomorphic loci by type + tmp_mono <- (allele1_count < 1) | (allele2_count < 1) + + # merge all results into locus_count table + out$locus_count <- data.frame( + type = "A", + count = nrow(content), + filter = sum(tmp_filter), + mono = sum(tmp_mono), + stringsAsFactors = FALSE + ) + + # output + return(out) +} + diff --git a/R-pkg/tests/testthat/test-43_data_read.R b/R-pkg/tests/testthat/test-43_data_read.R index d51250c5..812131f3 100644 --- a/R-pkg/tests/testthat/test-43_data_read.R +++ b/R-pkg/tests/testthat/test-43_data_read.R @@ -2,6 +2,7 @@ context("43_data_read") test_that("read_indseq_snp_data", { + # good file data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" data_dir <- file.path(data4test_dir(), "IndSeq_SNP_estim_param") res <- read_indseq_snp_data(data_file, data_dir) @@ -11,16 +12,16 @@ test_that("read_indseq_snp_data", { expect_equal(res$n_loci, 30000) expect_true(is.data.frame(res$locus_count)) expect_equal(res$locus_count$count, 30000) - expect_equal(res$locus_count$filt, 13046) + expect_equal(res$locus_count$filter, 13046) expect_equal(res$locus_count$mono, 0) expect_equal(res$n_pop, 4) expect_equal(res$n_indiv, 40) expect_equal(res$sex_ratio, "NM=1NF") expect_equal(res$maf, 0.05) + # bad file data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" - data_dir <- file.path(data4test_dir(), - "PoolSeq_SNP_estim_param") + data_dir <- file.path(data4test_dir(), "PoolSeq_SNP_estim_param") res <- read_indseq_snp_data(data_file, data_dir) expect_false(res$valid) expect_true(length(res$msg) > 0) @@ -72,7 +73,7 @@ test_that("process_indseq_locus", { res <- process_indseq_locus(snp_data, sex_data, pop_data, snp_type, maf) expect_true(res$valid) - expect_false(res$filt) + expect_false(res$filter) expect_false(res$mono) expect_true(is.na(res$missing_pop)) expect_true(is.na(res$issue_X)) @@ -254,8 +255,59 @@ test_that("check_snp_indseq", { expect_true(res$valid) expect_true(is.data.frame(res$locus_count)) expect_equal(res$locus_count$count, 30000) - expect_equal(res$locus_count$filt, 13046) + expect_equal(res$locus_count$filter, 13046) expect_equal(res$locus_count$mono, 0) }) +test_that("read_poolseq_snp_data", { + + # good file + data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" + data_dir <- file.path(data4test_dir(), "PoolSeq_SNP_estim_param") + res <- read_poolseq_snp_data(data_file, data_dir) + expect_true(res$valid) + expect_equal(length(res$msg), 0) + expect_equal(res$data_file, data_file) + expect_equal(res$n_loci, 30000) + expect_true(is.data.frame(res$locus_count)) + expect_equal(res$locus_count$count, 30000) + expect_equal(res$locus_count$filter, 15612) + expect_equal(res$locus_count$mono, 5918) + expect_equal(res$n_pop, 4) + expect_equal(res$sex_ratio, "NM=1NF") + expect_equal(res$mrc, 5) + + # bad file + data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" + data_dir <- file.path(data4test_dir(), "IndSeq_SNP_estim_param") + res <- read_poolseq_snp_data(data_file, data_dir) + expect_false(res$valid) + expect_true(length(res$msg) > 0) +}) + + +test_that("check_snp_poolseq", { + + ## test on SNP data file + data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" + data_dir <- file.path(data4test_dir(), "PoolSeq_SNP_estim_param") + data_path <- file.path(data_dir, data_file) + + # header + header <- unname(unlist( + read.table(file = data_path, skip = 1, nrows = 1) + )) + + # data + content <- read.table(file = data_path, skip = 2) + + # test on data + mrc <- 5 + res <- check_snp_poolseq(content, mrc) + expect_true(is.data.frame(res$locus_count)) + expect_equal(res$locus_count$count, 30000) + expect_equal(res$locus_count$filter, 15612) + expect_equal(res$locus_count$mono, 5918) +}) + From 0782cccf6b33695580fc059f48be72d7d5df557a Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 17:07:04 +0200 Subject: [PATCH 72/82] fix typo + use vector instead of data frame for header file content --- R-pkg/R/41_input_read.R | 2 +- R-pkg/tests/testthat/test-41_input_read.R | 29 +++++++++++------------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/R-pkg/R/41_input_read.R b/R-pkg/R/41_input_read.R index 628776e8..a169354f 100644 --- a/R-pkg/R/41_input_read.R +++ b/R-pkg/R/41_input_read.R @@ -48,7 +48,7 @@ read_header <- function(file_name, file_type, locus_type = "snp") { ## HEADER FILE CONTENT # read whole file in one string and split it by new line - header <- str_split(read_file(file_name), "\n", simplify = TRUE) + header <- unlist(str_split(read_file(file_name), "\n")) ## data file out$data_file <- header[1] diff --git a/R-pkg/tests/testthat/test-41_input_read.R b/R-pkg/tests/testthat/test-41_input_read.R index b26e6d8f..aef70b20 100644 --- a/R-pkg/tests/testthat/test-41_input_read.R +++ b/R-pkg/tests/testthat/test-41_input_read.R @@ -23,8 +23,8 @@ test_that("read_header", { expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") - expect_equal(res$n_loci_desc, 1) - expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_locus_desc, 1) + expect_equal(res$n_locus_desc, length(res$locus_desc)) expect_equal(res$n_prior, 8) expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$header_file, basename(file_name)) @@ -59,8 +59,8 @@ test_that("read_header", { expect_equal(res$n_cond, 6) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") - expect_equal(res$n_loci_desc, 1) - expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_locus_desc, 1) + expect_equal(res$n_locus_desc, length(res$locus_desc)) expect_equal(res$n_prior, 13) expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$header_file, basename(file_name)) @@ -86,8 +86,8 @@ test_that("read_header", { expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") - expect_equal(res$n_loci_desc, 1) - expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_locus_desc, 1) + expect_equal(res$n_locus_desc, length(res$locus_desc)) expect_equal(res$n_prior, 8) expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$header_file, basename(file_name)) @@ -112,8 +112,8 @@ test_that("read_header", { expect_equal(res$n_cond, 6) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") - expect_equal(res$n_loci_desc, 1) - expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_locus_desc, 1) + expect_equal(res$n_locus_desc, length(res$locus_desc)) expect_equal(res$n_prior, 13) expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$header_file, basename(file_name)) @@ -142,8 +142,8 @@ test_that("read_header", { expect_equal(res$n_cond, 0) expect_equal(res$n_cond, length(res$cond_list)) expect_null(res$simu_mode) - expect_equal(res$n_loci_desc, 50) - expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_locus_desc, 50) + expect_equal(res$n_locus_desc, length(res$locus_desc)) expect_equal(res$n_prior, 3) expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$n_group, 1) @@ -170,8 +170,8 @@ test_that("read_header", { expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") - expect_equal(res$n_loci_desc, 28) - expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_locus_desc, 28) + expect_equal(res$n_locus_desc, length(res$locus_desc)) expect_equal(res$n_prior, 9) expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$n_group, 2) @@ -208,8 +208,8 @@ test_that("read_header", { expect_equal(res$n_cond, 2) expect_equal(res$n_cond, length(res$cond_list)) expect_equal(res$simu_mode, "DRAW UNTIL") - expect_equal(res$n_loci_desc, 28) - expect_equal(res$n_loci_desc, length(res$loci_desc)) + expect_equal(res$n_locus_desc, 28) + expect_equal(res$n_locus_desc, length(res$locus_desc)) expect_equal(res$n_prior, 9) expect_equal(res$n_prior, length(res$prior_list)) expect_equal(res$n_group, 5) @@ -345,4 +345,3 @@ test_that("read_reftable", { expect_false(res$valid) expect_true(length(res$msg) == 2) }) - From bda8161e8f14f53ce67f1edd90cc3003f640bfa1 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 17:07:56 +0200 Subject: [PATCH 73/82] function to read mss file (including missing pop check) --- R-pkg/R/43_data_read.R | 491 ++++++++++++++++++++++- R-pkg/tests/testthat/test-43_data_read.R | 27 ++ 2 files changed, 516 insertions(+), 2 deletions(-) diff --git a/R-pkg/R/43_data_read.R b/R-pkg/R/43_data_read.R index 230aa138..41442ab7 100644 --- a/R-pkg/R/43_data_read.R +++ b/R-pkg/R/43_data_read.R @@ -587,8 +587,6 @@ check_snp_indseq <- function(content, indiv_info, snp_type, locus_count, #' @param data_file string, data file name. #' @param data_dir string, path to directory where data file is stored. #' @importFrom tools file_ext -#' @importFrom parallel makeCluster stopCluster -#' @importFrom pbapply pblapply read_poolseq_snp_data <- function(data_file, data_dir) { # init output @@ -819,3 +817,492 @@ check_snp_poolseq <- function(content, mrc = 1) { return(out) } +#' Check Microsat/Sequence (GenePop) data file +#' @keywords internal +#' @author Ghislain Durif +#' @param data_file string, data file name. +#' @param data_dir string, path to directory where data file is stored. +#' @param expected_data_file string, expected data file name for +#' existing project, default is NULL. +#' @importFrom tools file_ext +#' @importFrom readr read_file +read_mss_data <- function(data_file, data_dir) { + + # init output + out <- list( + msg = list(), valid = TRUE, + data_file = NULL, n_loci = NULL, locus_count = NULL, + n_pop = NULL, n_indiv = NULL, pop_size = NULL, + sex_ratio = NULL + ) + + # ## init output + # locus <- NULL + # locus_mode <- NULL + # locus_name <- NULL + # seq_length <- NULL + # n_loci <- NULL + # n_pop <- NULL + # n_indiv <- NULL + # pop_size <- NULL + + # full path + file_name <- file.path(data_dir, data_file) + + # check file_name + tmp <- check_file_name(file_name) + if(!tmp) { + out$valid <- FALSE + msg <- tagList("Invalid data file name.") + out$msg <- append(out$msg, list(msg)) + } + + # check file content + if(file.info(file_name)$size == 0) { + out$valid <- FALSE + msg <- tagList("Data file is empty.") + out$msg <- append(out$msg, list(msg)) + } + + # check file_type + if(tools::file_ext(file_name) != "mss") { + out$valid <- FALSE + msg <- tagList( + "IndSeq SNP files should have an extension", + tags$code(".mss"), "." + ) + out$msg <- append(out$msg, list(msg)) + } + + # continue ? + if(!out$valid) { + return(out) + } + + # data file name + out$data_file <- data_file + + ## DATA FILE CONTENT + + ## HEADER 1 + header1 <- readLines(file_name, n = 1, warn = FALSE) + + # sex ratio + pttrn <- "(?i)NM=[0-9]+\\.?[0-9]*NF(?-i)" + if(!str_detect(header1, pttrn)) { + out$valid <- FALSE + msg <- tagList( + "Missing", tags$b("sex ratio"), "in first line header." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + out$sex_ratio <- str_extract(header1, pttrn) + + ## FILE CONTENT + file_content <- unlist(str_split(read_file(file_name), "\n")) + + ## locus description + pttrn <- "(?<=<)(A|H|X|Y|M)(?=>)" + locus_pttrn_match <- str_extract(file_content, pttrn) + # check if present + if(all(is.na(locus_pttrn_match))) { + out$valid <- FALSE + msg <- tagList( + "Missing locus description, see manual" + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # check if description follows title line + locus_match_ind <- which(!is.na(locus_pttrn_match)) + if(!all(locus_match_ind == 2:tail(locus_match_ind, 1))) { + out$valid <- FALSE + msg <- tagList( + "Locus description should be located", + "at beginning of data file, after title line,", + "with a single locus per line, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + # check locus description format + pttrn <- "^[A-Za-z0-9\\s_\\-]+ <(A|H|X|Y|M)>$" + locus_desc_match_ind <- which(str_detect(file_content, pttrn)) + if(!all(locus_desc_match_ind == locus_match_ind)) { + out$valid <- FALSE + issue_line <- locus_match_ind[!locus_match_ind %in% + locus_desc_match_ind] + msg <- tagList( + "Issue with Microsat/Sequence locus description format at rows:", + tags$code(str_c(issue_line, collapse = ",")), ".", br(), + "You can use the following character to specify", + "locus names:", + tags$code("A-Z"), tags$code("a-z"), tags$code("0-9"), + tags$code("_"), tags$code("-"), "and", tags$code(" "), "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## number of locus + out$n_loci <- length(locus_desc_match_ind) + + ## locus description + pttrn <- "(?<=<)(A|H|X|Y|M)(?=>$)" + locus_desc <- str_extract(file_content[locus_match_ind], pttrn) + + ## locus name + pttrn <- "^[A-Za-z0-9\\s_\\-]+(?= <)" + locus_name <- str_extract(file_content[locus_match_ind], pttrn) + locus_name <- str_replace_all(locus_name, " +", "_") + # check + if(length(unique(locus_name)) != length(locus_name)) { + out$valid <- FALSE + msg <- tagList( + "Issue with locus description", + "each locus should have a unique name, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## population + pttrn <- "^(?i)pop(?-i)$" + pop_match_ind <- which(str_detect(file_content, pttrn)) + # check + if(length(pop_match_ind) == 0) { + out$valid <- FALSE + msg <- tagList( + "Keyword", tags$code("POP"), "is missing, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## remove unnecessary space + file_content <- str_replace_all( + str_trim(file_content), " +", " " + ) + + ## remove final empty line (if any) + if(tail(file_content, 1) == "") { + file_content <- head(file_content, length(file_content) - 1) + } + + ## number of pop + out$n_pop <- length(pop_match_ind) + + ## pop size + out$pop_size <- diff(c(pop_match_ind, length(file_content))) - + c(rep(1, out$n_pop - 1), 0) + + ## number of individuals + out$n_indiv <- sum(out$pop_size) + + ## population id + pop_id <- unlist(lapply( + 1:out$n_pop, + function(ind) return(rep(ind, each = out$pop_size[ind])) + )) + + ## EXTRACT DATA + data_ind <- head(pop_match_ind, 1):length(file_content) + data_ind <- data_ind[!data_ind %in% pop_match_ind] + data_content <- file_content[data_ind] + + # write data content to a temporary file + data_content <- str_replace_all( + str_replace_all(data_content, ",", " "), + " +", ";" + ) + tmp_file <- file.path(data_dir, "tmp_data_file.csv") + tmp <- writeLines(data_content, tmp_file) + on.exit({ + if(file.exists(tmp_file)) + fs::file_delete(tmp_file) + }) + + # read data as data.frame + data_content <- tryCatch( + read.table(tmp_file, sep = ";", stringsAsFactors = FALSE, + colClasses = "character"), + error = function(e) e + ) + if("error" %in% class(data_content)) { + out$valid <- FALSE + msg <- tagList( + "Issue with Microsat/Sequences data file format, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ##### CHECK DATA + ## check number of loci + if(ncol(data_content) != out$n_loci + 1) { + out$valid <- FALSE + msg <- tagList( + "Number of declared loci at beginning of file", + "does not correspond to number of actual loci", + "in the data, see manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## format data.frame + colnames(data_content) <- c("indiv", str_c("locus", 1:out$n_loci)) + data_content$pop <- as.character(pop_id) + + ## indiv name first column + if(!all(str_detect(data_content$indiv, "[A-Za-z0-9_-]+"))) { + out$valid <- FALSE + msg <- tagList( + "First column should provide individual names,", + "you can use the following character to specify", + "such names:", + tags$code("A-Z"), tags$code("a-z"), tags$code("0-9"), + tags$code("_"), tags$code("-"), "and", tags$code(" "), "." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## check locus encoding + microsat_hap_encoding <- "^[0-9]{3}$" + microsat_dip_encoding <- "^[0-9]{6}$" + microsat_x_encoding <- "^([0-9]{3}|[0-9]{6})$" + nucleotid_encoding <- "[ATCG]*" + seq_hap_encoding <- str_c("^<\\[", nucleotid_encoding, "\\]>") + seq_dip_encoding <- str_c( + "^<\\[", nucleotid_encoding, "\\]\\[", nucleotid_encoding, "\\]>" + ) + seq_x_encoding <- str_c( + "^<\\[", nucleotid_encoding, "\\](\\[", nucleotid_encoding, "\\])?>" + ) + + ## locus data + locus_data <- data_content[,!colnames(data_content) %in% c("indiv", "pop")] + # issue when a single locus + if(ncol(data_content) == 3) { + locus_data <- data.frame(locus1 = locus_data) + } + + ### microsat locus + microsat_hap_locus <- which( + apply( + locus_data, 2, function(loc) { + return(all(str_detect(loc, microsat_hap_encoding))) + } + ) & (locus_desc %in% c("H", "Y", "M")) + ) + microsat_dip_locus <- which( + apply( + locus_data, 2, function(loc) { + return(all(str_detect(loc, microsat_dip_encoding))) + } + ) & (locus_desc == "A") + ) + microsat_x_locus <- which( + apply( + locus_data, 2, function(loc) { + return(all(str_detect(loc, microsat_x_encoding))) + } + ) & (locus_desc == "X") + ) + + ### seq locus + seq_hap_locus <- which( + apply( + locus_data, 2, function(loc) { + return(all(str_detect(loc, seq_hap_encoding))) + } + ) & (locus_desc %in% c("H", "Y", "M")) + ) + seq_dip_locus <- which( + apply( + locus_data, 2, function(loc) { + return(all(str_detect(loc, seq_dip_encoding))) + } + ) & (locus_desc == "A") + ) + seq_x_locus <- which( + apply( + locus_data, 2, function(loc) { + return(all(str_detect(loc, seq_x_encoding))) + } + ) & (locus_desc == "X") + ) + + ## check that A are diploid locus + if(!all(which(locus_desc == "A") %in% + c(microsat_dip_locus, seq_dip_locus))) { + out$valid <- FALSE + msg <- tagList( + "Autosomal diploid (i.e. identified by a", tags$code("A"), ")", + "should all be diploid loci." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## check that H, M and Y are haploid locus + if(!all(which(locus_desc %in% c("H", "M", "Y")) %in% + c(microsat_hap_locus, seq_hap_locus))) { + out$valid <- FALSE + msg <- tagList( + "Autosomal haploid loci", + "(i.e. identified by a", tags$code("H"), ")", + "Y-linked loci (i.e. identified by a", tags$code("Y"), ")", + "and Mitochondrial loci", + "(i.e. identified by a", tags$code("M"), ")", + "should all be haploid loci." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ### check if other issue with formating + if(!all(sort(c(microsat_hap_locus, microsat_dip_locus, + microsat_x_locus, + seq_hap_locus, seq_dip_locus, + seq_x_locus)) == + 1:out$n_loci)) { + out$valid <- FALSE + msg <- tagList( + "Issue with data content of columns:", + tags$code(str_c( + (1:n_loci)[!(1:n_loci) %in% + c(microsat_hap_locus, + microsat_dip_locus, + microsat_x_locus, + seq_hap_locus, + seq_dip_locus, + seq_x_locus)], + collapse = ", " + )), br(), + "See manual." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## locus mode (microsat or sequence, hap or dip) + tmp_locus_mode <- data.frame( + mode = c( + rep("M", length(microsat_hap_locus) + length(microsat_dip_locus) + + length(microsat_x_locus)), + rep("S", length(seq_hap_locus) + length(seq_dip_locus) + + length(seq_x_locus)) + ), + index = c(microsat_hap_locus, microsat_dip_locus, + microsat_x_locus, + seq_hap_locus, seq_dip_locus, + seq_x_locus) + ) + locus_mode <- tmp_locus_mode$mode[order(tmp_locus_mode$index)] + + ## check seq locus length + seq_length <- unlist(lapply( + which(locus_mode == "S"), + function(col_ind) { + tmp <- unlist(lapply( + 1:out$n_indiv, + function(row_ind) { + return( + str_length(str_extract_all( + locus_data[row_ind,col_ind], + "\\[[ATCG]*\\]", + simplify = TRUE + )) - 2 + ) + } + )) + + out <- max(tmp) + if(!all(unique(tmp) %in% c(0, out))) { + return(-100) + } else { + return(out) + } + } + )) + + if(any(seq_length == -100)) { + out$valid <- FALSE + msg <- tagList( + "Non-missing sequence data attached to following loci", + tags$code(str_c(which(seq_length == -100), collapse = ", ")), + "do not have the same length in all", + "individuals." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## check for missing data in microsat locus + microsat_missing_encoding <- "^(0{3}|0{6})$" + missing_microsat <- apply( + locus_data[,which(locus_mode == "M")], c(1,2), + function(item) return(str_detect(item, microsat_missing_encoding)) + ) + + missing_pop <- apply( + missing_microsat, 2, + function(item) { + return(any(unlist(tapply(item, pop_id, sum)) == out$pop_size)) + } + ) + + if(any(missing_pop)) { + out$valid <- FALSE + msg <- tagList( + "Issue with missing data for entire population(s)", + "regarding microsat locus:", + tags$code(str_c(which(missing_pop), collapse = ", ")), br(), + "Remove this locus (these loci) from your dataset." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## check for missing data in seq locus + seq_missing_encoding <- "^<\\[\\](\\[\\])?>$" + missing_seq <- apply( + locus_data[,which(locus_mode == "S")], c(1,2), + function(item) return(str_detect(item, seq_missing_encoding)) + ) + + missing_pop <- apply( + missing_seq, 2, + function(item) { + return(any(unlist(tapply(item, pop_id, sum)) == out$pop_size)) + } + ) + + if(any(missing_pop)) { + out$valid <- FALSE + msg <- tagList( + "Issue with missing data for entire population(s)", + "regarding sequence locus:", + tags$code(str_c(which(missing_pop), collapse = ", ")), br(), + "Remove this locus (these loci) from your dataset." + ) + out$msg <- append(out$msg, list(msg)) + return(out) + } + + ## locus count + out$locus_count <- as.data.frame(table(locus_desc, locus_mode)) + colnames(out$locus_count) <- c("type", "mode", "count") + + ## save locus description and mode + out$locus_desc <- locus_desc + out$locus_mode <- locus_mode + + ## output + return(out) +} diff --git a/R-pkg/tests/testthat/test-43_data_read.R b/R-pkg/tests/testthat/test-43_data_read.R index 812131f3..74a67594 100644 --- a/R-pkg/tests/testthat/test-43_data_read.R +++ b/R-pkg/tests/testthat/test-43_data_read.R @@ -311,3 +311,30 @@ test_that("check_snp_poolseq", { expect_equal(res$locus_count$mono, 5918) }) +test_that("read_mss_data", { + + ## good file + data_file <- "mss_example_001.mss" + data_dir <- data4test_dir("mss") + res <- read_mss_data(data_file, data_dir) + expect_true(out$valid) + expect_equal(length(out$msg), 0) + expect_equal(res$data_file, data_file) + expect_equal(res$n_loci, 28) + expect_true(is.data.frame(res$locus_count)) + expect_equal(sum(res$locus_count$count), res$n_loci) + expect_equal(res$n_pop, 3) + expect_equal(res$n_indiv, 60) + expect_equal(res$pop_size, c(20,20,20)) + expect_equal(res$sex_ratio, "NM=2.33333NF") + expect_equal(length(res$locus_desc), res$n_loci) + expect_equal(length(res$locus_mode), res$n_loci) + + ## bad file + data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" + data_dir <- file.path(example_dir(), + "IndSeq_SNP_estim_param") + out <- read_mss_data(data_file, data_dir) + expect_false(out$valid) +}) + From c5e6412e7d0e5324560d215256140531742a10e4 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 17:16:36 +0200 Subject: [PATCH 74/82] check data file --- R-pkg/R/44_data_check.R | 28 ++++++++++++++++++++++ R-pkg/tests/testthat/test-44_data_check.R | 29 +++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 R-pkg/R/44_data_check.R create mode 100644 R-pkg/tests/testthat/test-44_data_check.R diff --git a/R-pkg/R/44_data_check.R b/R-pkg/R/44_data_check.R new file mode 100644 index 00000000..39a00c1a --- /dev/null +++ b/R-pkg/R/44_data_check.R @@ -0,0 +1,28 @@ +#' Check data file +#' @keywords internal +#' @author Ghislain Durif +#' @param data_file string, data file name. +#' @param data_dir string, path to directory where data file is stored. +#' @param locus_type string, locus type `"mss"` or `"snp"`. +#' @param seq_mode string, `"indseq"` or `"poolseq"`. +check_data_file <- function(data_file, data_dir, locus_type, seq_mode) { + # output + out <- NULL + # ## debugging + # logging("data_file = ", data_file) + ## mss locus + if(locus_type == "mss") { + out <- read_mss_data(data_file, data_dir) + ## snp locus / indseq + } else if((locus_type == "snp") & (seq_mode == "indseq")) { + out <- read_indseq_snp_data(data_file, data_dir) + ## snp locus / poolseq + } else if((locus_type == "snp") & (seq_mode == "poolseq")) { + out <- read_poolseq_snp_data(data_file, data_dir) + } else { + stop("Issue with input arguments") + } + + ## output + return(out) +} \ No newline at end of file diff --git a/R-pkg/tests/testthat/test-44_data_check.R b/R-pkg/tests/testthat/test-44_data_check.R new file mode 100644 index 00000000..bf9100b8 --- /dev/null +++ b/R-pkg/tests/testthat/test-44_data_check.R @@ -0,0 +1,29 @@ +context("44_data_check") + +test_that("check_data_file", { + # snp indseq + data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" + data_dir <- file.path(example_dir(), + "IndSeq_SNP_estim_param") + locus_type <- "snp" + seq_mode <- "indseq" + out <- check_data_file(data_file, data_dir, locus_type, seq_mode) + expect_true(out$valid) + + # snp poolseq + data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" + data_dir <- file.path(example_dir(), + "PoolSeq_SNP_estim_param") + locus_type <- "snp" + seq_mode <- "poolseq" + out <- check_data_file(data_file, data_dir, locus_type, seq_mode) + expect_true(out$valid) + + # mss + data_file <- "mss_example_001.mss" + data_dir <- data4test_dir("mss") + locus_type <- "mss" + seq_mode <- NULL + out <- check_data_file(data_file, data_dir, locus_type, seq_mode) + expect_true(out$valid) +}) From 5e2caa1796894c18c5fed18838d1fe7e7cd94318 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 17:17:01 +0200 Subject: [PATCH 75/82] remove functions due to refactoring --- R-pkg/R/41_io_management.R | 1380 +----------------- R-pkg/tests/testthat/test-41_io_management.R | 231 --- 2 files changed, 1 insertion(+), 1610 deletions(-) diff --git a/R-pkg/R/41_io_management.R b/R-pkg/R/41_io_management.R index 757e1e39..6d10c5f9 100644 --- a/R-pkg/R/41_io_management.R +++ b/R-pkg/R/41_io_management.R @@ -1,1379 +1,3 @@ -#' Check data file -#' @keywords internal -#' @author Ghislain Durif -#' @param data_file string, data file name. -#' @param data_dir string, path to directory where data file is stored. -#' @param locus_type string, locus type `"mss"` or `"snp"`. -#' @param seq_mode string, `"indseq"` or `"poolseq"`. -#' @param expected_data_file string, expected data file name for -#' existing project, default is NULL. -#' @importFrom tools file_ext -check_data_file <- function(data_file, data_dir, locus_type, seq_mode, - expected_data_file = NULL) { - # output - out <- NULL - # ## debugging - # logging("data_file = ", data_file) - ## mss locus - if(locus_type == "mss") { - out <- check_mss_data_file(data_file, data_dir, expected_data_file) - ## snp locus / indseq - } else if((locus_type == "snp") & (seq_mode == "indseq")) { - out <- check_indseq_snp_data_file(data_file, data_dir, - expected_data_file) - ## snp locus / poolseq - } else if((locus_type == "snp") & (seq_mode == "poolseq")) { - out <- check_poolseq_snp_data_file(data_file, data_dir, - expected_data_file) - } else { - stop("Issue with input arguments") - } - - ## output - return(out) -} - -#' Check IndSeq SNP data file -#' @keywords internal -#' @author Ghislain Durif -#' @param data_file string, data file name. -#' @param data_dir string, path to directory where data file is stored. -#' @param expected_data_file string, expected data file name for -#' existing project, default is NULL. -#' @importFrom tools file_ext -#' @importFrom parallel makeCluster stopCluster -#' @importFrom pbapply pblapply -check_indseq_snp_data_file <- function(data_file, data_dir, - expected_data_file = NULL) { - # init output and intermediate - header <- NULL - info <- NULL - spec <- NULL - valid <- TRUE - - content <- NULL - indiv_info <- NULL - - locus <- NULL - locus_details <- NULL - n_loci <- NULL - n_pop <- NULL - n_indiv <- NULL - snp_type <- NULL - maf <- NULL - - err <- list() - msg <- list() - - # data path - data_path <- file.path(data_dir, data_file) - - ## file exists and is not empty? - if(!file.exists(data_path)) { - err <- append(err, "Input data file does not exist") - valid <- FALSE - } else if(file.info(data_path)$size == 0) { - err <- append(err, "Input data file is empty") - valid <- FALSE - } else { - ## check file type - if(tools::file_ext(data_path) != "snp") { - err <- append( - err, - "IndSeq SNP files should have an extension '.snp'." - ) - valid <- FALSE - } else { - ## info - info <- readLines(data_path, n = 1, warn = FALSE) - # sex ratio - pttrn <- "(?<=<)NM=[0-9]+\\.?[0-9]*NF(?=>)" - if(str_detect(info, pttrn)) { - msg <- append( - msg, - str_c("Sex ratio:", str_extract(info, pttrn), sep = " ") - ) - } else { - err <- append( - err, - str_c( - "Issue with IndSeq SNP file header first line:", - "missing 'sex ratio', see manual", sep = " " - ) - ) - valid <- FALSE - } - # MAF - pttrn <- "(?<=<)MAF=[:graph:]+(?=>)" - if(str_detect(info, pttrn)) { - msg <- append( - msg, - str_c("Minimum allele frequency criterion:", - str_extract(info, pttrn), sep = " ") - ) - # numeric MAF - pttrn <- "(?<=)" - if(str_detect(info, pttrn)) { - tmp_maf <- str_extract(info, pttrn) - maf <- as.numeric(tmp_maf) - if(maf < 0 | maf > 1) { - err <- append( - err, - str_c( - "Issue with IndSeq SNP file header first line:", - "MAF should be a real number between 0 and 1", - "or the keyword 'hudson', see manual.", - sep = " " - ) - ) - valid <- FALSE - } - } - } else { - msg <- append( - msg, - str_c( - "Missing 'minimum allele frequency criterion (MAF)'", - "in IndSeq SNP file header first line:", - "Hudson algorithm will be used.", sep = " " - ) - ) - } - # additional info - pttrn <- str_c( - "()", - "()", - sep = "|" - ) - add_info <- str_trim(str_replace_all(info, pttrn, "")) - if(str_length(add_info) > 0) { - msg <- append( - msg, - str_c("Additional information:", add_info, sep = " ") - ) - } - ## header - header <- tryCatch( - unname(unlist(read.table(file = data_path, - skip = 1, nrows = 1))), - error = function(e) e - ) - if("error" %in% class(header)) { - err <- append( - err, - str_c( - "Issue with IndSeq SNP file header second line:", - header$message, sep = " " - ) - ) - valid <- FALSE - } else { - # upper case - header <- str_to_upper(header) - # header format - if(header[1] != "IND" & header[2] != "SEX" & - header[3] != "POP" & - !all(header[-(1:3)] %in% c("A", "H", "X", "Y", "M"))) { - err <- append( - err, - str_c( - "Issue with IndSeq SNP file header second line:", - "required format is 'IND SEX POP' followed by", - "a letter indicator among 'A', 'H', 'X', 'Y', 'M'", - "for each SNP locus in the file (see manual).", - sep = " " - ) - ) - valid <- FALSE - } - # nb of locus - n_loci <- length(header) - 3 - # locus type - candidate_locus <- c("A", "H", "X", "Y", "M") - locus_encoding <- str_c(header[-(1:3)], collapse = " ") - locus_details <- Reduce("rbind", lapply( - candidate_locus, - function(pttrn) { - count <- str_count(locus_encoding, pttrn) - return(data.frame( - count = count, - type = pttrn, - stringsAsFactors = FALSE - )) - } - )) - # save snp type for filtering - snp_type <- header[-(1:3)] - # merge header - header_length <- length(header) - header <- str_c(header[1:min(20,header_length)], - collapse = " ") - if(header_length > 20) - header <- str_c(header, "...", sep = " ") - msg <- append( - msg, - str_c("header:", str_c("'", header, "'"), sep = " ") - ) - ## content - content <- tryCatch( - read.table(file = data_path, skip = 2), - error = function(e) e - ) - if("error" %in% class(content)) { - err <- append( - err, - str_c( - "Issue with IndSeq SNP data file content:", - content$message, sep = " " - ) - ) - valid <- FALSE - } else { - # check for SEX column with only F (interpreted as FALSE) - if(is.logical(content[,2])) { - content[,2] <- ifelse(content[,2], "T", "F") - } - # check number of locus - if(n_loci != (ncol(content) - 3)) { - err <- append( - err, - str_c( - "Issue with IndSeq SNP data file content:", - "number of loci not consistent between", - "file header and file content.", - sep = " " - ) - ) - valid <- FALSE - } - # check sex content - if(!all(str_to_upper(as.character(content[,2])) %in% - c("F", "M", "9"))) { - err <- append( - err, - str_c( - "Issue with IndSeq SNP data file content:", - "'SEX' column should contain only", - "'F' for female, 'M' for male", - "or '9' for missing values (see manual).", - sep = " " - ) - ) - valid <- FALSE - } - # number of pop - n_pop <- length(unique(content[,3])) - # number of individuals - n_indiv <- nrow(content) - msg <- append( - msg, - str_c( - "Sample:", - n_indiv, "individuals", - "from", n_pop, "populations", - sep = " ") - ) - # reformat data - indiv_info <- content[,1:3] - colnames(indiv_info) <- c("IND", "SEX", "POP") - content <- t(as.matrix(content[,-(1:3)])) - # check for any - if(any(is.na(content)) | any(is.na(indiv_info))) { - err <- append( - err, - str_c( - "Issue with IndSeq SNP file data content:", - "NA values were found", - sep = " " - ) - ) - valid <- FALSE - } else { - # check SNP encoding - if(!all(content %in% c(0,1,2,9))) { - err <- append( - err, - str_c( - "Issue with IndSeq SNP file data content:", - "SNP encoding should be '0', '1', '2'", - "or '9' for missing data (see manual).", - sep = " " - ) - ) - valid <- FALSE - } - } - } - } - } - ## filtering locus - if(valid) { - filtered_locus <- filter_snp_indseq( - content, indiv_info, snp_type, locus_details, maf - ) - if(is.null(filtered_locus)) { - err <- append( - err, - str_c( - "Issue with IndSeq SNP file data content:", - "error during SNP filtering, ", - "probable issue with SNP data encoding (see manual).", - sep = " " - ) - ) - valid <- FALSE - } else { - locus <- unname(unlist(lapply( - split(filtered_locus, seq(nrow(filtered_locus))), - function(item) { - if(item$count > 0) - return(str_c(item$count - item$filter, - " <", item$type, ">")) - else - return(NULL) - } - ))) - locus_msg <- unname(unlist(lapply( - split(filtered_locus, seq(nrow(filtered_locus))), - function(item) { - if(item$count > 0) { - item_type <- str_c("<", item$type, ">") - txt <- str_c( - item$count - item$filter, item_type, sep = " " - ) - if(item$filter > 0) { - txt <- str_c( - item$count - item$filter, item_type, - "(note:", item$filter, item_type, - "loci are filtered out", - "based on MAF criterion)", sep = " " - ) - } - if(item$mono > 0) { - str_c( - item$count - item$filter, item_type, - "(note:", item$filter, item_type, - "loci, including ", - item$mono, item_type, - "monomorphic loci, are filtered out", - "based on MAF criterion)", sep = " " - ) - } - return(txt) - } else - return(NULL) - } - ))) - msg <- append( - msg, - str_c("Total number of loci =", n_loci, sep = " ") - ) - msg <- append( - msg, - str_c("Available loci:", str_c(locus_msg, collapse = "; "), - sep = " ") - ) - } - } - ## output - spec <- lst(locus, n_indiv, n_loci, n_pop) - } - ## expected data file ? - if(!is.null(expected_data_file)) { - if(data_file != expected_data_file) { - err <- append( - err, - str_c( - "Data file expected by provided 'header.txt'", - "or 'headerRF.txt' file(s)", - "and data file provided by user are different.", - sep = " " - ) - ) - valid <- FALSE - } - } - ## output - out <- lst(header, info, spec, valid, err, msg) - return(out) -} - -#' Locus specific filter for IndSeq SNP data based on MAF -#' @keywords internal -#' @author Ghislain Durif -#' @param snp_data integer vector encoding for each individual the -#' number of ancestral allele for the loci, i.e. `0`, `1` and `2` for -#' autosome (`A`) and X-chromosome (`X`) in female, or `0` and `1` for -#' haploid locus (`H`), Y-chromosome (`Y`) in male and -#' mitochondrial locus (`M`). -#' Note : missing values are encoded by a `9`. -#' @param sex_data character vector encoding for each individual the sex, i.e. -#' `"F"` for female and `"M"` for male. -#' Note : missing values are encoded by a `"9"`. -#' @param locus_type character encoding the type of the locus -#' (among `A`, `H`, `X`, `Y`, `M`). -#' @param maf double between 0 and 1 -indseq_locus_filter <- function(snp_data, sex_data, locus_type, maf) { - - # init - af <- 0 - issue <- FALSE - - # identify missing data and compute data size without missing data - non_missing_data <- (snp_data != 9) - true_data_size <- sum(non_missing_data) - # identify female, male, and missing sex - female_ind <- (sex_data == "F") - male_ind <- (sex_data == "M") - missing_sex <- (sex_data == "9") - - if(locus_type == "A") { - ## autosome - if(true_data_size > 0) { - # reference allele frequence - af <- sum(snp_data[non_missing_data]) / (2 * true_data_size) - } - } else if(locus_type %in% c("H", "M")) { - ## haploid & mitochondrial - if(true_data_size > 0) { - # reference allele frequence - af <- sum(snp_data[non_missing_data]) / true_data_size - } - } else if(locus_type == "X") { - ## X-chromosome - issue <- any(missing_sex & non_missing_data) | - any(male_ind & (snp_data == 2)) - - specific_ind <- non_missing_data & !missing_sex - - specific_data_size <- sum(specific_ind) - - if(specific_data_size > 0) { - weighted_data_size <- 2 * sum(non_missing_data & female_ind) + - sum(non_missing_data & male_ind) - # reference allele frequence - af <- sum(snp_data[specific_ind]) / weighted_data_size - } - } else if(locus_type == "Y") { - ## Y-chromosome - issue <- any(missing_sex & non_missing_data) | - any(male_ind & (snp_data == 2)) | - any(female_ind & (snp_data != 9)) - - specific_ind <- non_missing_data & male_ind - - specific_data_size <- sum(specific_ind) - - if(specific_data_size > 0) { - # reference allele frequence - af <- sum(snp_data[specific_ind]) / specific_data_size - } - } - - # filtering - # TODO check > or >= for maf filter - filter_ind <- !((af >= maf) & ((1 - af) >= maf)) - mono_ind <- !((af > 0) & ((1 - af) > 0)) - if(maf == 0) { - filter_ind <- mono_ind - } - - # filter - return(data.frame( - filter = filter_ind, - mono = mono_ind, - issue = issue, - stringsAsFactors = FALSE - )) -} - -#' Filter IndSeq SNP data based on MAF -#' @keywords internal -#' @author Ghislain Durif -#' @param content data.frame with data file content, with columns -#' `IND` (individual id), `SEX` (female or male), `POP` (population id), -#' and remaining columns corresponding to SNPs where each entry encode the -#' number of ancestral allele for the loci, i.e. `0`, `1` and `2` for -#' autosome (`A`) and X-chromosome (`X`) in female, or `0` and `1` for -#' haploid locus (`H`), Y-chromosome (`Y`) in male and -#' mitochondrial locus (`M`). -#' Note : missing values are encoded by a `9`. -#' @param snp_type vector of column header, being `IND`, `SEX`, `POP` followed -#' by each locus type (among `A`, `H`, `X`, `Y`, `M`). -#' @param locus_details data.frame with two columns, `count` being the number -#' of locus for each type in the data, and `type` being the corresponding locus -#' type (among `A`, `H`, `X`, `Y`, `M`). -#' @importFrom dplyr bind_rows -filter_snp_indseq <- function(content, indiv_info, snp_type, locus_details, - maf=0.05) { - - ncore <- getOption("diyabcGUI")$ncore - snp_filter <- NULL - - if(is.null(maf)) maf <- 0 - - snp_filter <- pblapply( - 1:nrow(content), - function(ind) { - out <- indseq_locus_filter( - snp_data = content[ind,], - sex_data = indiv_info$SEX, - locus_type = snp_type[ind], - maf = maf - ) - }, - cl = ncore - ) - - seek_error <- unlist(lapply( - snp_filter, - function(item) "try-error" %in% class(item) - )) - if(any(seek_error)) { - error_msg <- attributes( - snp_filter[[ which(seek_error[1]) ]] - )$condition$message - err <- str_c( - "Issue when checking IndSeq SNP data file", - "content:", - error_msg, - sep = " " - ) - pprint(err) - return(NULL) - } else { - # no error - snp_filter <- Reduce("bind_rows", snp_filter) - - # extract number of filtered loci by type - tmp_filter <- tapply( - snp_filter$filter, - snp_type, - sum - ) - tmp_filter <- data.frame( - filter=tmp_filter, type=names(tmp_filter), - row.names=NULL, stringsAsFactors = FALSE - ) - - # extract number of monomorphic loci by type - tmp_mono <- tapply( - snp_filter$mono, - snp_type, - sum - ) - tmp_mono <- data.frame( - mono=tmp_mono, type=names(tmp_mono), - row.names=NULL, stringsAsFactors = FALSE - ) - - # extract number of loci with issue regarding sex by type - tmp_issue <- tapply( - snp_filter$issue, - snp_type, - sum - ) - tmp_issue <- data.frame( - issue=tmp_issue, type=names(tmp_issue), - row.names=NULL, stringsAsFactors = FALSE - ) - - # merge all results into locos_details table - locus_details <- merge(locus_details, tmp_filter) - locus_details <- merge(locus_details, tmp_mono) - locus_details <- merge(locus_details, tmp_issue) - } - - return(locus_details) -} - -#' Check PoolSeq SNP data file -#' @keywords internal -#' @author Ghislain Durif -#' @param data_file string, data file name. -#' @param data_dir string, path to directory where data file is stored. -#' @param expected_data_file string, expected data file name for -#' existing project, default is NULL. -#' @importFrom tools file_ext -check_poolseq_snp_data_file <- function(data_file, data_dir, - expected_data_file = NULL) { - # output - header <- NULL - info <- NULL - spec <- NULL - valid <- TRUE - - err <- list() - msg <- list() - - locus <- NULL - n_loci <- NULL - n_pop <- NULL - n_indiv <- NULL - mrc <- NULL - - # data path - data_path <- file.path(data_dir, data_file) - - ## file exists and is not empty? - if(!file.exists(data_path)) { - err <- append(err, "Input data file does not exist") - valid <- FALSE - } else if(file.info(data_path)$size == 0) { - err <- append(err, "Input data file is empty") - valid <- FALSE - } else { - ## check file type - if(tools::file_ext(data_path) != "snp") { - err <- append( - err, - "PoolSeq SNP files should have an extension '.snp'." - ) - valid <- FALSE - } else { - ## info - info <- readLines(data_path, n = 1, warn = FALSE) - # sex ratio - pttrn <- "(?<=<)NM=[0-9]+\\.?[0-9]*NF(?=>)" - if(str_detect(info, pttrn)) { - msg <- append( - msg, - str_c("Sex ratio:", str_extract(info, pttrn), sep = " ") - ) - } else { - err <- append( - err, - str_c( - "Issue with PoolSeq SNP file header first line:", - "missing 'sex ratio', see manual", sep = " " - ) - ) - valid <- FALSE - } - # MRC - pttrn <- "(?<=<)MRC=[0-9]+(?=>)" - if(str_detect(info, pttrn)) { - msg <- append( - msg, - str_c( - "Minimum read count:", - str_extract(info, pttrn), sep = " " - ) - ) - pttrn <- "(?<=)" - mrc <- as.integer(str_extract(info, pttrn)) - } else { - err <- append( - err, - str_c( - "Issue with PoolSeq SNP file header first line:", - "missing 'minimum read count (MRC)', see manual", - sep = " " - ) - ) - valid <- FALSE - } - # additional info - pttrn <- str_c( - "()", - "()", - sep = "|" - ) - add_info <- str_trim(str_replace_all(info, pttrn, "")) - if(str_length(add_info) > 0) { - msg <- append( - msg, - str_c("Additional information in data file header:", - add_info, sep = " ") - ) - } - ## header - header <- tryCatch( - as.vector(read.table(file = data_path, skip = 1, nrows = 1)), - error = function(e) e - ) - if("error" %in% class(header)) { - err <- append( - err, - str_c( - "Issue with PoolSeq SNP file header second line:", - header$message, sep = " " - ) - ) - valid <- FALSE - } else { - # header format - if(header[1] != "POOL" & - header[2] != "POP_NAME:HAPLOID_SAMPLE_SIZE" & - !all(str_detect(header[-(1:2)], "POP[0-9]+:[0-9]+"))) { - err <- append( - err, - str_c( - "Issue with PoolSeq SNP file header second line:", - "required format is", - "'POOL POP_NAME:HAPLOID_SAMPLE_SIZE' followed by", - "a character string 'POP:'", - "for each population in the file (see manual).", - sep = " " - ) - ) - valid <- FALSE - } - # number of population - n_pop <- length(header) - 2 - # merge header - header <- str_c(header, collapse = " ") - msg <- append( - msg, - str_c("header:", str_c("'", header, "'"), sep = " ") - ) - ## content - content <- tryCatch( - read.table(file = data_path, skip = 2), - error = function(e) e - ) - if("error" %in% class(content)) { - err <- append( - err, - str_c( - "Issue with PoolSeq SNP data file content:", - content$message, sep = " " - ) - ) - valid <- FALSE - } else { - # check number of population - if(ncol(content) %% 2 != 0) { - err <- append( - err, - str_c( - "Issue with PoolSeq SNP data file content:", - "number of column should be even,", - "providing pair of counts for reference", - "and alternate alleles at each locus", - "in each popultation.", - sep = " " - ) - ) - valid <- FALSE - } - if((ncol(content) / 2) != n_pop) { - err <- append( - err, - str_c( - "Issue with PoolSeq SNP data", - "file second-line header", - "or content:", - "number of population indicated in", - "file second-line header", - "does not correspond to", - "number of columns in file content.", - sep = " " - ) - ) - valid <- FALSE - } - msg <- append( - msg, - str_c("Number of population(s) =", n_pop, sep = " ") - ) - # nb of locus - n_loci <- nrow(content) - msg <- append( - msg, - str_c("Total number of loci =", n_loci, sep = " ") - ) - # check data encoding - check_snp_encoding <- apply(content, 2, is.integer) - if(!all(unlist(check_snp_encoding))) { - err <- append( - err, - str_c( - "Issue with PoolSeq SNP file data content:", - "SNP encoding should be read counts,", - "i.e. positive integer (see manual).", - sep = " " - ) - ) - valid <- FALSE - } - if(any(is.na(content))) { - err <- append( - err, - str_c( - "Issue with PoolSeq SNP file data content:", - "no missing values are allowed.", - sep = " " - ) - ) - valid <- FALSE - } - # loci - if(valid) { - # filtering locus - # TODO check (> or >=) and (& or |) for mrc filter - allele1_count <- apply( - content[,rep(c(TRUE,FALSE), n_pop)], 1, sum - ) - allele2_count <- apply( - content[,rep(c(FALSE,TRUE), n_pop)], 1, sum - ) - snp_filter <- (allele1_count >= mrc) & - (allele2_count >= mrc) - # locus type - locus <- str_c(sum(snp_filter), "", sep = " ") - msg <- append( - msg, - str_c( - "Available loci:", locus, - "(note:", sum(!snp_filter), - "loci are filtered out", - "based on MRC criterion)", - sep = " " - ) - ) - } - } - } - } - # output - spec <- lst(locus, n_indiv, n_loci, n_pop) - } - ## expected data file ? - if(!is.null(expected_data_file)) { - if(data_file != expected_data_file) { - err <- append( - err, - str_c( - "Data file expected by provided 'header.txt'", - "or 'headerRF.txt' file(s)", - "and data file provided by user are different.", - sep = " " - ) - ) - valid <- FALSE - } - } - ## output - out <- lst(header, info, spec, valid, err, msg) - return(out) -} - -#' Check Microsat/Sequence (GenePop) data file -#' @keywords internal -#' @author Ghislain Durif -#' @param data_file string, data file name. -#' @param data_dir string, path to directory where data file is stored. -#' @param expected_data_file string, expected data file name for -#' existing project, default is NULL. -#' @importFrom tools file_ext -check_mss_data_file <- function(data_file, data_dir, - expected_data_file = NULL) { - # output - header <- NULL - info <- NULL - spec <- NULL - valid <- TRUE - - err <- list() - msg <- list() - - # data path - data_path <- file.path(data_dir, data_file) - - ## file exists and is not empty? - if(!file.exists(data_path)) { - err <- append(err, "Input data file does not exist") - valid <- FALSE - } else if(file.info(data_path)$size == 0) { - err <- append(err, "Input data file is empty") - valid <- FALSE - } else { - ## init output - locus <- NULL - locus_mode <- NULL - locus_name <- NULL - seq_length <- NULL - n_loci <- NULL - n_pop <- NULL - n_indiv <- NULL - pop_size <- NULL - ## check file type - if(tools::file_ext(data_path) != "mss") { - err <- append( - err, - "Microsat/Sequence files should have an extension '.mss'" - ) - valid <- FALSE - } else { - ## info - info <- readLines(data_path, n = 1, warn = FALSE) - # sex ratio - pttrn <- "(?<=<)NM=[0-9]+\\.?[0-9]*NF(?=>)" - if(str_detect(info, pttrn)) { - msg <- append( - msg, - str_c("Sex ratio:", str_extract(info, pttrn), sep = " ") - ) - } else { - err <- append( - err, - str_c( - "Issue with Microsat/Sequence file header first line:", - "missing 'sex ratio', see manual", sep = " " - ) - ) - valid <- FALSE - } - # additional info - pttrn <- "" - add_info <- str_trim(str_replace_all(info, pttrn, "")) - if(str_length(add_info) > 0) { - msg <- append( - msg, - str_c("Additional information:", add_info, sep = " ") - ) - } - - ## file content - file_content <- readLines(data_path, warn = FALSE) - - ## locus description - pttrn <- "(?<=<)(A|H|X|Y|M)(?=>)" - locus_pttrn_match <- str_extract(file_content, pttrn) - # check if present - if(all(is.na(locus_pttrn_match))) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequence file locus description:", - "missing locus description, see manual", sep = " " - ) - ) - valid <- FALSE - } else { - # check if description follows title line - locus_match_ind <- which(!is.na(locus_pttrn_match)) - if(!all(locus_match_ind == 2:tail(locus_match_ind, 1))) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequence file locus", - "description:", - "locus description should be located", - "at beginning of data file, after title line,", - "with a single locus per line, see manual", - sep = " " - ) - ) - valid <- FALSE - } - - # check locus format - # pttrn <- "^(Locus )?[A-Za-z0-9_-]+ <(A|H|X|Y|M)>$" - pttrn <- "^[A-Za-z0-9\\s_\\-]+ <(A|H|X|Y|M)>$" - locus_spec_match_ind <- which(str_detect(file_content, pttrn)) - if(!all(locus_spec_match_ind %in% locus_match_ind) & - !all(locus_match_ind %in% locus_spec_match_ind)) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequence file locus", - "description format at rows:", - str_c( - locus_spec_match_ind[!locus_spec_match_ind %in% locus_match_ind], - collapse = ", " - ), - sep = " " - ), - str_c( - "You can use the following character to specify", - "locus names:", - "'A-Z', 'a-z', '0-9', '_', '-' and ' '", - sep = " " - ) - ) - valid <- FALSE - } - - ## number of locus - n_loci <- length(locus_match_ind) - msg <- append( - msg, - str_c("Number of loci =", n_loci, sep = " ") - ) - - ## locus info - pttrn <- "(?<=<)(A|H|X|Y|M)(?=>$)" - locus <- str_extract(file_content[locus_match_ind], pttrn) - - ## locus name - pttrn <- "^[A-Za-z0-9\\s_\\-]+(?= <)" - locus_name <- str_extract(file_content[locus_match_ind], pttrn) - locus_name <- str_replace_all(locus_name, " +", "_") - - if(length(unique(locus_name)) != length(locus_name)) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequence file locus", - "description:", - "each locus should have a unique name", - sep = " " - ) - ) - valid <- FALSE - } - } - - ## remove unnecessary space - file_content <- str_replace_all( - str_trim(file_content), - " +", " " - ) - - ## population - pttrn <- "^(?i)pop(?-i)$" - pop_match_ind <- which(str_detect(file_content, pttrn)) - # check - if(length(pop_match_ind) == 0) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequence file locus", - "data description:", - "keyword 'POP' is missing, see manual", - sep = " " - ) - ) - valid <- FALSE - } else { - # number of pop - n_pop <- length(pop_match_ind) - # pop size - pop_size <- diff(c(pop_match_ind, length(file_content))) - - c(rep(1, n_pop - 1), 0) - # number of individuals - n_indiv <- sum(pop_size) - # msg - msg <- append( - msg, - str_c( - "Sample:", - n_indiv, "individuals", - "from", n_pop, "populations", - sep = " ") - ) - - ## DATA content - data_ind <- head(pop_match_ind, 1):length(file_content) - data_ind <- data_ind[!data_ind %in% pop_match_ind] - data_content <- file_content[data_ind] - - # write data content to a temporary file - data_content <- str_replace_all( - str_replace_all(data_content, ",", " "), - " +", ";" - ) - tmpFile <- file.path(data_dir, "tmp_data_file.csv") - tmp <- writeLines(data_content, tmpFile) - on.exit({ - if(file.exists(tmpFile)) - fs::file_delete(tmpFile) - }) - - # read data as data.frame - data_content <- tryCatch( - read.table(tmpFile, sep = ";", stringsAsFactors = FALSE, - colClasses = "character"), - error = function(e) e - ) - if("error" %in% class(data_content)) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequences data file content:", - content$message, sep = " " - ) - ) - valid <- FALSE - } else { - ##### check data - ## check number of loci - if(ncol(data_content) != n_loci + 1) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequences data file", - "content:", - "number of declared loci at beginning of file", - "does not correspond to number of actual loci", - "in the data", sep = " " - ) - ) - valid <- FALSE - } - - # format data.frame - colnames(data_content) <- c("indiv", - str_c("locus", 1:n_loci)) - data_content$pop <- as.character(rep(1:n_pop, pop_size)) - - ## indiv name first column - if(!all(str_detect(data_content$indiv, - "[A-Za-z0-9_-]+"))) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequences data file", - "content:", - "First column should provide individual names,", - "you can use the following character to specify", - "such names:", - "'A-Z', 'a-z', '0-9', '_', '-'", - sep = " " - ) - ) - valid <- FALSE - } - - ## check locus encoding - microsat_hap_encoding <- "^[0-9]{3}$" - microsat_dip_encoding <- "^[0-9]{6}$" - microsat_x_encoding <- "^[0-9]{3}|[0-9]{6}$" - nucleotid_encoding <- "[ATCG]*" - seq_hap_encoding <- str_c("^<\\[", - nucleotid_encoding, - "\\]>") - seq_dip_encoding <- str_c("^<\\[", - nucleotid_encoding, - "\\]\\[", - nucleotid_encoding, - "\\]>") - seq_x_encoding <- str_c("^<\\[", - nucleotid_encoding, - "\\](\\[", - nucleotid_encoding, - "\\])?>") - - # locus data - locus_data <- data_content[,2:(ncol(data_content) - 1)] - - if(ncol(data_content) == 3) { - locus_data <- data.frame(locus1 = locus_data) - } - - # microsat locus - microsat_hap_locus <- which( - apply( - locus_data, 2, function(loc) { - return(all(str_detect(loc, - microsat_hap_encoding))) - } - ) & (! locus == "X") - ) - microsat_dip_locus <- which( - apply( - locus_data, 2, function(loc) { - return(all(str_detect(loc, - microsat_dip_encoding))) - } - ) & (! locus == "X") - ) - microsat_x_locus <- which( - apply( - locus_data, 2, function(loc) { - return(all(str_detect(loc, - microsat_x_encoding))) - } - ) & (locus == "X") - ) - - # seq locus - seq_hap_locus <- which( - apply( - locus_data, 2, function(loc) { - return(all(str_detect(loc, - seq_hap_encoding))) - } - ) & (! locus == "X") - ) - seq_dip_locus <- which( - apply( - locus_data, 2, function(loc) { - return(all(str_detect(loc, - seq_dip_encoding))) - } - ) & (! locus == "X") - ) - seq_x_locus <- which( - apply( - locus_data, 2, function(loc) { - return(all(str_detect(loc, - seq_x_encoding))) - } - ) & (locus == "X") - ) - - # check if issue with formating - if(!all(sort(c(microsat_hap_locus, microsat_dip_locus, - microsat_x_locus, - seq_hap_locus, seq_dip_locus, - seq_x_locus)) == - 1:n_loci)) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequences data file", - "content in columns:", - str_c( - (1:n_loci)[!(1:n_loci) %in% - c(microsat_hap_locus, - microsat_dip_locus, - microsat_x_locus, - seq_hap_locus, - seq_dip_locus, - seq_x_locus)], - collapse = ", " - ), - sep = " " - ) - ) - valid <- FALSE - } else { - ## locus mode (microsat or sequence, hap or dip) - tmp_locus_mode <- data.frame( - mode = c(rep("microsat_hap", - length(microsat_hap_locus)), - rep("microsat_dip", - length(microsat_dip_locus)), - rep("microsat_xy", - length(microsat_x_locus)), - rep("seq_hap", - length(seq_hap_locus)), - rep("seq_dip", - length(seq_dip_locus)), - rep("seq_xy", - length(seq_x_locus))), - index = c(microsat_hap_locus, microsat_dip_locus, - microsat_x_locus, - seq_hap_locus, seq_dip_locus, - seq_x_locus) - ) - locus_mode <- - tmp_locus_mode$mode[order(tmp_locus_mode$index)] - - ## check that A are diploid locus - if(!all(which(locus == "A") %in% - c(microsat_dip_locus, seq_dip_locus))) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequences data file", - "content:", - "Autosomal diploid (A)", - "should all be diploid loci.", - sep = " " - ) - ) - valid <- FALSE - } - - ## check that H, M and Y are haploid locus - if(!all(which(locus %in% c("H", "M", "Y")) %in% - c(microsat_hap_locus, seq_hap_locus))) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequences data file", - "content:", - "Autosomal haploid (H), Y-linked (Y)", - "and Mitochondrial (M) loci", - "should all be haploid loci.", - sep = " " - ) - ) - valid <- FALSE - } - } - - ## check seq locus length - seq_length <- unlist(lapply( - 1:n_loci, - function(col_ind) { - if(col_ind %in% c(seq_hap_locus, seq_dip_locus, - seq_x_locus)) { - - tmp <- unlist(lapply( - 1:n_indiv, - function(row_ind) { - return( - str_length(str_extract_all( - locus_data[row_ind,col_ind], - "\\[[ATCG]*\\]", - simplify = TRUE - )) - 2 - ) - } - )) - - out <- max(tmp) - if(!all(unique(tmp) %in% c(0, out))) { - return(-100) - } else { - return(out) - } - } else { - return(NA) - } - } - )) - - if(any(seq_length[!is.na(seq_length)] == -100)) { - err <- append( - err, - str_c( - "Issue with Microsat/Sequences data file", - "content:", - "non-missing sequence data attached to", - "following loci", - str_c(which(seq_length == -100), - collapse = ", "), - "do not have the same length in all", - "individuals", - sep = " " - ) - ) - valid <- FALSE - } - } - } - } - - # output - spec <- lst(locus, locus_mode, locus_name, seq_length, - n_indiv, n_loci, n_pop, pop_size) - } - ## expected data file ? - if(!is.null(expected_data_file)) { - if(data_file != expected_data_file) { - err <- append( - err, - str_c( - "Data file expected by provided 'header.txt'", - "or 'headerRF.txt' file(s)", - "and data file provided by user are different.", - sep = " " - ) - ) - valid <- FALSE - } - } - ## output - out <- lst(header, info, spec, valid, err, msg) - return(out) -} - #' Parse diyabc simulation header file #' @keywords internal #' @author Ghislain Durif @@ -1381,6 +5,4 @@ check_mss_data_file <- function(data_file, data_dir, #' Content: see doc #' @param file_name string, (server-side) path to a headersim file. #' @param type string, MIME file type. -parse_diyabc_headersim <- function(file_name, type) { - -} \ No newline at end of file +parse_diyabc_headersim <- function(file_name, type) {} diff --git a/R-pkg/tests/testthat/test-41_io_management.R b/R-pkg/tests/testthat/test-41_io_management.R index 62c58633..db28bf02 100644 --- a/R-pkg/tests/testthat/test-41_io_management.R +++ b/R-pkg/tests/testthat/test-41_io_management.R @@ -1,232 +1 @@ context("io_management") - -test_that("check_data_file", { - # snp indseq - data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" - data_dir <- file.path(example_dir(), - "IndSeq_SNP_estim_param") - locus_type <- "snp" - seq_mode <- "indseq" - out <- check_data_file(data_file, data_dir, locus_type, seq_mode, - expected_data_file = data_file) - expect_true(out$valid) - - # snp poolseq - data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" - data_dir <- file.path(example_dir(), - "PoolSeq_SNP_estim_param") - locus_type <- "snp" - seq_mode <- "poolseq" - out <- check_data_file(data_file, data_dir, locus_type, seq_mode, - expected_data_file = data_file) - expect_true(out$valid) - - # mss - data_file <- "mss_example_001.mss" - data_dir <- data4test_dir("mss") - locus_type <- "mss" - seq_mode <- NULL - out <- check_data_file(data_file, data_dir, locus_type, seq_mode, - expected_data_file = data_file) - expect_true(out$valid) -}) - -test_that("check_indseq_snp_data_file", { - - data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" - data_dir <- file.path(example_dir(), - "IndSeq_SNP_estim_param") - out <- check_indseq_snp_data_file( - data_file, data_dir, expected_data_file = NULL - ) - expect_true(out$valid) - - data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" - data_dir <- file.path(example_dir(), - "PoolSeq_SNP_estim_param") - out <- check_indseq_snp_data_file( - data_file, data_dir, expected_data_file = NULL - ) - expect_false(out$valid) -}) - -test_that("indseq_locus_filter", { - # generate test data - n_indiv <- 100 - n_pop <- 5 - indiv_id <- str_c("ind", 1:n_indiv) - indiv_sex <- sample(c("F", "M"), size = n_indiv, replace = TRUE) - indiv_pop <- sample(1:n_pop, size = n_indiv, replace = TRUE) - content <- data.frame( - IND = str_c("ind", 1:n_indiv), - SEX = indiv_sex, - POP = indiv_pop, - A = sample(0:2, size = n_indiv, replace = TRUE), - H = sample(0:1, size = n_indiv, replace = TRUE), - X = ifelse( - indiv_sex == "F", - sample(0:2, size = n_indiv, replace = TRUE), - sample(0:1, size = n_indiv, replace = TRUE) - ), - Y = ifelse( - indiv_sex == "F", - rep(9, n_indiv), - sample(0:1, size = n_indiv, replace = TRUE) - ), - M = sample(0:1, size = n_indiv, replace = TRUE), - stringsAsFactors = FALSE - ) - - col_type <- colnames(content) - - locus_details <- data.frame( - count = rep(1, 5), - type = tail(col_type, 5), - stringsAsFactors = FALSE - ) - - maf <- 0.05 - - # test without missing values at random - snp_filter <- lapply( - 1:(ncol(content) - 3) + 3, - function(ind) { - out <- indseq_locus_filter( - snp_data = content[,ind], - sex_data = content[,2], - locus_type = col_type[ind], - maf = maf - ) - } - ) - snp_filter <- Reduce("rbind", snp_filter) - - expect_true(is.data.frame(snp_filter)) - expect_identical(colnames(snp_filter), c("filter", "mono", "issue")) - }) - -test_that("filter_snp_indseq", { - ## generate simulated test data - n_indiv <- 100 - n_pop <- 5 - indiv_id <- str_c("ind", 1:n_indiv) - indiv_sex <- sample(c("F", "M"), size = n_indiv, replace = TRUE) - indiv_pop <- sample(1:n_pop, size = n_indiv, replace = TRUE) - content <- data.frame( - IND = str_c("ind", 1:n_indiv), - SEX = indiv_sex, - POP = indiv_pop, - A = sample(0:2, size = n_indiv, replace = TRUE), - H = sample(0:1, size = n_indiv, replace = TRUE), - X = ifelse( - indiv_sex == "F", - sample(0:2, size = n_indiv, replace = TRUE), - sample(0:1, size = n_indiv, replace = TRUE) - ), - Y = ifelse( - indiv_sex == "F", - rep(9, n_indiv), - sample(0:1, size = n_indiv, replace = TRUE) - ), - M = sample(0:1, size = n_indiv, replace = TRUE), - stringsAsFactors = FALSE - ) - - col_type <- colnames(content) - - locus_details <- data.frame( - count = rep(1, 5), - type = tail(col_type, 5), - stringsAsFactors = FALSE - ) - - maf <- 0.05 - - # test on simulated data - out <- filter_snp_indseq(content, col_type, locus_details, maf) - - expect_true(is.data.frame(out)) - expect_true(all(colnames(out) %in% c("type", "mono", "count", "filter", "issue"))) - - ## test on SNP data file - data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" - data_dir <- file.path(example_dir(), - "IndSeq_SNP_estim_param") - data_path <- file.path(data_dir, data_file) - - # header - col_type <- unname(unlist( - read.table(file = data_path, skip = 1, nrows = 1) - )) - - # locus type - candidate_locus <- c("A", "H", "X", "Y", "M") - locus_encoding <- str_c(header[-(1:3)], collapse = " ") - locus_details <- Reduce("rbind", lapply( - candidate_locus, - function(pttrn) { - count <- str_count(locus_encoding, pttrn) - return(data.frame( - count = count, - type = pttrn, - stringsAsFactors = FALSE - )) - } - )) - - # data - content <- read.table(file = data_path, skip = 2) - - # test on data - out <- filter_snp_indseq(content, col_type, locus_details, maf) - - expect_true(is.data.frame(out)) - expect_true(all(colnames(out) %in% c("type", "mono", "count", "filter", "issue"))) - - -}) - -test_that("check_poolseq_snp_data_file", { - - data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" - data_dir <- file.path(example_dir(), - "PoolSeq_SNP_estim_param") - out <- check_poolseq_snp_data_file( - data_file, data_dir, expected_data_file = NULL - ) - expect_true(out$valid) - - data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" - data_dir <- file.path(example_dir(), - "IndSeq_SNP_estim_param") - out <- check_poolseq_snp_data_file( - data_file, data_dir, expected_data_file = NULL - ) - expect_false(out$valid) -}) - -test_that("check_mss_data_file", { - - data_file <- "mss_example_001.mss" - data_dir <- data4test_dir("mss") - out <- check_mss_data_file( - data_file, data_dir, expected_data_file = NULL - ) - expect_true(out$valid) - - data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" - data_dir <- file.path(example_dir(), - "IndSeq_SNP_estim_param") - out <- check_mss_data_file( - data_file, data_dir, expected_data_file = NULL - ) - expect_false(out$valid) -}) - -test_that("check_file_name", { - expect_true(check_file_name(system.file("DESCRIPTION", - package = "diyabcGUI"))) - expect_false(check_file_name(5)) - expect_false(check_file_name(file.path(fs::path_home(), - "not_existing_file"))) -}) From ea4d8f4f917e7acafbdf0eebcf18a26416c2ef0f Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 18:16:37 +0200 Subject: [PATCH 76/82] fonction to format data info to be printed in the GUI --- R-pkg/R/62_analysis_project_io.R | 137 +++++++++++++++++- .../testthat/test-62_analysis_project_io.R | 27 ++++ 2 files changed, 163 insertions(+), 1 deletion(-) diff --git a/R-pkg/R/62_analysis_project_io.R b/R-pkg/R/62_analysis_project_io.R index 8adcdf46..5fedd85b 100644 --- a/R-pkg/R/62_analysis_project_io.R +++ b/R-pkg/R/62_analysis_project_io.R @@ -335,4 +335,139 @@ check_proj_file <- function(proj_dir, locus_type = "snp") { # output return(out) -} \ No newline at end of file +} + +#' Format data info for user output +#' @keywords internal +#' @author Ghislain Durif +format_data_info <- function(data_check, locus_type, seq_mode) { + + out <- NULL + + ## microsat/sequence + if(locus_type == "mss") { + out <- tagList( + tags$ul( + tags$li( + "Data file:", tags$code(data_check$data_file) + ), + tags$li( + "Number of populations =", + as.character(data_check$n_pop) + ), + tags$li( + "Number of individuals =", + as.character(data_check$n_indiv) + ), + tags$li( + "Total number of loci =", as.character(data_check$n_loci), + ", including", + tags$b(as.character(sum(data_check$locus_mode == "M"))), + "microsat locus, and", + tags$b(as.character(sum(data_check$locus_mode == "S"))), + "sequence locus." + ), + tags$li( + "Sex ratio in the dataset:", + tags$code(data_check$sex_ratio) + ) + ) + ) + ## snp locus / indseq + } else if((locus_type == "snp") && (seq_mode == "indseq")) { + out <- tagList( + tags$ul( + tags$li( + "Data file:", tags$code(data_check$data_file) + ), + tags$li( + "Number of population pools =", + as.character(data_check$n_pop) + ), + tags$li( + "Filtering with Minimun Allele Frequency (MAF) =", + as.character(data_check$maf) + ), + tags$li( + "Total number of loci =", + as.character(data_check$n_loci), br(), + do.call( + tags$ul, + lapply( + split( + data_check$locus_count, + seq(nrow(data_check$locus_count)) + ), + function(item) { + return(tags$li( + "Number of ", + tags$code(as.character(item$type)), + "loci =", as.character(item$count), br(), + "Number of excluded loci (with MAF <", + as.character(data_check$maf), ") =", + as.character(item$filter), + ", including", as.character(item$mono), + "monomorphic loci.", br(), + tags$b( + "Number of", + tags$code(as.character(item$type)), + "available of loci =", + as.character( + item$count - item$filter + ) + ) + )) + } + ) + ) + ), + tags$li( + "Sex ratio in the dataset:", + tags$code(data_check$sex_ratio) + ) + ) + ) + ## snp locus / poolseq + } else if((locus_type == "snp") && (seq_mode == "poolseq")) { + out <- tagList( + tags$ul( + tags$li( + "Data file:", tags$code(data_check$data_file) + ), + tags$li( + "Number of population pools =", + as.character(data_check$n_pop) + ), + tags$li( + "Filtering with Minimum Read Count (MRC) =", + as.character(data_check$mrc) + ), + tags$li( + "Total number of loci =", as.character(data_check$n_loci), + ), + tags$li( + "Number of excluded loci (with MRC <", + as.character(data_check$n_loci), ") =", + as.character(data_check$locus_count$filter), + ", including", as.character(data_check$locus_count$mono), + "monomorphic loci." + ), + tags$li( + tags$b( + "Total number available of loci =", + as.character( + data_check$n_loci - data_check$locus_count$filter + ) + ) + ), + tags$li( + "Sex ratio in the dataset:", + tags$code(data_check$sex_ratio) + ) + ) + ) + } + + ## output + return(out) +} diff --git a/R-pkg/tests/testthat/test-62_analysis_project_io.R b/R-pkg/tests/testthat/test-62_analysis_project_io.R index f61c1b96..45a51e26 100644 --- a/R-pkg/tests/testthat/test-62_analysis_project_io.R +++ b/R-pkg/tests/testthat/test-62_analysis_project_io.R @@ -299,3 +299,30 @@ test_that("check_proj_file", { expect_null(res$reftable_check) expect_null(res$statobs_check) }) + +test_that("check_data_file", { + # snp indseq + data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" + data_dir <- file.path(example_dir(), + "IndSeq_SNP_estim_param") + locus_type <- "snp" + seq_mode <- "indseq" + data_check <- check_data_file(data_file, data_dir, locus_type, seq_mode) + res <- format_data_info(data_check, locus_type, seq_mode) + + # snp poolseq + data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" + data_dir <- file.path(data4test_dir(), "PoolSeq_SNP_estim_param") + locus_type <- "snp" + seq_mode <- "poolseq" + data_check <- check_data_file(data_file, data_dir, locus_type, seq_mode) + res <- format_data_info(data_check, locus_type, seq_mode) + + # mss + data_file <- "mss_example_001.mss" + data_dir <- data4test_dir("mss") + locus_type <- "mss" + seq_mode <- NULL + data_check <- check_data_file(data_file, data_dir, locus_type, seq_mode) + res <- format_data_info(data_check, locus_type, seq_mode) +}) From 6222eb63a5b0321d4a1e726cfe708662eef05806 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 18:19:46 +0200 Subject: [PATCH 77/82] move data check function to another file + minor modification in data check --- R-pkg/R/44_data_check.R | 28 ------------------ R-pkg/R/62_analysis_project_io.R | 29 +++++++++++++++++++ R-pkg/tests/testthat/test-44_data_check.R | 29 ------------------- .../testthat/test-62_analysis_project_io.R | 27 +++++++++++++++++ 4 files changed, 56 insertions(+), 57 deletions(-) delete mode 100644 R-pkg/R/44_data_check.R delete mode 100644 R-pkg/tests/testthat/test-44_data_check.R diff --git a/R-pkg/R/44_data_check.R b/R-pkg/R/44_data_check.R deleted file mode 100644 index 39a00c1a..00000000 --- a/R-pkg/R/44_data_check.R +++ /dev/null @@ -1,28 +0,0 @@ -#' Check data file -#' @keywords internal -#' @author Ghislain Durif -#' @param data_file string, data file name. -#' @param data_dir string, path to directory where data file is stored. -#' @param locus_type string, locus type `"mss"` or `"snp"`. -#' @param seq_mode string, `"indseq"` or `"poolseq"`. -check_data_file <- function(data_file, data_dir, locus_type, seq_mode) { - # output - out <- NULL - # ## debugging - # logging("data_file = ", data_file) - ## mss locus - if(locus_type == "mss") { - out <- read_mss_data(data_file, data_dir) - ## snp locus / indseq - } else if((locus_type == "snp") & (seq_mode == "indseq")) { - out <- read_indseq_snp_data(data_file, data_dir) - ## snp locus / poolseq - } else if((locus_type == "snp") & (seq_mode == "poolseq")) { - out <- read_poolseq_snp_data(data_file, data_dir) - } else { - stop("Issue with input arguments") - } - - ## output - return(out) -} \ No newline at end of file diff --git a/R-pkg/R/62_analysis_project_io.R b/R-pkg/R/62_analysis_project_io.R index 5fedd85b..8283bdfc 100644 --- a/R-pkg/R/62_analysis_project_io.R +++ b/R-pkg/R/62_analysis_project_io.R @@ -337,6 +337,35 @@ check_proj_file <- function(proj_dir, locus_type = "snp") { return(out) } +#' Check data file +#' @keywords internal +#' @author Ghislain Durif +#' @param data_file string, data file name. +#' @param data_dir string, path to directory where data file is stored. +#' @param locus_type string, locus type `"mss"` or `"snp"`. +#' @param seq_mode string, `"indseq"` or `"poolseq"`. +check_data_file <- function(data_file, data_dir, locus_type, seq_mode) { + # output + out <- NULL + # ## debugging + # logging("data_file = ", data_file) + ## mss locus + if(locus_type == "mss") { + out <- read_mss_data(data_file, data_dir) + ## snp locus / indseq + } else if((locus_type == "snp") && (seq_mode == "indseq")) { + out <- read_indseq_snp_data(data_file, data_dir) + ## snp locus / poolseq + } else if((locus_type == "snp") && (seq_mode == "poolseq")) { + out <- read_poolseq_snp_data(data_file, data_dir) + } else { + stop("Issue with input arguments") + } + + ## output + return(out) +} + #' Format data info for user output #' @keywords internal #' @author Ghislain Durif diff --git a/R-pkg/tests/testthat/test-44_data_check.R b/R-pkg/tests/testthat/test-44_data_check.R deleted file mode 100644 index bf9100b8..00000000 --- a/R-pkg/tests/testthat/test-44_data_check.R +++ /dev/null @@ -1,29 +0,0 @@ -context("44_data_check") - -test_that("check_data_file", { - # snp indseq - data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" - data_dir <- file.path(example_dir(), - "IndSeq_SNP_estim_param") - locus_type <- "snp" - seq_mode <- "indseq" - out <- check_data_file(data_file, data_dir, locus_type, seq_mode) - expect_true(out$valid) - - # snp poolseq - data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" - data_dir <- file.path(example_dir(), - "PoolSeq_SNP_estim_param") - locus_type <- "snp" - seq_mode <- "poolseq" - out <- check_data_file(data_file, data_dir, locus_type, seq_mode) - expect_true(out$valid) - - # mss - data_file <- "mss_example_001.mss" - data_dir <- data4test_dir("mss") - locus_type <- "mss" - seq_mode <- NULL - out <- check_data_file(data_file, data_dir, locus_type, seq_mode) - expect_true(out$valid) -}) diff --git a/R-pkg/tests/testthat/test-62_analysis_project_io.R b/R-pkg/tests/testthat/test-62_analysis_project_io.R index 45a51e26..7de3d785 100644 --- a/R-pkg/tests/testthat/test-62_analysis_project_io.R +++ b/R-pkg/tests/testthat/test-62_analysis_project_io.R @@ -301,6 +301,33 @@ test_that("check_proj_file", { }) test_that("check_data_file", { + # snp indseq + data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" + data_dir <- file.path(data4test_dir(), "IndSeq_SNP_estim_param") + locus_type <- "snp" + seq_mode <- "indseq" + out <- check_data_file(data_file, data_dir, locus_type, seq_mode) + expect_true(out$valid) + + # snp poolseq + data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" + data_dir <- file.path(data4test_dir(), "PoolSeq_SNP_estim_param") + locus_type <- "snp" + seq_mode <- "poolseq" + out <- check_data_file(data_file, data_dir, locus_type, seq_mode) + expect_true(out$valid) + + # mss + data_file <- "mss_example_001.mss" + data_dir <- data4test_dir("mss") + locus_type <- "mss" + seq_mode <- NULL + out <- check_data_file(data_file, data_dir, locus_type, seq_mode) + expect_true(out$valid) +}) + + +test_that("format_data_info", { # snp indseq data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" data_dir <- file.path(example_dir(), From 461df44d6e056d98fcb1ae414170d5c2cb9095f1 Mon Sep 17 00:00:00 2001 From: GD Date: Thu, 6 May 2021 18:35:17 +0200 Subject: [PATCH 78/82] Exising and example project reset when changing mode + data check feedback (FIXME: missing req) --- R-pkg/R/61_analysis_module.R | 217 ++++++++++++++++------------------- 1 file changed, 96 insertions(+), 121 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index bb6ab8ed..1984570e 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -102,6 +102,16 @@ analysis_proj_set_server <- function(input, output, session) { ## data type callModule(data_type_server, "data_type", tag = "ap") + + ## reset project when data type change + observeEvent({c(env$ap$locus_type, env$ap$seq_mode)}, { + req(env$ap$proj_dir) + + # clean before upload + clean_proj_dir(env$ap$proj_dir) + # file modification + update_proj_file("ap") + }) ## project type callModule(proj_type_server, "proj_type") @@ -110,7 +120,7 @@ analysis_proj_set_server <- function(input, output, session) { callModule(proj_config_server, "proj_config") ## data file - callModule(data_file_server, "data_file") + # callModule(data_file_server, "data_file") # output$proj_is_ready <- renderUI({ # if(!(out$valid_proj & out$valid_data_file)) { @@ -122,9 +132,6 @@ analysis_proj_set_server <- function(input, output, session) { # } # }) # - - ## output - return(out) } #' Project type setting ui @@ -378,7 +385,8 @@ proj_file_check_ui <- function(id) { proj_file_check_server <- function(input, output, session) { ## file check - observeEvent({c(env$ap$file_modif, env$ap$proj_file_list)}, { + observeEvent({c(env$ap$file_modif, env$ap$proj_file_list, + env$ap$proj_dir, env$ap$locus_type)}, { req(env$ap$proj_dir) req(env$ap$locus_type) # file check @@ -665,9 +673,7 @@ existing_proj_server <- function(input, output, session, }) # reset file upload when another mode is chosen - observe({ - req(local$proj_type) - req(local$proj_type != "existing") + observeEvent({c(local$proj_type, env$ap$locus_type, env$ap$seq_mode)}, { shinyjs::reset("file_input") }) @@ -774,7 +780,7 @@ example_proj_server <- function(input, output, session, }) # update possible input - observeEvent(local$proj_type, { + observeEvent({c(local$proj_type, env$ap$locus_type, env$ap$seq_mode)}, { req(local$proj_type) req(local$proj_type == "example") req(env$ap$locus_type) @@ -884,9 +890,7 @@ data_file_ui <- function(id) { "Loading and checking the data file may take some time." ), uiOutput(ns("input_data")), - check_data_ui(ns("check_data_file")), - hr(), - uiOutput(ns("feedback_data")) + check_data_ui(ns("check_data_file")) ) } @@ -902,6 +906,9 @@ data_file_server <- function(input, output, session) { if(isTruthy(env$ap$header_check$valid) && isTruthy(env$ap$proj_file_list) && (env$ap$header_check$data_file %in% env$ap$proj_file_list)) { + # update data file in env + env$ap$data_file <- env$ap$header_check$data_file + # output helpText( icon("comment"), "Data file was already provided." @@ -910,60 +917,12 @@ data_file_server <- function(input, output, session) { input_data_file_ui(ns("input_data_file")) } }) - # observeEvent({c(env$ap$proj_file_list, env$ap$file_modif)}, { - # - # pprint("toto") - # - # output$input_data <- renderUI({ - # if(isTruthy(env$ap$header_check$valid) && - # isTruth(env$ap$proj_file_list) && - # (env$ap$header_check$data_file %in% env$ap$proj_file_list)) { - # helpText( - # icon("comment"), - # "Data file was already provided." - # ) - # } else { - # input_data_file_ui(ns("input_data_file")) - # } - # }) - # }, ignoreNULL = TRUE) + # input data file (if necessary) callModule(input_data_file_server, "input_data_file") - - - # ## Data file file - # data_file <- callModule( - # input_data_server, "input_data_file", - # proj_dir = reactive(out$proj_dir), - # existing_proj_zip = reactive(local$existing_proj_zip) - # ) - # - # # update local if data file upload - # observe({ - # req(!is.null(data_file$name)) - # local$data_file <- data_file$name - # }) - # - # # data file extracted from existing project zip file - # observe({ - # req(!is.null(local$existing_proj_zip)) - # req(!is.null(local$header_data_file)) - # - # if(local$existing_proj_zip) { - # local$data_file <- local$header_data_file - # } - # }) - # - # ## Data file check - # check_data <- callModule( - # check_data_server, "check_data_file", - # data_file = reactive(local$data_file), - # expected_data_file = reactive(local$header_data_file), - # locus_type = reactive(out$locus_type), - # seq_mode = reactive(out$seq_mode), - # proj_dir = reactive(out$proj_dir) - # ) + # check data + callModule(check_data_server, "check_data_file") } #' Input data file ui @@ -1059,65 +1018,81 @@ check_data_ui <- function(id) { ) } -#' check data server +#' Check data server #' @keywords internal #' @author Ghislain Durif check_data_server <- function(input, output, session) { - - # # data check - # observe({ - # req(!is.null(local$data_file)) - # req(!is.null(local$proj_dir)) - # req(!is.null(local$locus_type)) - # req(!is.null(local$seq_mode)) - # # check - # local$file_check <- check_data_file( - # local$data_file, local$proj_dir, - # local$locus_type, local$seq_mode, - # local$exp_data_file - # ) - # # data info - # req(!is.null(local$file_check)) - # req(!is.null(local$file_check$valid)) - # # valid data - # out$valid <- local$file_check$valid - # # data spec - # req(!is.null(local$file_check$spec)) - # out$info <- local$file_check$spec - # }) - # - # # user feedback - # output$data_info <- renderUI({ - # req(!is.null(local$file_check)) - # # show data info - # if(local$file_check$valid) { - # req(local$file_check$msg) - # helpText( - # icon("comment"), "Data file info", - # do.call( - # tags$ul, - # lapply(local$file_check$msg, function(item) { - # return(tags$li(item)) - # }) - # ) - # ) - # } else { - # tmp_msg <- NULL - # if(!is.null(local$file_check$err)) { - # tmp_msg <- do.call( - # tags$ul, - # lapply(local$file_check$err, function(item) { - # return(tags$li(item)) - # }) - # ) - # } - # helpText( - # icon("warning"), "Issue with data file.", - # tmp_msg - # ) - # } - # }) - - # output - return(out) + + ## data check + observeEvent({c(env$ap$file_modif, env$ap$proj_file_list, + env$ap$data_file)}, { + req(env$ap$proj_dir) + req(env$ap$locus_type) + req(env$ap$seq_mode) + req(env$ap$data_file) + # user feedback regarding wait + output$feedback <- renderUI({ + helpText( + icon("spinner", class = "fa-spin"), "Data check is running." + ) + }) + # data file check + env$ap$data_check <- check_data_file( + env$ap$data_file, env$ap$proj_dir, + env$ap$locus_type, env$ap$seq_mode + ) + # user feedback regarding wait + output$feedback <- renderUI({ + NULL + }) + }) + + ## feedback + output$data_info <- renderUI({ + req(env$ap$locus_type) + req(env$ap$seq_mode) + # data case + tmp_data_case <- NULL + if(env$ap$locus_type == "mss") { + tmp_data_case <- "Microsat/Sequence" + ## snp locus / indseq + } else if((env$ap$locus_type == "snp") && + (env$ap$seq_mode == "indseq")) { + tmp_data_case <- "SNP IndSeq" + ## snp locus / poolseq + } else if((env$ap$locus_type == "snp") && + (env$ap$seq_mode == "poolseq")) { + tmp_data_case <- "SNP PoolSeq" + } + + # output + if(isTruthy(env$ap$data_check)) { + if(isTruthy(env$ap$data_check$valid)) { + format_data_info( + env$ap$data_check, env$ap$locus_type, env$ap$seq_mode + ) + } else if(isTruthy(env$ap$data_check$msg)) { + tags$div( + tags$p( + icon("warning"), + "Issue with your", tags$b(tmp_data_case), "data file:", + do.call( + tags$ul, lapply(env$ap$data_check$msg, tags$li) + ) + ), + style = "color: #F89406;" + ) + } else { + tags$div( + tags$p( + icon("warning"), + "Issue with your", tags$b(tmp_data_case), "data file." + ), + style = "color: #F89406;" + ) + } + } else { + NULL + } + }) } From b130111b2368554341d47dcda0e33fce0dc18cc7 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 7 May 2021 08:41:53 +0200 Subject: [PATCH 79/82] fix typo --- R-pkg/R/21_historical_model.R | 2 +- R-pkg/R/43_data_read.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R-pkg/R/21_historical_model.R b/R-pkg/R/21_historical_model.R index 70d4fdbb..cd811cf4 100644 --- a/R-pkg/R/21_historical_model.R +++ b/R-pkg/R/21_historical_model.R @@ -74,7 +74,7 @@ hist_model_server <- function(input, output, session, updateTextAreaInput(session, "scenario", value = local$raw_scenario) }) - # unvalidate if scenario is edited + # invalidate if scenario is edited observeEvent(input$scenario, { local$validated <- FALSE }) diff --git a/R-pkg/R/43_data_read.R b/R-pkg/R/43_data_read.R index 41442ab7..1e970e01 100644 --- a/R-pkg/R/43_data_read.R +++ b/R-pkg/R/43_data_read.R @@ -461,7 +461,7 @@ check_snp_indseq <- function(content, indiv_info, snp_type, locus_count, snp_tab <- Reduce("bind_rows", snp_list) rm("snp_list") - # check for unvalid locus + # check for invalid locus if(any(!snp_tab$valid)) { # missing pop is_missing_pop <- !is.na(snp_tab$missing_pop) From 02befb83a179371c0a18d6fb874354a575c6f634 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 7 May 2021 08:42:14 +0200 Subject: [PATCH 80/82] additional env variable and improve reset --- R-pkg/R/04_environment.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/R-pkg/R/04_environment.R b/R-pkg/R/04_environment.R index bd835334..7407850b 100644 --- a/R-pkg/R/04_environment.R +++ b/R-pkg/R/04_environment.R @@ -35,6 +35,7 @@ init_diyabcrf_env <- function() { proj_dir = NULL, # project directory locus_type = "snp", # "snp" or "mss" seq_mode = "indseq", # "indseq" or "poolseq" + proj_type = "new", # "new", "existing" or "example" proj_file_list = NULL, # content of the project file_modif = NULL, # counter for project file modification # (upload, new header, ...) @@ -112,12 +113,31 @@ init_diyabcrf_env <- function() { list2reactiveValues(tmp_rf, rf) } +#' Project env reset +#' @keywords internal +#' @author Ghislain Durif +reset_ap <- function() { + env$ap$header_check <- NULL + env$ap$reftable_check <- NULL + env$ap$statobs_check <- NULL + env$ap$data_file <- NULL + env$ap$data_check <- NULL +} + #' Reset environment for DIYABC-RF pipeline #' @keywords internal #' @author Ghislain Durif reset_diyabcrf_env <- function() { # proj dir env$ap$proj_dir <- mk_proj_dir("diyabc_rf") + # ap reset + reset_ap() + # reset specific var + env$ap$locus_type <- "snp" + env$ap$seq_mode <- "indseq" + env$ap$proj_type <- "new" + # proj files + update_proj_file("ap") } #' Initialize environment for data generation pipeline From 70f43fedbc3c96a776c3adbc58d3d311d8c3678d Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 7 May 2021 08:45:50 +0200 Subject: [PATCH 81/82] use global env proj type + better job at reseting project (TODO: fix data check) --- R-pkg/R/61_analysis_module.R | 584 ++++++++++++++++++----------------- 1 file changed, 294 insertions(+), 290 deletions(-) diff --git a/R-pkg/R/61_analysis_module.R b/R-pkg/R/61_analysis_module.R index 1984570e..802439d5 100644 --- a/R-pkg/R/61_analysis_module.R +++ b/R-pkg/R/61_analysis_module.R @@ -42,25 +42,25 @@ analysis_module_ui <- function(id) { #' @keywords internal #' @author Ghislain Durif analysis_module_server <- function(input, output, session) { - + ## project setting proj_set <- callModule(analysis_proj_set_server, "proj_set") - + ## Training set sub-module # training_set <- callModule(training_set_server, "train_set") - + ## random forest sub-module # rf <- callModule(rf_module_server, "rf") - + ## action proj_admin <- callModule(proj_admin_server, "proj_admin", tag = "ap") - + ## reset observeEvent(proj_admin$reset, { req(proj_admin$reset) session$reload() }) - + ## clean on exit session$onSessionEnded(function() { isolate(tryCatch(function() { @@ -90,16 +90,18 @@ analysis_proj_set_ui <- function(id) { ) } + + #' Analysis project setting server #' @keywords internal #' @author Ghislain Durif #' @importFrom dplyr distinct #' @importFrom fs file_copy file_delete analysis_proj_set_server <- function(input, output, session) { - + ## project name callModule(proj_name_server, "proj_name", tag = "ap") - + ## data type callModule(data_type_server, "data_type", tag = "ap") @@ -109,10 +111,12 @@ analysis_proj_set_server <- function(input, output, session) { # clean before upload clean_proj_dir(env$ap$proj_dir) + # reset env + reset_ap() # file modification update_proj_file("ap") }) - + ## project type callModule(proj_type_server, "proj_type") @@ -120,8 +124,8 @@ analysis_proj_set_server <- function(input, output, session) { callModule(proj_config_server, "proj_config") ## data file - # callModule(data_file_server, "data_file") - + callModule(data_file_server, "data_file") + # output$proj_is_ready <- renderUI({ # if(!(out$valid_proj & out$valid_data_file)) { # h3(icon("warning"), "Project set up is not ready.", @@ -191,29 +195,23 @@ proj_type_ui <- function(id) { #' @importFrom fs file_copy file_delete proj_type_server <- function(input, output, session) { + ## project type + observeEvent(input$proj_type, { + req(input$proj_type) + env$ap$proj_type <- input$proj_type + }) + ## New project - callModule( - new_proj_server, "new_proj", - proj_type = reactive(input$proj_type) - ) - + callModule(new_proj_server, "new_proj") + ## Existing project - callModule( - existing_proj_server, "existing_proj", - proj_type = reactive(input$proj_type) - ) - + callModule(existing_proj_server, "existing_proj") + ## Example project - callModule( - example_proj_server, "example_proj", - proj_type = reactive(input$proj_type) - ) - + callModule(example_proj_server, "example_proj") + ## File list for existing or example project - callModule( - proj_file_list_server, "proj_file_list", - proj_type = reactive(input$proj_type) - ) + callModule(proj_file_list_server, "proj_file_list") } #' Feedback on project file list ui @@ -231,17 +229,8 @@ proj_file_list_ui <- function(id) { #' Feedback on project file list server #' @keywords internal #' @author Ghislain Durif -#' @param proj_type character string, `"new"`, `"existing"` or `"example"`. -proj_file_list_server <- function(input, output, session, - proj_type = reactive({NULL})) { - # init local - local <- reactiveValues(proj_type = NULL) - - # get input - observe({ - local$proj_type <- proj_type() - }) - +proj_file_list_server <- function(input, output, session) { + # # debugging # observe({ # req(env$ap$file_modif) @@ -251,119 +240,124 @@ proj_file_list_server <- function(input, output, session, # pprint("file list") # pprint(env$ap$proj_file_list) # }) - + # feedback on list of uploaded files observeEvent( - {c(env$ap$file_modif, env$ap$proj_file_list, local$proj_type)}, { - req(local$proj_type %in% c("existing", "example")) - # output - output$feedback_proj_file <- renderUI({ - # default - tag_list <- tags$div( - icon("warning"), "No file was uploaded.", - style = "color: #F89406; margin-top: -15px;" - ) - # else - if(isTruthy(env$ap) && isTruthy(env$ap$proj_file_list)) { - # project files - proj_file_list <- env$ap$proj_file_list - - if(length(proj_file_list) > 0) { - # expected files - expected_files1 <- c("headerRF.txt", "header.txt") - expected_files2 <- c("statobsRF.txt", "reftableRF.bin") - expected_files <- c(expected_files1, expected_files2) - - # important project files that are present - important_files <- expected_files[expected_files %in% - proj_file_list] - - # additional files - additional_files <- proj_file_list[!proj_file_list %in% - important_files] - - # missing files ? - missing_files <- NULL - - missing_header <- !any(expected_files1 %in% proj_file_list) - if(missing_header) { - missing_files <- c(missing_files, "headerRF.txt") - } - - missing_files2 <- !(expected_files2 %in% proj_file_list) - if(any(missing_files2)) { - missing_files <- c(missing_files, - expected_files2[missing_files2]) - } - # project core files - subitem1 <- NULL - if(length(important_files) > 0) { - subitem1 <- tags$div( - do.call(tags$ul, lapply( - important_files, - function(item) return(tags$li(tags$code(item))) - )) - ) - } else { - subitem1 <- tags$b("none") - } - # additional files - subitem2 <- NULL - if(length(additional_files) > 0) { - subitem2 <- tags$div( - do.call(tags$ul, lapply( - additional_files, - function(item) return(tags$li(tags$code(item))) - )) - ) - } else { - subitem2 <- tags$b("none") - } - item1 <- helpText( - h5(icon("comment"), tags$b("Uploaded files")), - fluidRow( - column( - width = 6, - tagList( - tags$p("Project core files:", subitem1) - ) - ), - column( - width = 6, - tagList( - tags$p("Additional files:", subitem2) + {c(env$ap$file_modif, env$ap$proj_file_list, env$ap$proj_type)}, + { + req(env$ap$proj_type %in% c("existing", "example")) + # output + output$feedback_proj_file <- renderUI({ + # default + tag_list <- tags$div( + icon("warning"), "No file was uploaded.", + style = "color: #F89406; margin-top: -15px;" + ) + # else + if(isTruthy(env$ap) && isTruthy(env$ap$proj_file_list)) { + # project files + proj_file_list <- env$ap$proj_file_list + + if(length(proj_file_list) > 0) { + # expected files + expected_files1 <- c("headerRF.txt", "header.txt") + expected_files2 <- c("statobsRF.txt", "reftableRF.bin") + expected_files <- c(expected_files1, expected_files2) + + # important project files that are present + important_files <- expected_files[expected_files %in% + proj_file_list] + + # additional files + additional_files <- proj_file_list[!proj_file_list %in% + important_files] + + # missing files ? + missing_files <- NULL + + missing_header <- !any(expected_files1 %in% + proj_file_list) + if(missing_header) { + missing_files <- c(missing_files, "headerRF.txt") + } + + missing_files2 <- !(expected_files2 %in% proj_file_list) + if(any(missing_files2)) { + missing_files <- c(missing_files, + expected_files2[missing_files2]) + } + # project core files + subitem1 <- NULL + if(length(important_files) > 0) { + subitem1 <- tags$div( + do.call(tags$ul, lapply( + important_files, + function(item) + return(tags$li(tags$code(item))) + )) + ) + } else { + subitem1 <- tags$b("none") + } + # additional files + subitem2 <- NULL + if(length(additional_files) > 0) { + subitem2 <- tags$div( + do.call(tags$ul, lapply( + additional_files, + function(item) + return(tags$li(tags$code(item))) + )) + ) + } else { + subitem2 <- tags$b("none") + } + item1 <- helpText( + h5(icon("comment"), tags$b("Uploaded files")), + fluidRow( + column( + width = 6, + tagList( + tags$p("Project core files:", subitem1) + ) + ), + column( + width = 6, + tagList( + tags$p("Additional files:", subitem2) + ) ) ) ) - ) - # missing files - item2 <- NULL - if(length(missing_files) > 0) { - item2 <- tags$div( - tags$p( - icon("warning"), - "Potentially missing files", - "for an existing project:", - tags$div( - do.call(tags$ul, lapply( - missing_files, - function(item) - return(tags$li(tags$code(item))) - )) + # missing files + item2 <- NULL + if(length(missing_files) > 0) { + item2 <- tags$div( + tags$p( + icon("warning"), + "Potentially missing files", + "for an existing project:", + tags$div( + do.call(tags$ul, lapply( + missing_files, + function(item) + return(tags$li(tags$code(item))) + )) + ), + tags$b("Note:"), + "you will be able to generate them below." ), - tags$b("Note:"), - "you will be able to generate them below." - ), - style = "color: #F89406;" - ) + style = "color: #F89406;" + ) + } + tag_list <- tagList(item1, item2) } - tag_list <- tagList(item1, item2) } - } - # output - tag_list - }) - }) + # output + tag_list + }) + } + ) } #' Project file check ui @@ -383,21 +377,23 @@ proj_file_check_ui <- function(id) { #' @keywords internal #' @author Ghislain Durif proj_file_check_server <- function(input, output, session) { - + ## file check - observeEvent({c(env$ap$file_modif, env$ap$proj_file_list, - env$ap$proj_dir, env$ap$locus_type)}, { - req(env$ap$proj_dir) - req(env$ap$locus_type) - # file check - file_check <- check_proj_file( - env$ap$proj_dir, env$ap$locus_type - ) - # update env - env$ap$header_check <- file_check$header_check - env$ap$reftable_check <- file_check$reftable_check - env$ap$statobs_check <- file_check$statobs_check - }) + observeEvent( + {c(env$ap$file_modif, env$ap$proj_file_list, + env$ap$proj_dir, env$ap$locus_type)}, + { + req(env$ap$proj_type %in% c("existing", "example")) + req(env$ap$proj_dir) + req(env$ap$locus_type) + # file check + file_check <- check_proj_file(env$ap$proj_dir, env$ap$locus_type) + # update env + env$ap$header_check <- file_check$header_check + env$ap$reftable_check <- file_check$reftable_check + env$ap$statobs_check <- file_check$statobs_check + } + ) ## OUTPUT # global @@ -546,26 +542,17 @@ new_proj_ui <- function(id) { #' New project server #' @keywords internal #' @author Ghislain Durif -#' @param proj_type character string, `"new"`, `"existing"` or `"example"`. -new_proj_server <- function(input, output, session, - proj_type = reactive({NULL})) { - - # init local - local <- reactiveValues(proj_type = NULL) - - # get input - observe({ - local$proj_type <- proj_type() - }) +new_proj_server <- function(input, output, session) { # clean project directory when choosing this mode - observeEvent(local$proj_type, { - req(local$proj_type) - req(local$proj_type == "new") + observeEvent(env$ap$proj_type, { + req(env$ap$proj_type == "new") req(env$ap$proj_dir) # clean before upload clean_proj_dir(env$ap$proj_dir) + # reset env + reset_ap() # file modification update_proj_file("ap") }) @@ -644,62 +631,53 @@ existing_proj_ui <- function(id) { #' Existing project server #' @keywords internal #' @author Ghislain Durif -#' @param proj_type character string, `"new"`, `"existing"` or `"example"`. -existing_proj_server <- function(input, output, session, - proj_type = reactive({NULL})) { - # init local - local <- reactiveValues(proj_type = NULL) - - # get input - observe({ - local$proj_type <- proj_type() - }) - +existing_proj_server <- function(input, output, session) { + # # file_input = data.frame with fields 'name', 'size', 'type', 'datapath' # # debugging # observe({ # pprint("file input") # print(input$file_input) # }) - + # Feedback on file upload observe({ - req(local$proj_type) - req(local$proj_type == "existing") - + req(env$ap$proj_type == "existing") + # feedback on missing file feedbackWarning("file_input", !isTruthy(input$file_input), "Missing file(s).") }) - + # reset file upload when another mode is chosen - observeEvent({c(local$proj_type, env$ap$locus_type, env$ap$seq_mode)}, { + observeEvent({c(env$ap$proj_type, env$ap$locus_type, env$ap$seq_mode)}, { shinyjs::reset("file_input") }) # clean project directory when choosing this mode - observeEvent(local$proj_type, { - req(local$proj_type) - req(local$proj_type == "existing") + observeEvent(env$ap$proj_type, { + req(env$ap$proj_type == "existing") req(env$ap$proj_dir) # clean before upload clean_proj_dir(env$ap$proj_dir) + # reset env + reset_ap() # file modification update_proj_file("ap") }) - + # manage file upload (copy to project directory) observeEvent(input$file_input, { req(input$file_input) req(env$ap$proj_dir) - + # upload input_check <- tryCatch( proj_file_input(input$file_input, env$ap$proj_dir), error = function(e) return(NULL) ) - + # feedback output$feedback_existing <- renderUI({ if(is.null(input_check) || !input_check$valid) { @@ -721,14 +699,16 @@ existing_proj_server <- function(input, output, session, NULL } }) - + # update project file list and check files if(!is.null(input_check) && isTruthy(input_check$valid)) { # file modification update_proj_file("ap") } else { - # clean after failed upload + # clean before upload clean_proj_dir(env$ap$proj_dir) + # reset env + reset_ap() # file modification update_proj_file("ap") } @@ -755,36 +735,26 @@ example_proj_ui <- function(id) { #' Example project server #' @keywords internal #' @author Ghislain Durif -#' @param proj_type character string, `"new"`, `"existing"` or `"example"`. -example_proj_server <- function(input, output, session, - proj_type = reactive({NULL})) { - - # init local - local <- reactiveValues(proj_type = NULL) - - # get input - observe({ - local$proj_type <- proj_type() - }) +example_proj_server <- function(input, output, session) { # clean project directory when choosing this mode - observeEvent(local$proj_type, { - req(local$proj_type) - req(local$proj_type == "example") + observeEvent(env$ap$proj_type, { + req(env$ap$proj_type == "example") req(env$ap$proj_dir) # clean before upload clean_proj_dir(env$ap$proj_dir) + # reset env + reset_ap() # file modification update_proj_file("ap") }) - + # update possible input - observeEvent({c(local$proj_type, env$ap$locus_type, env$ap$seq_mode)}, { - req(local$proj_type) - req(local$proj_type == "example") + observeEvent({c(env$ap$proj_type, env$ap$locus_type, env$ap$seq_mode)}, { + req(env$ap$proj_type == "example") req(env$ap$locus_type) - + ## MSS if(env$ap$locus_type == "mss") { updateSelectInput( @@ -793,7 +763,7 @@ example_proj_server <- function(input, output, session, choices = c("", "Not available at the moment"), selected = NULL ) - ## SNP + ## SNP } else if(env$ap$locus_type == "snp") { req(env$ap$seq_mode) ## IndSeq @@ -811,7 +781,7 @@ example_proj_server <- function(input, output, session, choices = c("", possible_choices), selected = NULL ) - ## PoolSeq + ## PoolSeq } else if(env$ap$seq_mode == "poolseq") { possible_choices <- basename( list.dirs( @@ -829,7 +799,7 @@ example_proj_server <- function(input, output, session, } } }) - + # manage file upload (copy to project directory) observeEvent(input$proj_example, { req(input$proj_example) @@ -837,21 +807,11 @@ example_proj_server <- function(input, output, session, # copy files proj_files <- list.files( - file.path( - example_dir(), - input$proj_example - ) + file.path(example_dir(), input$proj_example) ) fs::file_copy( - path = file.path( - example_dir(), - input$proj_example, - proj_files - ), - new_path = file.path( - env$ap$proj_dir, - proj_files - ), + path = file.path(example_dir(), input$proj_example, proj_files), + new_path = file.path(env$ap$proj_dir, proj_files), overwrite = TRUE ) @@ -885,11 +845,8 @@ data_file_ui <- function(id) { ns <- NS(id) tagList( h3("Data file"), - helpText( - icon("clock"), - "Loading and checking the data file may take some time." - ), uiOutput(ns("input_data")), + br(), check_data_ui(ns("check_data_file")) ) } @@ -953,7 +910,7 @@ input_data_file_server <- function(input, output, session) { feedbackWarning("data_file", !isTruthy(input$data_file), "Missing data file.") }) - + ## get data file observeEvent(input$data_file, { # input$data_file = data.frame with 4 columns: @@ -971,7 +928,7 @@ input_data_file_server <- function(input, output, session) { fs::file_copy(input$data_file$datapath, file.path(local$proj_dir, out$name), overwrite = TRUE) - + if(file.exists(input$data_file$datapath)) { # logging("deleting:", input$data_file$datapath) fs::file_delete(input$data_file$datapath) @@ -1002,9 +959,6 @@ input_data_file_server <- function(input, output, session) { ) } }) - - ## output - return(out) } #' Check data ui @@ -1013,6 +967,25 @@ input_data_file_server <- function(input, output, session) { check_data_ui <- function(id) { ns <- NS(id) tagList( + helpText( + icon("clock"), + "Checking the data file may take some time." + ), + fluidRow( + column( + width = 4, + actionButton( + ns("check"), + label = "Check data", + icon = icon("check"), + width = '100%' + ) + ), + column( + width = 8, + uiOutput(ns("feedback_check")) + ) + ), uiOutput(ns("feedback")), uiOutput(ns("data_info")) ) @@ -1023,31 +996,60 @@ check_data_ui <- function(id) { #' @author Ghislain Durif check_data_server <- function(input, output, session) { - ## data check - observeEvent({c(env$ap$file_modif, env$ap$proj_file_list, - env$ap$data_file)}, { + # init local + local <- reactiveValues(run = FALSE) + + ## warning + output$feedback_check <- renderUI({ + if(!isTruthy(env$ap$data_check) && !local$run) { + tags$p( + tags$div( + icon("warning"), "Data file was not checked.", + style = "color: #F89406;" + ) + ) + } else { + NULL + } + }) + + ## run data check + observeEvent(input$check, { + req(input$check) + req(!local$run) req(env$ap$proj_dir) req(env$ap$locus_type) req(env$ap$seq_mode) req(env$ap$data_file) - # user feedback regarding wait - output$feedback <- renderUI({ - helpText( - icon("spinner", class = "fa-spin"), "Data check is running." - ) - }) + + # ask to run check + local$run <- TRUE + }) + + ## data check + observeEvent(local$run, { + req(isTruthy(local$run)) + req(env$ap$data_file) + req(env$ap$proj_dir) + req(env$ap$locus_type) + req(env$ap$seq_mode) + + pprint(env$ap$data_file) + pprint(env$ap$proj_dir) + pprint(env$ap$locus_type) + pprint(env$ap$seq_mode) + # data file check env$ap$data_check <- check_data_file( - env$ap$data_file, env$ap$proj_dir, + env$ap$data_file, env$ap$proj_dir, env$ap$locus_type, env$ap$seq_mode ) - # user feedback regarding wait - output$feedback <- renderUI({ - NULL - }) + + # run is over + local$run <- FALSE }) - ## feedback + ## data info output$data_info <- renderUI({ req(env$ap$locus_type) req(env$ap$seq_mode) @@ -1056,43 +1058,45 @@ check_data_server <- function(input, output, session) { if(env$ap$locus_type == "mss") { tmp_data_case <- "Microsat/Sequence" ## snp locus / indseq - } else if((env$ap$locus_type == "snp") && + } else if((env$ap$locus_type == "snp") && (env$ap$seq_mode == "indseq")) { tmp_data_case <- "SNP IndSeq" ## snp locus / poolseq - } else if((env$ap$locus_type == "snp") && + } else if((env$ap$locus_type == "snp") && (env$ap$seq_mode == "poolseq")) { tmp_data_case <- "SNP PoolSeq" } - - # output - if(isTruthy(env$ap$data_check)) { - if(isTruthy(env$ap$data_check$valid)) { - format_data_info( - env$ap$data_check, env$ap$locus_type, env$ap$seq_mode - ) - } else if(isTruthy(env$ap$data_check$msg)) { - tags$div( - tags$p( - icon("warning"), - "Issue with your", tags$b(tmp_data_case), "data file:", - do.call( - tags$ul, lapply(env$ap$data_check$msg, tags$li) - ) - ), - style = "color: #F89406;" - ) - } else { - tags$div( - tags$p( - icon("warning"), - "Issue with your", tags$b(tmp_data_case), "data file." - ), - style = "color: #F89406;" - ) - } - } else { - NULL - } + + # # output + # if(isTruthy(env$ap$data_check)) { + # if(isTruthy(env$ap$data_check$valid)) { + # # format_data_info( + # # env$ap$data_check, env$ap$locus_type, env$ap$seq_mode + # # ) + # NULL + # } else if(isTruthy(env$ap$data_check$msg)) { + # tags$div( + # tags$p( + # icon("warning"), + # "Issue with your", tags$b(tmp_data_case), "data file:", + # do.call( + # tags$ul, lapply(env$ap$data_check$msg, tags$li) + # ) + # ), + # style = "color: #F89406;" + # ) + # } else { + # tags$div( + # tags$p( + # icon("warning"), + # "Issue with your", tags$b(tmp_data_case), "data file." + # ), + # style = "color: #F89406;" + # ) + # } + # } else { + # NULL + # } + NULL }) } From 0cd01d958f0ad398c7442728cbe4ffdd348504d7 Mon Sep 17 00:00:00 2001 From: GD Date: Fri, 7 May 2021 09:07:08 +0200 Subject: [PATCH 82/82] WIP: run data check in background --- R-pkg/DESCRIPTION | 1 + R-pkg/NAMESPACE | 3 ++ R-pkg/R/62_analysis_project_io.R | 25 +++++++++++++++++ .../testthat/test-62_analysis_project_io.R | 28 +++++++++++++++++++ 4 files changed, 57 insertions(+) diff --git a/R-pkg/DESCRIPTION b/R-pkg/DESCRIPTION index 17bf70aa..6f4d93cf 100644 --- a/R-pkg/DESCRIPTION +++ b/R-pkg/DESCRIPTION @@ -55,6 +55,7 @@ VignetteBuilder: knitr Imports: dplyr, fs, + future, ggplot2, lubridate, jsonlite, diff --git a/R-pkg/NAMESPACE b/R-pkg/NAMESPACE index bca99a4a..cca76099 100644 --- a/R-pkg/NAMESPACE +++ b/R-pkg/NAMESPACE @@ -25,6 +25,9 @@ importFrom(fs,file_chmod) importFrom(fs,file_copy) importFrom(fs,file_delete) importFrom(fs,path_home) +importFrom(future,future) +importFrom(future,multisession) +importFrom(future,plan) importFrom(ggplot2,aes) importFrom(ggplot2,aes_string) importFrom(ggplot2,annotate) diff --git a/R-pkg/R/62_analysis_project_io.R b/R-pkg/R/62_analysis_project_io.R index 8283bdfc..88fbdb38 100644 --- a/R-pkg/R/62_analysis_project_io.R +++ b/R-pkg/R/62_analysis_project_io.R @@ -366,6 +366,31 @@ check_data_file <- function(data_file, data_dir, locus_type, seq_mode) { return(out) } +#' Check data file in background +#' @keywords internal +#' @author Ghislain Durif +#' @param data_file string, data file name. +#' @param data_dir string, path to directory where data file is stored. +#' @param locus_type string, locus type `"mss"` or `"snp"`. +#' @param seq_mode string, `"indseq"` or `"poolseq"`. +#' @importFrom future plan multisession future +check_data_file_bg <- function(data_file, data_dir, locus_type, seq_mode) { + + # FIXME + + # setup background run + oplan <- plan(multisession) + on.exit(plan(oplan)) + + # setup future + out <- future({ + check_data_file_bg(data_file, data_dir, locus_type, seq_mode) + }) + + ## output + return(out) +} + #' Format data info for user output #' @keywords internal #' @author Ghislain Durif diff --git a/R-pkg/tests/testthat/test-62_analysis_project_io.R b/R-pkg/tests/testthat/test-62_analysis_project_io.R index 7de3d785..cdaf40bd 100644 --- a/R-pkg/tests/testthat/test-62_analysis_project_io.R +++ b/R-pkg/tests/testthat/test-62_analysis_project_io.R @@ -326,6 +326,34 @@ test_that("check_data_file", { expect_true(out$valid) }) +test_that("check_data_file", { + # FIXME + # snp indseq + data_file <- "indseq_SNP_sim_dataset_4POP_001.snp" + data_dir <- file.path(data4test_dir(), "IndSeq_SNP_estim_param") + locus_type <- "snp" + seq_mode <- "indseq" + out <- check_data_file_bg(data_file, data_dir, locus_type, seq_mode) + expect_false(resolved(out)) + expect_true(out$valid) + + # snp poolseq + data_file <- "poolseq_SNP_sim_dataset_4POP_cov100_001.snp" + data_dir <- file.path(data4test_dir(), "PoolSeq_SNP_estim_param") + locus_type <- "snp" + seq_mode <- "poolseq" + out <- check_data_file(data_file, data_dir, locus_type, seq_mode) + expect_true(out$valid) + + # mss + data_file <- "mss_example_001.mss" + data_dir <- data4test_dir("mss") + locus_type <- "mss" + seq_mode <- NULL + out <- check_data_file(data_file, data_dir, locus_type, seq_mode) + expect_true(out$valid) +}) + test_that("format_data_info", { # snp indseq