Wedge-lab
diff --git a/‎DESCRIPTION‎
Lines changed: 5 additions & 5 deletions b/‎DESCRIPTION‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎Dockerfile‎
100644100755
Lines changed: 25 additions & 11 deletions b/‎Dockerfile‎
100644100755
Lines changed: 25 additions & 11 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 6 additions & 5 deletions b/‎NAMESPACE‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎R/Battenberg-package.R‎
Lines changed: 2 additions & 1 deletion b/‎R/Battenberg-package.R‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎R/battenberg.R‎
100644100755
Lines changed: 28 additions & 19 deletions b/‎R/battenberg.R‎
100644100755
Lines changed: 28 additions & 19 deletions
diff --git a/‎R/clonal_ascat.R‎
Lines changed: 8 additions & 0 deletions b/‎R/clonal_ascat.R‎
Lines changed: 8 additions & 0 deletions
@@ -3,7 +3,7 @@ Maintainer: Stefan Dentro <[email protected]>
 License: GPL-3
 Type: Package
 Title: Battenberg subclonal copy number caller
-Version: 2.2.8
+Version: 2.2.9
 Authors@R: c(person("David", "Wedge", role=c("aut"), email="[email protected]"),
     person("Peter", "Van Loo", role=c("aut")),
     person("Stefan","Dentro", email="[email protected]", role=c("aut", "cre")),
@@ -19,17 +19,17 @@ Depends:
   grDevices
 Imports:
   RColorBrewer,
-  ASCAT (>= 2.5),
+  ASCAT (>= 2.5.1),
   ggplot2,
   readr,
   gtools,
   gridExtra,
   doParallel,
   parallel,
-  foreach
+  foreach,
+  splines
 URL: https://github.com/Wedge-Oxford/battenberg
 LazyLoad: yes
-License: file LICENSE
 Suggests:
     testthat
-RoxygenNote: 6.0.1
+RoxygenNote: 7.1.0
@@ -1,20 +1,21 @@
-#FROM r-base
 FROM ubuntu:16.04
 
+USER root
+
 # Add dependencies
 RUN apt-get update && apt-get install -y libxml2 libxml2-dev libcurl4-gnutls-dev r-cran-rgl git libssl-dev curl
 
 RUN mkdir /tmp/downloads
 
-RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/samtools/htslib/archive/1.2.1.tar.gz && \
+RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/samtools/htslib/archive/1.7.tar.gz && \
     mkdir /tmp/downloads/htslib && \
     tar -C /tmp/downloads/htslib --strip-components 1 -zxf tmp.tar.gz && \
     make -C /tmp/downloads/htslib && \
     rm -f /tmp/downloads/tmp.tar.gz
 
 ENV HTSLIB /tmp/downloads/htslib
 
-RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/cancerit/alleleCount/archive/v2.1.2.tar.gz && \
+RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/cancerit/alleleCount/archive/v4.0.0.tar.gz && \
     mkdir /tmp/downloads/alleleCount && \
     tar -C /tmp/downloads/alleleCount --strip-components 1 -zxf tmp.tar.gz && \
     cd /tmp/downloads/alleleCount/c && \
@@ -30,21 +31,34 @@ RUN curl -sSL -o tmp.tar.gz --retry 10 https://mathgen.stats.ox.ac.uk/impute/imp
     cp /tmp/downloads/impute2/impute2 /usr/local/bin && \
     rm -rf /tmp/downloads/impute2 /tmp/downloads/tmp.tar.gz
 
-RUN R -q -e 'source("http://bioconductor.org/biocLite.R"); biocLite(c("devtools","RColorBrewer","ggplot2","gridExtra","readr","doParallel","foreach"))'
+RUN R -q -e 'source("http://bioconductor.org/biocLite.R"); biocLite(c("gtools", "optparse", "devtools","RColorBrewer","ggplot2","gridExtra","readr","doParallel","foreach", "splines"))'
 RUN R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")'
-RUN R -q -e 'devtools::install_github("Wedge-Oxford/battenberg")'
 
-RUN curl -sSL -o battenberg_wgs.R https://raw.githubusercontent.com/Wedge-Oxford/battenberg/master/inst/example/battenberg_wgs.R
+RUN mkdir -p /opt/battenberg
+COPY . /opt/battenberg/
+RUN R -q -e 'install.packages("/opt/battenberg", repos=NULL, type="source")'
+
 # modify paths to reference files
-RUN cat battenberg_wgs.R | \
+RUN cat /opt/battenberg/inst/example/battenberg_wgs.R | \
     sed 's|IMPUTEINFOFILE = \".*|IMPUTEINFOFILE = \"/opt/battenberg_reference/1000genomes_2012_v3_impute/impute_info.txt\"|' | \
     sed 's|G1000PREFIX = \".*|G1000PREFIX = \"/opt/battenberg_reference/1000genomes_2012_v3_loci/1000genomesAlleles2012_chr\"|' | \
     sed 's|G1000PREFIX_AC = \".*|G1000PREFIX_AC = \"/opt/battenberg_reference/1000genomes_2012_v3_loci/1000genomesloci2012_chr\"|' | \
     sed 's|GCCORRECTPREFIX = \".*|GCCORRECTPREFIX = \"/opt/battenberg_reference/1000genomes_2012_v3_gcContent/1000_genomes_GC_corr_chr_\"|' | \
-    sed 's|PROBLEMLOCI = \".*|PROBLEMLOCI = \"/opt/battenberg_reference/battenberg_problem_loci/probloci_270415.txt.gz\"|' > /usr/local/bin/battenberg_wgs.R && rm -f battenberg_wgs.R
+    sed 's|PROBLEMLOCI = \".*|PROBLEMLOCI = \"/opt/battenberg_reference/battenberg_problem_loci/probloci_270415.txt.gz\"|' | \
+    sed 's|REPLICCORRECTPREFIX = \".*|REPLICCORRECTPREFIX = \"/opt/battenberg_reference/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_\"|' > /usr/local/bin/battenberg_wgs.R
+
+RUN cp /opt/battenberg/inst/example/filter_sv_brass.R /usr/local/bin/filter_sv_brass.R
+RUN cp /opt/battenberg/inst/example/battenberg_cleanup.sh /usr/local/bin/battenberg_cleanup.sh
 
-#RUN curl -sSL -o battenberg_snp6.R https://raw.githubusercontent.com/Wedge-Oxford/battenberg/master/inst/example/battenberg_snp6.R
-#RUN cat battenberg_snp6.R | \
+#RUN cat /opt/battenberg/inst/example/battenberg_snp6.R | \
 #    sed 's|IMPUTEINFOFILE = \".*|IMPUTEINFOFILE = \"/opt/battenberg_reference/1000genomes_2012_v3_impute/impute_info.txt\"|' | \
 #    sed 's|G1000PREFIX = \".*|G1000PREFIX = \"/opt/battenberg_reference/1000genomes_2012_v3_loci/1000genomesAlleles2012_chr\"|' | \
-#    sed 's|SNP6_REF_INFO_FILE = \".*|SNP6_REF_INFO_FILE = \"/opt/battenberg_reference/battenberg_snp6/snp6_ref_info_file.txt\"|' > /usr/local/bin/battenberg_snp6.R && rm -f battenberg_wgs.R
+#    sed 's|SNP6_REF_INFO_FILE = \".*|SNP6_REF_INFO_FILE = \"/opt/battenberg_reference/battenberg_snp6/snp6_ref_info_file.txt\"|' > /usr/local/bin/battenberg_snp6.R
+
+## USER CONFIGURATION
+RUN adduser --disabled-password --gecos '' ubuntu && chsh -s /bin/bash && mkdir -p /home/ubuntu
+
+USER    ubuntu
+WORKDIR /home/ubuntu
+
+CMD ["/bin/bash"]
@@ -1,19 +1,17 @@
 # Generated by roxygen2: do not edit by hand
 
-export(plot.haplotype.data)
+export()
 export(GetChromosomeBAFs)
 export(GetChromosomeBAFs_SNP6)
 export(allele_ratio_plot)
 export(battenberg)
-export(calc_psi_t)
 export(calc_rho_psi_refit)
 export(callSubclones)
 export(cel2baf.logr)
 export(cnfit_to_refit_suggestions)
 export(combine.baf.files)
 export(combine.impute.output)
 export(coverage_plot)
-export(find_centroid_of_global_minima)
 export(fit.copy.number)
 export(gc.correct)
 export(gc.correct.wgs)
@@ -25,14 +23,14 @@ export(getBAFsAndLogRs)
 export(infer_gender_birdseed)
 export(make_posthoc_plots)
 export(parse.imputeinfofile)
+export(plot.haplotype.data)
 export(prepare_snp6)
 export(prepare_wgs)
 export(read_table_generic)
 export(run.impute)
-export(runASCAT)
-export(run_clonal_ASCAT)
 export(run_haplotyping)
 export(segment.baf.phased)
+export(segment.baf.phased.legacy)
 export(segment.baf.phased.sv)
 export(squaresplot)
 export(suggest_refit)
@@ -55,4 +53,7 @@ importFrom(gridExtra,grid.arrange)
 importFrom(gtools,mixedsort)
 importFrom(parallel,makeCluster)
 importFrom(parallel,stopCluster)
+importFrom(readr,cols)
 importFrom(readr,read_table)
+importFrom(readr,write_tsv)
+importFrom(splines,ns)
@@ -1,10 +1,11 @@
 #' @import stats graphics grDevices utils ggplot2
 #' @importFrom RColorBrewer brewer.pal
-#' @importFrom readr read_table
+#' @importFrom readr read_table write_tsv cols
 #' @importFrom gridExtra grid.arrange arrangeGrob
 #' @importFrom ASCAT make_segments ascat.plotSunrise ascat.plotAscatProfile ascat.plotNonRounded
 #' @importFrom gtools mixedsort
 #' @importFrom parallel makeCluster stopCluster
 #' @importFrom doParallel registerDoParallel
 #' @importFrom foreach foreach %dopar%
+#' @importFrom splines ns
 NULL
@@ -8,7 +8,8 @@
 #' @param imputeinfofile Full path to a Battenberg impute info file with pointers to Impute2 reference data
 #' @param g1000prefix Full prefix path to 1000 Genomes SNP loci data, as part of the Battenberg reference data
 #' @param problemloci Full path to a problem loci file that contains SNP loci that should be filtered out
-#' @param gccorrectprefix Full prefix path to GC content files, as part of the Battenberg reference data, not required for SNP6 data (Default: NA)
+#' @param gccorrectprefix Full prefix path to GC content files, as part of the Battenberg reference data, not required for SNP6 data (Default: NULL)
+#' @param repliccorrectprefix Full prefix path to replication timing files, as part of the Battenberg reference data, not required for SNP6 data (Default: NULL)
 #' @param g1000allelesprefix Full prefix path to 1000 Genomes SNP alleles data, as part of the Battenberg reference data, not required for SNP6 data (Default: NA)
 #' @param ismale A boolean set to TRUE if the donor is male, set to FALSE if female, not required for SNP6 data (Default: NA)
 #' @param data_type String that contains either wgs or snp6 depending on the supplied input data (Default: wgs)
@@ -30,7 +31,7 @@
 #' @param min_normal_depth Minimum depth required in the matched normal for a SNP to be considered as part of the wgs analysis (Default: 10)
 #' @param min_base_qual Minimum base quality required for a read to be counted when allele counting (Default: 20)
 #' @param min_map_qual Minimum mapping quality required for a read to be counted when allele counting (Default: 35)
-#' @param calc_seg_baf_option Sets way to calculate BAF per segment: 1=mean, 2=median (Default: 1)
+#' @param calc_seg_baf_option Sets way to calculate BAF per segment: 1=mean, 2=median, 3=median, unless the median is exactly 0 or 1, in which case the mean is used (Default: 3)
 #' @param skip_allele_counting Provide TRUE when allele counting can be skipped (i.e. its already done) (Default: FALSE)
 #' @param skip_preprocessing Provide TRUE when preprocessing is already complete (Default: FALSE)
 #' @param skip_phasing  Provide TRUE when phasing is already complete (Default: FALSE)
@@ -40,35 +41,45 @@
 #' @param norm.geno.clust.exe  Helper tool for extracting data from CEL files, SNP6 pipeline only (Default: normalize_affy_geno_cluster.pl)
 #' @param birdseed_report_file Sex inference output file, SNP6 pipeline only (Default: birdseed.report.txt)
 #' @param heterozygousFilter Legacy option to set a heterozygous SNP filter, SNP6 pipeline only (Default: "none")
+#' @param prior_breakpoints_file A two-column file with prior breakpoints to be used during segmentation (Default: NULL)
 #' @author sd11
 #' @export
-battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file, imputeinfofile, g1000prefix, problemloci, 
-                      gccorrectprefix=NA, g1000allelesprefix=NA, ismale=NA, data_type="wgs", impute_exe="impute2", allelecounter_exe="alleleCounter", nthreads=8, platform_gamma=1, phasing_gamma=1,
+battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file, imputeinfofile, g1000prefix, problemloci, gccorrectprefix=NULL,
+                      repliccorrectprefix=NULL, g1000allelesprefix=NA, ismale=NA, data_type="wgs", impute_exe="impute2", allelecounter_exe="alleleCounter", nthreads=8, platform_gamma=1, phasing_gamma=1,
                       segmentation_gamma=10, segmentation_kmin=3, phasing_kmin=1, clonality_dist_metric=0, ascat_dist_metric=1, min_ploidy=1.6,
                       max_ploidy=4.8, min_rho=0.1, min_goodness=0.63, uninformative_BAF_threshold=0.51, min_normal_depth=10, min_base_qual=20, 
-                      min_map_qual=35, calc_seg_baf_option=1, skip_allele_counting=F, skip_preprocessing=F, skip_phasing=F,
+                      min_map_qual=35, calc_seg_baf_option=3, skip_allele_counting=F, skip_preprocessing=F, skip_phasing=F,
                       snp6_reference_info_file=NA, apt.probeset.genotype.exe="apt-probeset-genotype", apt.probeset.summarize.exe="apt-probeset-summarize", 
-                      norm.geno.clust.exe="normalize_affy_geno_cluster.pl", birdseed_report_file="birdseed.report.txt", heterozygousFilter="none") {
+                      norm.geno.clust.exe="normalize_affy_geno_cluster.pl", birdseed_report_file="birdseed.report.txt", heterozygousFilter="none",
+                      prior_breakpoints_file=NULL) {
 
   requireNamespace("foreach")
   requireNamespace("doParallel")
   requireNamespace("parallel")
 
   if (data_type=="wgs" & is.na(ismale)) {
-    print("Please provide a boolean denominator whether this sample represents a male donor")
-    q(save="no", status=1)
+    stop("Please provide a boolean denominator whether this sample represents a male donor")
   }
 
   if (data_type=="wgs" & is.na(g1000allelesprefix)) {
-    print("Please provide a path to 1000 Genomes allele reference files")
-    q(save="no", status=1)
+    stop("Please provide a path to 1000 Genomes allele reference files")
   }
 
-  if (data_type=="wgs" & is.na(gccorrectprefix)) {
-    print("Please provide a path to GC content reference files")
-    q(save="no", status=1)
+  if (data_type=="wgs" & is.null(gccorrectprefix)) {
+    stop("Please provide a path to GC content reference files")
   }
 
+  if (!file.exists(problemloci)) {
+       stop("Please provide a path to a problematic loci file")
+  }
+
+  if (!file.exists(imputeinfofile)) {
+	  stop("Please provide a path to an impute info file")
+  }
+
+  # check whether the impute_info.txt file contains correct paths
+  check.imputeinfofile(imputeinfofile, ismale)
+
   if (data_type=="wgs" | data_type=="WGS") {
     chrom_names = get.chrom.names(imputeinfofile, ismale)
     logr_file = paste(tumourname, "_mutantLogR_gcCorrected.tab", sep="")
@@ -92,7 +103,8 @@ battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file
                   normalname=normalname, 
                   g1000allelesprefix=g1000allelesprefix, 
                   g1000prefix=g1000prefix, 
-                  gccorrectprefix=gccorrectprefix, 
+                  gccorrectprefix=gccorrectprefix,
+                  repliccorrectprefix=repliccorrectprefix,
                   min_base_qual=min_base_qual, 
                   min_map_qual=min_map_qual, 
                   allelecounter_exe=allelecounter_exe, 
@@ -164,6 +176,7 @@ battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file
   segment.baf.phased(samplename=tumourname,
                      inputfile=paste(tumourname, "_heterozygousMutBAFs_haplotyped.txt", sep=""), 
                      outputfile=paste(tumourname, ".BAFsegmented.txt", sep=""),
+                     prior_breakpoints_file=prior_breakpoints_file,
                      gamma=segmentation_gamma,
                      phasegamma=phasing_gamma,
                      kmin=segmentation_kmin,
@@ -198,7 +211,7 @@ battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file
                 output.figures.prefix=paste(tumourname,"_subclones_chr", sep=""), 
                 output.gw.figures.prefix=paste(tumourname,"_BattenbergProfile", sep=""),
                 masking_output_file=paste(tumourname, "_segment_masking_details.txt", sep=""),
-                sv_breakpoints_file="NA",
+                prior_breakpoints_file=prior_breakpoints_file,
                 chr_names=chrom_names, 
                 gamma=platform_gamma, 
                 segmentation.gamma=NA, 
@@ -222,9 +235,5 @@ battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file
                              rho_psi_file=paste(tumourname, "_rho_and_psi.txt", sep=""),
                              gamma_param=platform_gamma)
 
-  
-  
-  
-    
 }
 
@@ -1423,6 +1423,8 @@ runASCAT = function(lrr, baf, lrrsegmented, bafsegmented, chromosomes, dist_choi
     psi = NA
     ploidy = NA
     rho = NA
+    psi_opt1_plot = -1
+    rho_opt1_plot = -1
   }
 
   # separated plotting from logic: create distanceplot here
@@ -1634,6 +1636,12 @@ run_clonal_ASCAT = function(lrr, baf, lrrsegmented, bafsegmented, chromosomes, s
 
   # Recalculate the psi_t for this rho using only clonal segments 
   psi_t = recalc_psi_t(psi_without_ref, rho_without_ref, gamma_param, lrrsegmented, segBAF.table, siglevel_BAF, maxdist_BAF, include_subcl_segments=F)
+
+  # If there aren't any clonally fit segments, the above yields NA. In this case, revert to the original grid search psi_t
+  if (is.na(psi_t)) {
+	  print("Recalculated psi_t was NA, reverting to grid search solution. This occurs when no segment could be fit with a clonal state, check sample for contamination")
+	  psi_t = psi_without_ref
+  }
 
   output_optimum_pair = list(psi = psi_opt1, rho = rho_opt1, ploidy = ploidy_opt1)
   #output_optimum_pair_without_ref = list(psi = psi_without_ref, rho = rho_without_ref, ploidy = ploidy_without_ref)