Skip to content

Commit 8bfb278

Browse files
authored
Merge pull request #47 from Wedge-Oxford/dev
Merging Battenberg v2.2.9
2 parents 89323a2 + 1bdf005 commit 8bfb278

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+1188
-727
lines changed

DESCRIPTION

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ Maintainer: Stefan Dentro <[email protected]>
33
License: GPL-3
44
Type: Package
55
Title: Battenberg subclonal copy number caller
6-
Version: 2.2.8
6+
Version: 2.2.9
77
Authors@R: c(person("David", "Wedge", role=c("aut"), email="[email protected]"),
88
person("Peter", "Van Loo", role=c("aut")),
99
person("Stefan","Dentro", email="[email protected]", role=c("aut", "cre")),
@@ -19,17 +19,17 @@ Depends:
1919
grDevices
2020
Imports:
2121
RColorBrewer,
22-
ASCAT (>= 2.5),
22+
ASCAT (>= 2.5.1),
2323
ggplot2,
2424
readr,
2525
gtools,
2626
gridExtra,
2727
doParallel,
2828
parallel,
29-
foreach
29+
foreach,
30+
splines
3031
URL: https://github.com/Wedge-Oxford/battenberg
3132
LazyLoad: yes
32-
License: file LICENSE
3333
Suggests:
3434
testthat
35-
RoxygenNote: 6.0.1
35+
RoxygenNote: 7.1.0

Dockerfile

100644100755
Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
1-
#FROM r-base
21
FROM ubuntu:16.04
32

3+
USER root
4+
45
# Add dependencies
56
RUN apt-get update && apt-get install -y libxml2 libxml2-dev libcurl4-gnutls-dev r-cran-rgl git libssl-dev curl
67

78
RUN mkdir /tmp/downloads
89

9-
RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/samtools/htslib/archive/1.2.1.tar.gz && \
10+
RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/samtools/htslib/archive/1.7.tar.gz && \
1011
mkdir /tmp/downloads/htslib && \
1112
tar -C /tmp/downloads/htslib --strip-components 1 -zxf tmp.tar.gz && \
1213
make -C /tmp/downloads/htslib && \
1314
rm -f /tmp/downloads/tmp.tar.gz
1415

1516
ENV HTSLIB /tmp/downloads/htslib
1617

17-
RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/cancerit/alleleCount/archive/v2.1.2.tar.gz && \
18+
RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/cancerit/alleleCount/archive/v4.0.0.tar.gz && \
1819
mkdir /tmp/downloads/alleleCount && \
1920
tar -C /tmp/downloads/alleleCount --strip-components 1 -zxf tmp.tar.gz && \
2021
cd /tmp/downloads/alleleCount/c && \
@@ -30,21 +31,34 @@ RUN curl -sSL -o tmp.tar.gz --retry 10 https://mathgen.stats.ox.ac.uk/impute/imp
3031
cp /tmp/downloads/impute2/impute2 /usr/local/bin && \
3132
rm -rf /tmp/downloads/impute2 /tmp/downloads/tmp.tar.gz
3233

33-
RUN R -q -e 'source("http://bioconductor.org/biocLite.R"); biocLite(c("devtools","RColorBrewer","ggplot2","gridExtra","readr","doParallel","foreach"))'
34+
RUN R -q -e 'source("http://bioconductor.org/biocLite.R"); biocLite(c("gtools", "optparse", "devtools","RColorBrewer","ggplot2","gridExtra","readr","doParallel","foreach", "splines"))'
3435
RUN R -q -e 'devtools::install_github("Crick-CancerGenomics/ascat/ASCAT")'
35-
RUN R -q -e 'devtools::install_github("Wedge-Oxford/battenberg")'
3636

37-
RUN curl -sSL -o battenberg_wgs.R https://raw.githubusercontent.com/Wedge-Oxford/battenberg/master/inst/example/battenberg_wgs.R
37+
RUN mkdir -p /opt/battenberg
38+
COPY . /opt/battenberg/
39+
RUN R -q -e 'install.packages("/opt/battenberg", repos=NULL, type="source")'
40+
3841
# modify paths to reference files
39-
RUN cat battenberg_wgs.R | \
42+
RUN cat /opt/battenberg/inst/example/battenberg_wgs.R | \
4043
sed 's|IMPUTEINFOFILE = \".*|IMPUTEINFOFILE = \"/opt/battenberg_reference/1000genomes_2012_v3_impute/impute_info.txt\"|' | \
4144
sed 's|G1000PREFIX = \".*|G1000PREFIX = \"/opt/battenberg_reference/1000genomes_2012_v3_loci/1000genomesAlleles2012_chr\"|' | \
4245
sed 's|G1000PREFIX_AC = \".*|G1000PREFIX_AC = \"/opt/battenberg_reference/1000genomes_2012_v3_loci/1000genomesloci2012_chr\"|' | \
4346
sed 's|GCCORRECTPREFIX = \".*|GCCORRECTPREFIX = \"/opt/battenberg_reference/1000genomes_2012_v3_gcContent/1000_genomes_GC_corr_chr_\"|' | \
44-
sed 's|PROBLEMLOCI = \".*|PROBLEMLOCI = \"/opt/battenberg_reference/battenberg_problem_loci/probloci_270415.txt.gz\"|' > /usr/local/bin/battenberg_wgs.R && rm -f battenberg_wgs.R
47+
sed 's|PROBLEMLOCI = \".*|PROBLEMLOCI = \"/opt/battenberg_reference/battenberg_problem_loci/probloci_270415.txt.gz\"|' | \
48+
sed 's|REPLICCORRECTPREFIX = \".*|REPLICCORRECTPREFIX = \"/opt/battenberg_reference/battenberg_wgs_replic_correction_1000g_v3/1000_genomes_replication_timing_chr_\"|' > /usr/local/bin/battenberg_wgs.R
49+
50+
RUN cp /opt/battenberg/inst/example/filter_sv_brass.R /usr/local/bin/filter_sv_brass.R
51+
RUN cp /opt/battenberg/inst/example/battenberg_cleanup.sh /usr/local/bin/battenberg_cleanup.sh
4552

46-
#RUN curl -sSL -o battenberg_snp6.R https://raw.githubusercontent.com/Wedge-Oxford/battenberg/master/inst/example/battenberg_snp6.R
47-
#RUN cat battenberg_snp6.R | \
53+
#RUN cat /opt/battenberg/inst/example/battenberg_snp6.R | \
4854
# sed 's|IMPUTEINFOFILE = \".*|IMPUTEINFOFILE = \"/opt/battenberg_reference/1000genomes_2012_v3_impute/impute_info.txt\"|' | \
4955
# sed 's|G1000PREFIX = \".*|G1000PREFIX = \"/opt/battenberg_reference/1000genomes_2012_v3_loci/1000genomesAlleles2012_chr\"|' | \
50-
# sed 's|SNP6_REF_INFO_FILE = \".*|SNP6_REF_INFO_FILE = \"/opt/battenberg_reference/battenberg_snp6/snp6_ref_info_file.txt\"|' > /usr/local/bin/battenberg_snp6.R && rm -f battenberg_wgs.R
56+
# sed 's|SNP6_REF_INFO_FILE = \".*|SNP6_REF_INFO_FILE = \"/opt/battenberg_reference/battenberg_snp6/snp6_ref_info_file.txt\"|' > /usr/local/bin/battenberg_snp6.R
57+
58+
## USER CONFIGURATION
59+
RUN adduser --disabled-password --gecos '' ubuntu && chsh -s /bin/bash && mkdir -p /home/ubuntu
60+
61+
USER ubuntu
62+
WORKDIR /home/ubuntu
63+
64+
CMD ["/bin/bash"]

NAMESPACE

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
# Generated by roxygen2: do not edit by hand
22

3-
export(plot.haplotype.data)
3+
export()
44
export(GetChromosomeBAFs)
55
export(GetChromosomeBAFs_SNP6)
66
export(allele_ratio_plot)
77
export(battenberg)
8-
export(calc_psi_t)
98
export(calc_rho_psi_refit)
109
export(callSubclones)
1110
export(cel2baf.logr)
1211
export(cnfit_to_refit_suggestions)
1312
export(combine.baf.files)
1413
export(combine.impute.output)
1514
export(coverage_plot)
16-
export(find_centroid_of_global_minima)
1715
export(fit.copy.number)
1816
export(gc.correct)
1917
export(gc.correct.wgs)
@@ -25,14 +23,14 @@ export(getBAFsAndLogRs)
2523
export(infer_gender_birdseed)
2624
export(make_posthoc_plots)
2725
export(parse.imputeinfofile)
26+
export(plot.haplotype.data)
2827
export(prepare_snp6)
2928
export(prepare_wgs)
3029
export(read_table_generic)
3130
export(run.impute)
32-
export(runASCAT)
33-
export(run_clonal_ASCAT)
3431
export(run_haplotyping)
3532
export(segment.baf.phased)
33+
export(segment.baf.phased.legacy)
3634
export(segment.baf.phased.sv)
3735
export(squaresplot)
3836
export(suggest_refit)
@@ -55,4 +53,7 @@ importFrom(gridExtra,grid.arrange)
5553
importFrom(gtools,mixedsort)
5654
importFrom(parallel,makeCluster)
5755
importFrom(parallel,stopCluster)
56+
importFrom(readr,cols)
5857
importFrom(readr,read_table)
58+
importFrom(readr,write_tsv)
59+
importFrom(splines,ns)

R/Battenberg-package.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#' @import stats graphics grDevices utils ggplot2
22
#' @importFrom RColorBrewer brewer.pal
3-
#' @importFrom readr read_table
3+
#' @importFrom readr read_table write_tsv cols
44
#' @importFrom gridExtra grid.arrange arrangeGrob
55
#' @importFrom ASCAT make_segments ascat.plotSunrise ascat.plotAscatProfile ascat.plotNonRounded
66
#' @importFrom gtools mixedsort
77
#' @importFrom parallel makeCluster stopCluster
88
#' @importFrom doParallel registerDoParallel
99
#' @importFrom foreach foreach %dopar%
10+
#' @importFrom splines ns
1011
NULL

R/battenberg.R

100644100755
Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
#' @param imputeinfofile Full path to a Battenberg impute info file with pointers to Impute2 reference data
99
#' @param g1000prefix Full prefix path to 1000 Genomes SNP loci data, as part of the Battenberg reference data
1010
#' @param problemloci Full path to a problem loci file that contains SNP loci that should be filtered out
11-
#' @param gccorrectprefix Full prefix path to GC content files, as part of the Battenberg reference data, not required for SNP6 data (Default: NA)
11+
#' @param gccorrectprefix Full prefix path to GC content files, as part of the Battenberg reference data, not required for SNP6 data (Default: NULL)
12+
#' @param repliccorrectprefix Full prefix path to replication timing files, as part of the Battenberg reference data, not required for SNP6 data (Default: NULL)
1213
#' @param g1000allelesprefix Full prefix path to 1000 Genomes SNP alleles data, as part of the Battenberg reference data, not required for SNP6 data (Default: NA)
1314
#' @param ismale A boolean set to TRUE if the donor is male, set to FALSE if female, not required for SNP6 data (Default: NA)
1415
#' @param data_type String that contains either wgs or snp6 depending on the supplied input data (Default: wgs)
@@ -30,7 +31,7 @@
3031
#' @param min_normal_depth Minimum depth required in the matched normal for a SNP to be considered as part of the wgs analysis (Default: 10)
3132
#' @param min_base_qual Minimum base quality required for a read to be counted when allele counting (Default: 20)
3233
#' @param min_map_qual Minimum mapping quality required for a read to be counted when allele counting (Default: 35)
33-
#' @param calc_seg_baf_option Sets way to calculate BAF per segment: 1=mean, 2=median (Default: 1)
34+
#' @param calc_seg_baf_option Sets the way to calculate BAF per segment: 1=mean, 2=median, 3=mean if the median is 0 or 1, otherwise median (Default: 3)
3435
#' @param skip_allele_counting Provide TRUE when allele counting can be skipped (i.e. it is already done) (Default: FALSE)
3536
#' @param skip_preprocessing Provide TRUE when preprocessing is already complete (Default: FALSE)
3637
#' @param skip_phasing Provide TRUE when phasing is already complete (Default: FALSE)
@@ -40,35 +41,45 @@
4041
#' @param norm.geno.clust.exe Helper tool for extracting data from CEL files, SNP6 pipeline only (Default: normalize_affy_geno_cluster.pl)
4142
#' @param birdseed_report_file Sex inference output file, SNP6 pipeline only (Default: birdseed.report.txt)
4243
#' @param heterozygousFilter Legacy option to set a heterozygous SNP filter, SNP6 pipeline only (Default: "none")
44+
#' @param prior_breakpoints_file A two-column file with prior breakpoints to be used during segmentation (Default: NULL)
4345
#' @author sd11
4446
#' @export
45-
battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file, imputeinfofile, g1000prefix, problemloci,
46-
gccorrectprefix=NA, g1000allelesprefix=NA, ismale=NA, data_type="wgs", impute_exe="impute2", allelecounter_exe="alleleCounter", nthreads=8, platform_gamma=1, phasing_gamma=1,
47+
battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file, imputeinfofile, g1000prefix, problemloci, gccorrectprefix=NULL,
48+
repliccorrectprefix=NULL, g1000allelesprefix=NA, ismale=NA, data_type="wgs", impute_exe="impute2", allelecounter_exe="alleleCounter", nthreads=8, platform_gamma=1, phasing_gamma=1,
4749
segmentation_gamma=10, segmentation_kmin=3, phasing_kmin=1, clonality_dist_metric=0, ascat_dist_metric=1, min_ploidy=1.6,
4850
max_ploidy=4.8, min_rho=0.1, min_goodness=0.63, uninformative_BAF_threshold=0.51, min_normal_depth=10, min_base_qual=20,
49-
min_map_qual=35, calc_seg_baf_option=1, skip_allele_counting=F, skip_preprocessing=F, skip_phasing=F,
51+
min_map_qual=35, calc_seg_baf_option=3, skip_allele_counting=F, skip_preprocessing=F, skip_phasing=F,
5052
snp6_reference_info_file=NA, apt.probeset.genotype.exe="apt-probeset-genotype", apt.probeset.summarize.exe="apt-probeset-summarize",
51-
norm.geno.clust.exe="normalize_affy_geno_cluster.pl", birdseed_report_file="birdseed.report.txt", heterozygousFilter="none") {
53+
norm.geno.clust.exe="normalize_affy_geno_cluster.pl", birdseed_report_file="birdseed.report.txt", heterozygousFilter="none",
54+
prior_breakpoints_file=NULL) {
5255

5356
requireNamespace("foreach")
5457
requireNamespace("doParallel")
5558
requireNamespace("parallel")
5659

5760
if (data_type=="wgs" & is.na(ismale)) {
58-
print("Please provide a boolean denominator whether this sample represents a male donor")
59-
q(save="no", status=1)
61+
stop("Please provide a boolean denominator whether this sample represents a male donor")
6062
}
6163

6264
if (data_type=="wgs" & is.na(g1000allelesprefix)) {
63-
print("Please provide a path to 1000 Genomes allele reference files")
64-
q(save="no", status=1)
65+
stop("Please provide a path to 1000 Genomes allele reference files")
6566
}
6667

67-
if (data_type=="wgs" & is.na(gccorrectprefix)) {
68-
print("Please provide a path to GC content reference files")
69-
q(save="no", status=1)
68+
if (data_type=="wgs" & is.null(gccorrectprefix)) {
69+
stop("Please provide a path to GC content reference files")
7070
}
7171

72+
if (!file.exists(problemloci)) {
73+
stop("Please provide a path to a problematic loci file")
74+
}
75+
76+
if (!file.exists(imputeinfofile)) {
77+
stop("Please provide a path to an impute info file")
78+
}
79+
80+
# check whether the impute_info.txt file contains correct paths
81+
check.imputeinfofile(imputeinfofile, ismale)
82+
7283
if (data_type=="wgs" | data_type=="WGS") {
7384
chrom_names = get.chrom.names(imputeinfofile, ismale)
7485
logr_file = paste(tumourname, "_mutantLogR_gcCorrected.tab", sep="")
@@ -92,7 +103,8 @@ battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file
92103
normalname=normalname,
93104
g1000allelesprefix=g1000allelesprefix,
94105
g1000prefix=g1000prefix,
95-
gccorrectprefix=gccorrectprefix,
106+
gccorrectprefix=gccorrectprefix,
107+
repliccorrectprefix=repliccorrectprefix,
96108
min_base_qual=min_base_qual,
97109
min_map_qual=min_map_qual,
98110
allelecounter_exe=allelecounter_exe,
@@ -164,6 +176,7 @@ battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file
164176
segment.baf.phased(samplename=tumourname,
165177
inputfile=paste(tumourname, "_heterozygousMutBAFs_haplotyped.txt", sep=""),
166178
outputfile=paste(tumourname, ".BAFsegmented.txt", sep=""),
179+
prior_breakpoints_file=prior_breakpoints_file,
167180
gamma=segmentation_gamma,
168181
phasegamma=phasing_gamma,
169182
kmin=segmentation_kmin,
@@ -198,7 +211,7 @@ battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file
198211
output.figures.prefix=paste(tumourname,"_subclones_chr", sep=""),
199212
output.gw.figures.prefix=paste(tumourname,"_BattenbergProfile", sep=""),
200213
masking_output_file=paste(tumourname, "_segment_masking_details.txt", sep=""),
201-
sv_breakpoints_file="NA",
214+
prior_breakpoints_file=prior_breakpoints_file,
202215
chr_names=chrom_names,
203216
gamma=platform_gamma,
204217
segmentation.gamma=NA,
@@ -222,9 +235,5 @@ battenberg = function(tumourname, normalname, tumour_data_file, normal_data_file
222235
rho_psi_file=paste(tumourname, "_rho_and_psi.txt", sep=""),
223236
gamma_param=platform_gamma)
224237

225-
226-
227-
228-
229238
}
230239

R/clonal_ascat.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,6 +1423,8 @@ runASCAT = function(lrr, baf, lrrsegmented, bafsegmented, chromosomes, dist_choi
14231423
psi = NA
14241424
ploidy = NA
14251425
rho = NA
1426+
psi_opt1_plot = -1
1427+
rho_opt1_plot = -1
14261428
}
14271429

14281430
# separated plotting from logic: create distanceplot here
@@ -1634,6 +1636,12 @@ run_clonal_ASCAT = function(lrr, baf, lrrsegmented, bafsegmented, chromosomes, s
16341636

16351637
# Recalculate the psi_t for this rho using only clonal segments
16361638
psi_t = recalc_psi_t(psi_without_ref, rho_without_ref, gamma_param, lrrsegmented, segBAF.table, siglevel_BAF, maxdist_BAF, include_subcl_segments=F)
1639+
1640+
# If there aren't any clonally fit segments, the above yields NA. In this case, revert to the original grid search psi_t
1641+
if (is.na(psi_t)) {
1642+
print("Recalculated psi_t was NA, reverting to grid search solution. This occurs when no segment could be fit with a clonal state, check sample for contamination")
1643+
psi_t = psi_without_ref
1644+
}
16371645

16381646
output_optimum_pair = list(psi = psi_opt1, rho = rho_opt1, ploidy = ploidy_opt1)
16391647
#output_optimum_pair_without_ref = list(psi = psi_without_ref, rho = rho_without_ref, ploidy = ploidy_without_ref)

0 commit comments

Comments
 (0)