diff --git a/DESCRIPTION b/DESCRIPTION index dd0c263..c2b4da0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,6 +36,7 @@ Suggests: karyoploteR, knitr, mockery, + RColorBrewer, regioneR, rmarkdown, roxygen2, diff --git a/NAMESPACE b/NAMESPACE index d2ef2fc..bca4b2b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,7 +14,9 @@ export(enquos) export(expr) export(miplicorn_example) export(plot_chromoMap) +export(plot_haplotypes) export(plot_karyoploteR) +export(prep_haplotypes) export(read) export(read_file) export(sym) diff --git a/R/rainbow_haplotype.R b/R/rainbow_haplotype.R new file mode 100644 index 0000000..2852bf9 --- /dev/null +++ b/R/rainbow_haplotype.R @@ -0,0 +1,205 @@ +#------------------------------------------------ +#' Prepare data for rainbow haplotype plot +#' +#' Prepare data for a rainbow haplotype plot. Creates several additional +#' columns, adding them to the dataset. +#' +#' @param data The data to be plotted. +#' @param sample The column name representing the samples. +#' @param probe The column name representing the probes. +#' @param haplotype The column name representing the haplotypes. +#' @param rel_freq The column name representing relative abundance or frequency +#' of each haplotype. +#' @param minPopSize The minimum number of haplotypes required per probe. If +#' there are less haplotypes, the probe will be ignored. +#' @param n_colours An integer indicating the number of colors to be used. +#' @param barHeight The height of the bar used for plotting. +#' @param ... These dots are for future extensions and must be empty. +#' +#' @author Aris Paschalidis ([@arisp99](https://github.com/arisp99)), Nick J. +#' Hathaway ([@nickjhathaway](https://github.com/nickjhathaway)) +#' @seealso [plot_haplotypes()] for creating the rainbow haplotype plot. +#' +#' @export +prep_haplotypes <- function(data, + sample = s_Sample, + probe = p_name, + haplotype = h_popUID, + rel_freq = c_AveragedFrac, + minPopSize = 3, + n_colours = 11, + barHeight = 0.80, + ...) { + # For each sample, mip, and haplotype, sum over all the data points + # Gives us one relative freq value for each haplotype + data_sum <- data %>% + dplyr::mutate("{{sample}}" := factor({{ sample }})) %>% + dplyr::group_by({{ sample }}, {{ probe }}, {{ haplotype }}) %>% + dplyr::summarise("{{rel_freq}}" := sum({{ rel_freq }})) + + # Ensure relative column is a frequency. Divide abundance by total abundance + data_rel <- data_sum %>% + dplyr::group_by({{ sample }}, {{ probe }}) %>% + dplyr::mutate( + totalAbund = sum({{ rel_freq }}), + "{{rel_freq}}" := {{ rel_freq }} / .data$totalAbund + ) + + # Find the number of haplotypes + data_n_hap <- data_rel %>% + dplyr::group_by({{ sample }}, {{ probe }}, {{ haplotype }}) %>% + dplyr::mutate(s_COI = dplyr::n_distinct({{ haplotype }})) + + # Plotting info + data_plot <- data_n_hap %>% + dplyr::group_by({{ sample }}, {{ probe }}) %>% + dplyr::mutate( + relAbundCol_mod = {{ rel_freq }} * barHeight, + fracCumSum = cumsum({{ rel_freq }}) - {{ rel_freq }}, + fracModCumSum = cumsum(.data$relAbundCol_mod) - .data$relAbundCol_mod, + fakeFrac = 1 / unique(.data$s_COI), + fakeFracMod = .data$fakeFrac * barHeight, + fakeFracCumSum = cumsum(.data$fakeFrac) - .data$fakeFrac, + fakeFracModCumSum = cumsum(.data$fakeFracMod) - .data$fakeFracMod + ) %>% + dplyr::ungroup() + + # Determine how many samples have a haplotype + # Determine number of haplotypes per probe + data_counts <- data_plot %>% + dplyr::group_by({{ probe }}, {{ haplotype }}) %>% + dplyr::summarise(samp_n = dplyr::n()) %>% + dplyr::arrange({{ probe }}, dplyr::desc(.data$samp_n)) %>% + dplyr::group_by({{ probe }}) %>% + dplyr::mutate( + popid = dplyr::row_number(), + maxPopid = max(.data$popid) + ) + + data_join <- data_plot %>% + dplyr::left_join(data_counts) + + # Filter based on minimum population size + # (number of probes with the haplotype) + data_filter <- data_join %>% + dplyr::filter(.data$maxPopid >= minPopSize) %>% + dplyr::mutate("{{probe}}" := factor({{ probe }})) + + # From here on, we deal with determining the colors of each haplotype + # The shading code looks different here... L153 in original code + colorsOutput <- n_colours + targetNumber <- 0 + targetToHue <- tibble::tibble( + "{{probe}}" := character(), + hueMod = double() + ) + tempTarCol <- dplyr::pull(data_filter, {{ probe }}) + + for (tarname in levels(tempTarCol)) { + targetToHue <- targetToHue %>% + tibble::add_row( + "{{probe}}" := tarname, + hueMod = (targetNumber %% colorsOutput) + 1 + ) + targetNumber <- targetNumber + 1 + } + + data_filter <- data_filter %>% + dplyr::group_by({{ probe }}) %>% + dplyr::mutate(popidFrac = (.data$popid - 1) / (.data$maxPopid)) + + tempTarCol <- dplyr::pull(data_filter, {{ probe }}) + + targetToHue <- targetToHue %>% + dplyr::mutate("{{probe}}" := factor({{ probe }}, levels = levels(tempTarCol))) + + data_final <- data_filter %>% + dplyr::left_join(targetToHue) %>% + dplyr::mutate( + popidPerc = 100 * .data$popidFrac, + popidFracRegColor = round(abs((.data$popidPerc + (.data$hueMod / colorsOutput) * 100) %% 200 - 0.0001) %% 100), + popidPercLog = log((.data$popidFrac * 99) + 1, base = 100) * 100, + popidFracLogColor = round(abs((.data$popidPercLog + (.data$hueMod / colorsOutput) * 100) %% 200 - 0.0001) %% 100) + ) + + # Only keep needed columns to not confuse the user + dplyr::select( + data_final, + {{ sample }}, + {{ probe }}, + {{ haplotype }}, + .data$fracModCumSum, + .data$relAbundCol_mod, + .data$popidFracLogColor + ) +} + +#------------------------------------------------ +#' Create rainbow haplotype plot +#' +#' Create a rainbow haplotype plot. Each row represents a sample and each column +#' represents a unique probe. The colors in each bar represent the haplotypes +#' identified for each probe. The height of the bar illustrates the relative +#' abundance of each haplotype. +#' +#' @inheritParams prep_haplotypes +#' +#' @author Aris Paschalidis ([@arisp99](https://github.com/arisp99)), Nick J. +#' Hathaway ([@nickjhathaway](https://github.com/nickjhathaway)) +#' @seealso [prep_haplotypes()] for generating the data used for plotting. +#' +#' @export +plot_haplotypes <- function(data, + sample = s_Sample, + probe = p_name, + haplotype = h_popUID, + rel_freq = c_AveragedFrac, + n_colours = 11, + ...) { + # Prepare the data + # data <- prep_haplotypes(data) + + # Find unique samples to label the y-axis + unique_samples <- data %>% + dplyr::ungroup() %>% + dplyr::select({{ sample }}) %>% + unique() %>% + dplyr::arrange({{ sample }}) %>% + dplyr::pull() + + # Set up colors + if (!requireNamespace("RColorBrewer", quietly = TRUE)) { + cli_abort('Package "RColorBrewer" needed to create a colour palette. Please install it.') + } + colours <- RColorBrewer::brewer.pal(n_colours, "Spectral") + + # Plot + ggplot2::ggplot(data) + + ggplot2::geom_rect( + ggplot2::aes( + xmin = as.numeric({{ probe }}) - 0.5, + xmax = as.numeric({{ probe }}) + 0.5, + ymin = as.numeric({{ sample }}) + .data$fracModCumSum - 0.5, + ymax = as.numeric({{ sample }}) + .data$fracModCumSum + .data$relAbundCol_mod - 0.5, + fill = .data$popidFracLogColor + ), + color = "black" + ) + + ggplot2::scale_fill_gradientn(colours = colours) + + # Used for shading + # ggplot2::scale_fill_identity() + + ggplot2::scale_y_continuous( + breaks = seq_along(unique_samples), + labels = unique_samples + ) + + rainbow_theme() + + ggplot2::theme( + axis.text.x = ggplot2::element_blank(), + legend.position = "none" + ) +} + +# plot_haplotypes <- function(data, +# colors = RColorBrewer::brewer.pal(11, "Spectral"), +# ...) { +# prep_data <- prep_haplotypes(data, ...) diff --git a/R/utils.R b/R/utils.R index b87388f..4c8b660 100644 --- a/R/utils.R +++ b/R/utils.R @@ -59,6 +59,7 @@ theme_rainbow <- function(base_size = 12, ggplot2::theme( plot.title = ggplot2::element_text(hjust = 0.5), axis.line.x = ggplot2::element_line(color="black", size = 0.3), + axis.ticks.x = ggplot2::element_blank(), axis.line.y = ggplot2::element_line(color="black", size = 0.3), panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), diff --git a/man/plot_haplotypes.Rd b/man/plot_haplotypes.Rd new file mode 100644 index 0000000..5b6d625 --- /dev/null +++ b/man/plot_haplotypes.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rainbow_haplotype.R +\name{plot_haplotypes} +\alias{plot_haplotypes} +\title{Create rainbow haplotype plot} +\usage{ +plot_haplotypes( + data, + sample = s_Sample, + probe = p_name, + haplotype = h_popUID, + rel_freq = c_AveragedFrac, + n_colours = 11, + ... +) +} +\arguments{ +\item{data}{The data to be plotted.} + +\item{sample}{The column name representing the samples.} + +\item{probe}{The column name representing the probes.} + +\item{haplotype}{The column name representing the haplotypes.} + +\item{rel_freq}{The column name representing relative abundance or frequency +of each haplotype.} + +\item{n_colours}{An integer indicating the number of colors to be used.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\description{ +Create a rainbow haplotype plot. Each row represents a sample and each column +represents a unique probe. The colors in each bar represent the haplotypes +identified for each probe. The height of the bar illustrates the relative +abundance of each haplotype. +} +\seealso{ +\code{\link[=prep_haplotypes]{prep_haplotypes()}} for generating the data used for plotting. +} +\author{ +Aris Paschalidis (\href{https://github.com/arisp99}{@arisp99}), Nick J. +Hathaway (\href{https://github.com/nickjhathaway}{@nickjhathaway}) +} diff --git a/man/prep_haplotypes.Rd b/man/prep_haplotypes.Rd new file mode 100644 index 0000000..0c4f6bb --- /dev/null +++ b/man/prep_haplotypes.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rainbow_haplotype.R +\name{prep_haplotypes} +\alias{prep_haplotypes} +\title{Prepare data for rainbow haplotype plot} +\usage{ +prep_haplotypes( + data, + sample = s_Sample, + probe = p_name, + haplotype = h_popUID, + rel_freq = c_AveragedFrac, + minPopSize = 3, + n_colours = 11, + barHeight = 0.8, + ... +) +} +\arguments{ +\item{data}{The data to be plotted.} + +\item{sample}{The column name representing the samples.} + +\item{probe}{The column name representing the probes.} + +\item{haplotype}{The column name representing the haplotypes.} + +\item{rel_freq}{The column name representing relative abundance or frequency +of each haplotype.} + +\item{minPopSize}{The minimum number of haplotypes required per probe. If +there are less haplotypes, the probe will be ignored.} + +\item{n_colours}{An integer indicating the number of colors to be used.} + +\item{barHeight}{The height of the bar used for plotting.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\description{ +Prepare data for a rainbow haplotype plot. Creates several additional +columns, adding them to the dataset. +} +\seealso{ +\code{\link[=plot_haplotypes]{plot_haplotypes()}} for creating the rainbow haplotype plot. +} +\author{ +Aris Paschalidis (\href{https://github.com/arisp99}{@arisp99}), Nick J. +Hathaway (\href{https://github.com/nickjhathaway}{@nickjhathaway}) +} diff --git a/tests/testthat/_snaps/themes/rainbow-theme.svg b/tests/testthat/_snaps/themes/rainbow-theme.svg index 623b338..c9cd05e 100644 --- a/tests/testthat/_snaps/themes/rainbow-theme.svg +++ b/tests/testthat/_snaps/themes/rainbow-theme.svg @@ -43,11 +43,6 @@ - - - - - 1.0 1.5 2.0 diff --git a/tests/testthat/_snaps/themes/theme-rainbow.svg b/tests/testthat/_snaps/themes/theme-rainbow.svg index cae2ad9..0e35e8b 100644 --- a/tests/testthat/_snaps/themes/theme-rainbow.svg +++ b/tests/testthat/_snaps/themes/theme-rainbow.svg @@ -43,11 +43,6 @@ - - - - - 1.0 1.5 2.0