diff --git a/DESCRIPTION b/DESCRIPTION
index dd0c263..c2b4da0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -36,6 +36,7 @@ Suggests:
karyoploteR,
knitr,
mockery,
+ RColorBrewer,
regioneR,
rmarkdown,
roxygen2,
diff --git a/NAMESPACE b/NAMESPACE
index d2ef2fc..bca4b2b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,7 +14,9 @@ export(enquos)
export(expr)
export(miplicorn_example)
export(plot_chromoMap)
+export(plot_haplotypes)
export(plot_karyoploteR)
+export(prep_haplotypes)
export(read)
export(read_file)
export(sym)
diff --git a/R/rainbow_haplotype.R b/R/rainbow_haplotype.R
new file mode 100644
index 0000000..2852bf9
--- /dev/null
+++ b/R/rainbow_haplotype.R
@@ -0,0 +1,205 @@
+#------------------------------------------------
+#' Prepare data for rainbow haplotype plot
+#'
+#' Prepare data for a rainbow haplotype plot. Creates several additional
+#' columns, adding them to the dataset.
+#'
+#' @param data The data to be plotted.
+#' @param sample The column name representing the samples.
+#' @param probe The column name representing the probes.
+#' @param haplotype The column name representing the haplotypes.
+#' @param rel_freq The column name representing relative abundance or frequency
+#' of each haplotype.
+#' @param minPopSize The minimum number of haplotypes required per probe. If
+#' there are less haplotypes, the probe will be ignored.
+#' @param n_colours An integer indicating the number of colors to be used.
+#' @param barHeight The height of the bar used for plotting.
+#' @param ... These dots are for future extensions and must be empty.
+#'
+#' @author Aris Paschalidis ([@arisp99](https://github.com/arisp99)), Nick J.
+#' Hathaway ([@nickjhathaway](https://github.com/nickjhathaway))
+#' @seealso [plot_haplotypes()] for creating the rainbow haplotype plot.
+#'
+#' @export
+prep_haplotypes <- function(data,
+ sample = s_Sample,
+ probe = p_name,
+ haplotype = h_popUID,
+ rel_freq = c_AveragedFrac,
+ minPopSize = 3,
+ n_colours = 11,
+ barHeight = 0.80,
+ ...) {
+ # For each sample, mip, and haplotype, sum over all the data points
+ # Gives us one relative freq value for each haplotype
+ data_sum <- data %>%
+ dplyr::mutate("{{sample}}" := factor({{ sample }})) %>%
+ dplyr::group_by({{ sample }}, {{ probe }}, {{ haplotype }}) %>%
+ dplyr::summarise("{{rel_freq}}" := sum({{ rel_freq }}))
+
+ # Ensure relative column is a frequency. Divide abundance by total abundance
+ data_rel <- data_sum %>%
+ dplyr::group_by({{ sample }}, {{ probe }}) %>%
+ dplyr::mutate(
+ totalAbund = sum({{ rel_freq }}),
+ "{{rel_freq}}" := {{ rel_freq }} / .data$totalAbund
+ )
+
+ # Find the number of haplotypes
+ data_n_hap <- data_rel %>%
+ dplyr::group_by({{ sample }}, {{ probe }}, {{ haplotype }}) %>%
+ dplyr::mutate(s_COI = dplyr::n_distinct({{ haplotype }}))
+
+ # Plotting info
+ data_plot <- data_n_hap %>%
+ dplyr::group_by({{ sample }}, {{ probe }}) %>%
+ dplyr::mutate(
+ relAbundCol_mod = {{ rel_freq }} * barHeight,
+ fracCumSum = cumsum({{ rel_freq }}) - {{ rel_freq }},
+ fracModCumSum = cumsum(.data$relAbundCol_mod) - .data$relAbundCol_mod,
+ fakeFrac = 1 / unique(.data$s_COI),
+ fakeFracMod = .data$fakeFrac * barHeight,
+ fakeFracCumSum = cumsum(.data$fakeFrac) - .data$fakeFrac,
+ fakeFracModCumSum = cumsum(.data$fakeFracMod) - .data$fakeFracMod
+ ) %>%
+ dplyr::ungroup()
+
+ # Determine how many samples have a haplotype
+ # Determine number of haplotypes per probe
+ data_counts <- data_plot %>%
+ dplyr::group_by({{ probe }}, {{ haplotype }}) %>%
+ dplyr::summarise(samp_n = dplyr::n()) %>%
+ dplyr::arrange({{ probe }}, dplyr::desc(.data$samp_n)) %>%
+ dplyr::group_by({{ probe }}) %>%
+ dplyr::mutate(
+ popid = dplyr::row_number(),
+ maxPopid = max(.data$popid)
+ )
+
+ data_join <- data_plot %>%
+ dplyr::left_join(data_counts)
+
+ # Filter based on minimum population size
+ # (number of probes with the haplotype)
+ data_filter <- data_join %>%
+ dplyr::filter(.data$maxPopid >= minPopSize) %>%
+ dplyr::mutate("{{probe}}" := factor({{ probe }}))
+
+ # From here on, we deal with determining the colors of each haplotype
+ # The shading code looks different here... L153 in original code
+ colorsOutput <- n_colours
+ targetNumber <- 0
+ targetToHue <- tibble::tibble(
+ "{{probe}}" := character(),
+ hueMod = double()
+ )
+ tempTarCol <- dplyr::pull(data_filter, {{ probe }})
+
+ for (tarname in levels(tempTarCol)) {
+ targetToHue <- targetToHue %>%
+ tibble::add_row(
+ "{{probe}}" := tarname,
+ hueMod = (targetNumber %% colorsOutput) + 1
+ )
+ targetNumber <- targetNumber + 1
+ }
+
+ data_filter <- data_filter %>%
+ dplyr::group_by({{ probe }}) %>%
+ dplyr::mutate(popidFrac = (.data$popid - 1) / (.data$maxPopid))
+
+ tempTarCol <- dplyr::pull(data_filter, {{ probe }})
+
+ targetToHue <- targetToHue %>%
+ dplyr::mutate("{{probe}}" := factor({{ probe }}, levels = levels(tempTarCol)))
+
+ data_final <- data_filter %>%
+ dplyr::left_join(targetToHue) %>%
+ dplyr::mutate(
+ popidPerc = 100 * .data$popidFrac,
+ popidFracRegColor = round(abs((.data$popidPerc + (.data$hueMod / colorsOutput) * 100) %% 200 - 0.0001) %% 100),
+ popidPercLog = log((.data$popidFrac * 99) + 1, base = 100) * 100,
+ popidFracLogColor = round(abs((.data$popidPercLog + (.data$hueMod / colorsOutput) * 100) %% 200 - 0.0001) %% 100)
+ )
+
+ # Only keep needed columns to not confuse the user
+ dplyr::select(
+ data_final,
+ {{ sample }},
+ {{ probe }},
+ {{ haplotype }},
+ .data$fracModCumSum,
+ .data$relAbundCol_mod,
+ .data$popidFracLogColor
+ )
+}
+
+#------------------------------------------------
+#' Create rainbow haplotype plot
+#'
+#' Create a rainbow haplotype plot. Each row represents a sample and each column
+#' represents a unique probe. The colors in each bar represent the haplotypes
+#' identified for each probe. The height of the bar illustrates the relative
+#' abundance of each haplotype.
+#'
+#' @inheritParams prep_haplotypes
+#'
+#' @author Aris Paschalidis ([@arisp99](https://github.com/arisp99)), Nick J.
+#' Hathaway ([@nickjhathaway](https://github.com/nickjhathaway))
+#' @seealso [prep_haplotypes()] for generating the data used for plotting.
+#'
+#' @export
+plot_haplotypes <- function(data,
+ sample = s_Sample,
+ probe = p_name,
+ haplotype = h_popUID,
+ rel_freq = c_AveragedFrac,
+ n_colours = 11,
+ ...) {
+ # Prepare the data
+ # data <- prep_haplotypes(data)
+
+ # Find unique samples to label the y-axis
+ unique_samples <- data %>%
+ dplyr::ungroup() %>%
+ dplyr::select({{ sample }}) %>%
+ unique() %>%
+ dplyr::arrange({{ sample }}) %>%
+ dplyr::pull()
+
+ # Set up colors
+ if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
+ cli_abort('Package "RColorBrewer" needed to create a colour palette. Please install it.')
+ }
+ colours <- RColorBrewer::brewer.pal(n_colours, "Spectral")
+
+ # Plot
+ ggplot2::ggplot(data) +
+ ggplot2::geom_rect(
+ ggplot2::aes(
+ xmin = as.numeric({{ probe }}) - 0.5,
+ xmax = as.numeric({{ probe }}) + 0.5,
+ ymin = as.numeric({{ sample }}) + .data$fracModCumSum - 0.5,
+ ymax = as.numeric({{ sample }}) + .data$fracModCumSum + .data$relAbundCol_mod - 0.5,
+ fill = .data$popidFracLogColor
+ ),
+ color = "black"
+ ) +
+ ggplot2::scale_fill_gradientn(colours = colours) +
+ # Used for shading
+ # ggplot2::scale_fill_identity() +
+ ggplot2::scale_y_continuous(
+ breaks = seq_along(unique_samples),
+ labels = unique_samples
+ ) +
+ rainbow_theme() +
+ ggplot2::theme(
+ axis.text.x = ggplot2::element_blank(),
+ legend.position = "none"
+ )
+}
+
+# plot_haplotypes <- function(data,
+# colors = RColorBrewer::brewer.pal(11, "Spectral"),
+# ...) {
+# prep_data <- prep_haplotypes(data, ...)
diff --git a/R/utils.R b/R/utils.R
index b87388f..4c8b660 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -59,6 +59,7 @@ theme_rainbow <- function(base_size = 12,
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5),
axis.line.x = ggplot2::element_line(color="black", size = 0.3),
+ axis.ticks.x = ggplot2::element_blank(),
axis.line.y = ggplot2::element_line(color="black", size = 0.3),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
diff --git a/man/plot_haplotypes.Rd b/man/plot_haplotypes.Rd
new file mode 100644
index 0000000..5b6d625
--- /dev/null
+++ b/man/plot_haplotypes.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rainbow_haplotype.R
+\name{plot_haplotypes}
+\alias{plot_haplotypes}
+\title{Create rainbow haplotype plot}
+\usage{
+plot_haplotypes(
+ data,
+ sample = s_Sample,
+ probe = p_name,
+ haplotype = h_popUID,
+ rel_freq = c_AveragedFrac,
+ n_colours = 11,
+ ...
+)
+}
+\arguments{
+\item{data}{The data to be plotted.}
+
+\item{sample}{The column name representing the samples.}
+
+\item{probe}{The column name representing the probes.}
+
+\item{haplotype}{The column name representing the haplotypes.}
+
+\item{rel_freq}{The column name representing relative abundance or frequency
+of each haplotype.}
+
+\item{n_colours}{An integer indicating the number of colors to be used.}
+
+\item{...}{These dots are for future extensions and must be empty.}
+}
+\description{
+Create a rainbow haplotype plot. Each row represents a sample and each column
+represents a unique probe. The colors in each bar represent the haplotypes
+identified for each probe. The height of the bar illustrates the relative
+abundance of each haplotype.
+}
+\seealso{
+\code{\link[=prep_haplotypes]{prep_haplotypes()}} for generating the data used for plotting.
+}
+\author{
+Aris Paschalidis (\href{https://github.com/arisp99}{@arisp99}), Nick J.
+Hathaway (\href{https://github.com/nickjhathaway}{@nickjhathaway})
+}
diff --git a/man/prep_haplotypes.Rd b/man/prep_haplotypes.Rd
new file mode 100644
index 0000000..0c4f6bb
--- /dev/null
+++ b/man/prep_haplotypes.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rainbow_haplotype.R
+\name{prep_haplotypes}
+\alias{prep_haplotypes}
+\title{Prepare data for rainbow haplotype plot}
+\usage{
+prep_haplotypes(
+ data,
+ sample = s_Sample,
+ probe = p_name,
+ haplotype = h_popUID,
+ rel_freq = c_AveragedFrac,
+ minPopSize = 3,
+ n_colours = 11,
+ barHeight = 0.8,
+ ...
+)
+}
+\arguments{
+\item{data}{The data to be plotted.}
+
+\item{sample}{The column name representing the samples.}
+
+\item{probe}{The column name representing the probes.}
+
+\item{haplotype}{The column name representing the haplotypes.}
+
+\item{rel_freq}{The column name representing relative abundance or frequency
+of each haplotype.}
+
+\item{minPopSize}{The minimum number of haplotypes required per probe. If
+there are less haplotypes, the probe will be ignored.}
+
+\item{n_colours}{An integer indicating the number of colors to be used.}
+
+\item{barHeight}{The height of the bar used for plotting.}
+
+\item{...}{These dots are for future extensions and must be empty.}
+}
+\description{
+Prepare data for a rainbow haplotype plot. Creates several additional
+columns, adding them to the dataset.
+}
+\seealso{
+\code{\link[=plot_haplotypes]{plot_haplotypes()}} for creating the rainbow haplotype plot.
+}
+\author{
+Aris Paschalidis (\href{https://github.com/arisp99}{@arisp99}), Nick J.
+Hathaway (\href{https://github.com/nickjhathaway}{@nickjhathaway})
+}
diff --git a/tests/testthat/_snaps/themes/rainbow-theme.svg b/tests/testthat/_snaps/themes/rainbow-theme.svg
index 623b338..c9cd05e 100644
--- a/tests/testthat/_snaps/themes/rainbow-theme.svg
+++ b/tests/testthat/_snaps/themes/rainbow-theme.svg
@@ -43,11 +43,6 @@
-
-
-
-
-
1.0
1.5
2.0
diff --git a/tests/testthat/_snaps/themes/theme-rainbow.svg b/tests/testthat/_snaps/themes/theme-rainbow.svg
index cae2ad9..0e35e8b 100644
--- a/tests/testthat/_snaps/themes/theme-rainbow.svg
+++ b/tests/testthat/_snaps/themes/theme-rainbow.svg
@@ -43,11 +43,6 @@
-
-
-
-
-
1.0
1.5
2.0