diff --git a/DESCRIPTION b/DESCRIPTION index b5c93850..092ef34c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -77,3 +77,4 @@ LinkingTo: NeedsCompilation: yes VignetteBuilder: knitr Config/roxygen2/version: 8.0.0 +RoxygenNote: 7.3.3 diff --git a/NAMESPACE b/NAMESPACE index ced6d96a..ec610e43 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,8 @@ export(coloc_wrapper) export(colocboost_analysis_pipeline) export(compute_LD) export(compute_qtl_enrichment) +export(compute_sldsc_M_ref) +export(compute_sldsc_annot_sd) export(ctwas_bimfile_loader) export(dentist) export(dentist_single_window) @@ -49,6 +51,7 @@ export(glmnet_weights) export(harmonize_gwas) export(harmonize_twas) export(invert_minmax_scaling) +export(is_binary_sldsc_annot) export(l0learn_weights) export(lasso_weights) export(lassosum_rss) @@ -79,6 +82,7 @@ export(match_ref_panel) export(mcp_weights) export(merge_mash_data) export(merge_sumstats_matrices) +export(meta_sldsc_random) export(mr_analysis) export(mr_ash_rss_weights) export(mr_format) @@ -97,6 +101,7 @@ export(prs_cs) export(prs_cs_weights) export(raiss) export(read_afreq) +export(read_sldsc_trait) export(region_to_df) export(rss_analysis_pipeline) export(rss_basic_qc) @@ -104,7 +109,9 @@ export(scad_weights) export(sdpr) export(sdpr_weights) export(slalom) +export(sldsc_postprocessing_pipeline) export(standardise_sumstats_columns) +export(standardize_sldsc_trait) export(summary_stats_qc) export(susie_ash_weights) export(susie_inf_weights) @@ -138,6 +145,7 @@ importFrom(IRanges,start) importFrom(S4Vectors,queryHits) importFrom(S4Vectors,subjectHits) importFrom(coloc,coloc.bf_bf) +importFrom(data.table,fread) importFrom(doFuture,registerDoFuture) importFrom(dplyr,across) importFrom(dplyr,all_of) @@ -198,6 +206,7 @@ importFrom(stats,optim) importFrom(stats,pchisq) importFrom(stats,pnorm) importFrom(stats,predict) +importFrom(stats,qnorm) importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,var) diff --git a/R/sldsc_wrapper.R b/R/sldsc_wrapper.R index b8e17c80..6afb3f05 100644 --- a/R/sldsc_wrapper.R +++ b/R/sldsc_wrapper.R @@ -546,6 +546,11 @@ sldsc_postprocessing_pipeline <- function(trait_single_prefixes, message("[sldsc] Detecting binary vs continuous annotations...") is_binary_full <- is_binary_sldsc_annot(target_anno_dir = target_anno_dir) + # Polyfun renames target columns to `_0` (file_idx=0 in --ref-ld-chr); + # mirror that suffix so intersect() with pivot_run$categories matches. + names(sd_annot_full) <- paste0(names(sd_annot_full), "_0") + names(is_binary_full) <- paste0(names(is_binary_full), "_0") + # Auto-detect target categories from a representative run. if (is.null(target_categories)) { pivot_run <- NULL @@ -564,6 +569,33 @@ sldsc_postprocessing_pipeline <- function(trait_single_prefixes, if (is.null(pivot_run)) stop("sldsc_postprocessing_pipeline: cannot auto-detect target_categories.") target_categories <- intersect(pivot_run$categories, names(sd_annot_full)) + # Fallback when .annot.gz names + "_0" do not match polyfun's .results + # Category. Happens when the pipeline ran with --snp-list, since + # ldsc.py --l2 hardcodes the LD score column to "L2" (single annot) or + # "L2" (joint), instead of preserving the .annot.gz names. + # Trust polyfun's invariant that target categories occupy the first + # length(sd_annot_full) rows of .results, then rename positionally. + if (length(target_categories) == 0L) { + n_target <- length(sd_annot_full) + n_baseline <- length(pivot_run$categories) - n_target + old_names <- names(sd_annot_full) + target_categories <- pivot_run$categories[seq_len(n_target)] + names(sd_annot_full) <- target_categories + names(is_binary_full) <- target_categories + baseline_preview <- if (n_baseline > 0L) + paste(utils::head(pivot_run$categories[-seq_len(n_target)], 3), collapse = ", ") + else "(none)" + message(sprintf(paste0( + "[sldsc] sd_annot/is_binary names did not match polyfun .results categories;\n", + " falling back to positional rename (target = first %d rows of .results)\n", + " target (%d): %s -> %s\n", + " baseline (%d): %s%s"), + n_target, + n_target, paste(old_names, collapse = ", "), + paste(target_categories, collapse = ", "), + n_baseline, baseline_preview, + if (n_baseline > 3L) ", ..." else "")) + } message(sprintf("[sldsc] Auto-detected %d target categories", length(target_categories))) } diff --git a/man/compute_sldsc_M_ref.Rd b/man/compute_sldsc_M_ref.Rd new file mode 100644 index 00000000..a3b45530 --- /dev/null +++ b/man/compute_sldsc_M_ref.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldsc_wrapper.R +\name{compute_sldsc_M_ref} +\alias{compute_sldsc_M_ref} +\title{Reference-panel SNP count at a given MAF cutoff} +\usage{ +compute_sldsc_M_ref( + target_anno_dir = NULL, + frqfile_dir = NULL, + plink_name = "ADSP_chr", + maf_cutoff = 0.05 +) +} +\arguments{ +\item{target_anno_dir}{Character or NULL. Directory of `.l2.ldscore` files +produced by polyfun's `compute_ldscores.py`. Required when +`maf_cutoff == 0`.} + +\item{frqfile_dir}{Character or NULL. Directory of PLINK `.frq` files. +Required when `maf_cutoff > 0`.} + +\item{plink_name}{Character. Filename prefix of `.frq` files.} + +\item{maf_cutoff}{Numeric, default `0.05`.} +} +\value{ +Scalar integer. +} +\description{ +Returns the number of SNPs in the reference panel above the MAF + cutoff. When `maf_cutoff > 0`, counts MAF > cutoff entries across all + `.frq` files. When `maf_cutoff == 0`, counts rows of `.l2.ldscore` + files in `target_anno_dir`. +} diff --git a/man/compute_sldsc_annot_sd.Rd b/man/compute_sldsc_annot_sd.Rd new file mode 100644 index 00000000..f39c90f7 --- /dev/null +++ b/man/compute_sldsc_annot_sd.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldsc_wrapper.R +\name{compute_sldsc_annot_sd} +\alias{compute_sldsc_annot_sd} +\title{Compute per-annotation standard deviation, MAF-restricted} +\usage{ +compute_sldsc_annot_sd( + target_anno_dir, + frqfile_dir = NULL, + plink_name = "ADSP_chr", + maf_cutoff = 0.05, + annot_cols = NULL +) +} +\arguments{ +\item{target_anno_dir}{Character. Directory containing target annotation files +(one per chromosome) in polyfun's `.annot.gz` format.} + +\item{frqfile_dir}{Character or NULL. Directory containing PLINK `.frq` files +for the reference panel. Required when `maf_cutoff > 0`; the function +errors if missing.} + +\item{plink_name}{Character. Filename prefix of the `.frq` files +(e.g. `"ADSP_chr"`). Files are expected at `{plink_name}{chr}.frq`.} + +\item{maf_cutoff}{Numeric, default `0.05`.} + +\item{annot_cols}{Character or integer vector, default NULL. Annotation columns +to compute sd for. If NULL, all annotation columns are used.} +} +\value{ +Named numeric vector of \eqn{sd_C} values, one per annotation. +} +\description{ +Computes the standard deviation of each annotation column in the + target annotation files, restricted to SNPs above a MAF cutoff via PLINK + `.frq` files. Required for internal consistency with polyfun's regression, + which operates on MAF > cutoff SNPs by default. +} diff --git a/man/is_binary_sldsc_annot.Rd b/man/is_binary_sldsc_annot.Rd new file mode 100644 index 00000000..d45269f0 --- /dev/null +++ b/man/is_binary_sldsc_annot.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldsc_wrapper.R +\name{is_binary_sldsc_annot} +\alias{is_binary_sldsc_annot} +\title{Detect whether each annotation is binary or continuous} +\usage{ +is_binary_sldsc_annot(target_anno_dir, annot_cols = NULL) +} +\arguments{ +\item{target_anno_dir}{Character. Directory containing the target `.annot.gz` +files (one per chromosome).} + +\item{annot_cols}{Character or integer vector, default NULL.} +} +\value{ +Named logical vector: TRUE for binary, FALSE for continuous. +} +\description{ +Inspects each annotation column and returns whether its values + lie in \{0, 1\} (binary) or take other values (continuous). +} diff --git a/man/meta_sldsc_random.Rd b/man/meta_sldsc_random.Rd new file mode 100644 index 00000000..542acd02 --- /dev/null +++ b/man/meta_sldsc_random.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldsc_wrapper.R +\name{meta_sldsc_random} +\alias{meta_sldsc_random} +\title{Random-effects meta-analysis of S-LDSC quantities across traits} +\usage{ +meta_sldsc_random( + per_trait_estimates, + category, + quantity = c("tau_star", "enrichment", "enrichstat") +) +} +\arguments{ +\item{per_trait_estimates}{Named list of per-trait results (each with a +`summary` data frame).} + +\item{category}{Character. Annotation name to meta-analyze.} + +\item{quantity}{Character: `"tau_star"`, `"enrichment"`, or `"enrichstat"`.} +} +\value{ +List with `mean`, `se`, `p`, `n_traits`, `traits_used`, `tau2`. +} +\description{ +DerSimonian-Laird random-effects meta-analysis of one S-LDSC + quantity for one annotation across multiple traits. +} +\details{ +Per-trait \eqn{SE_i} sources: + - `quantity = "tau_star"`: jackknife SE from per-block \eqn{\tau^*}. + - `quantity = "enrichment"`: polyfun-reported `Enrichment_std_error`. + - `quantity = "enrichstat"`: back-solved SE from polyfun's `Enrichment_p`. +} diff --git a/man/read_sldsc_trait.Rd b/man/read_sldsc_trait.Rd new file mode 100644 index 00000000..0d5e49bb --- /dev/null +++ b/man/read_sldsc_trait.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldsc_wrapper.R +\name{read_sldsc_trait} +\alias{read_sldsc_trait} +\title{Read S-LDSC outputs from polyfun for one trait/run} +\usage{ +read_sldsc_trait(prefix) +} +\arguments{ +\item{prefix}{Character. Path prefix to the polyfun outputs for one trait/run. +The function appends `.results`, `.log`, and `.part_delete` to this prefix. +Example: `"/path/to/cwd/CAD_META.filtered.sumstats.gz"`.} +} +\value{ +A named list. See `sldsc_postprocessing_pipeline` for components. +} +\description{ +Reads the regression outputs produced by `polyfun/ldsc.py` for a + single polyfun run (one trait, one annotation set) and returns them as a + tidy list ready for downstream standardization. Hides the underlying file + formats; downstream code consumes only modeling quantities. +} +\examples{ +\dontrun{ +run <- read_sldsc_trait("/output/CAD_META.filtered.sumstats.gz") +run$tau["my_target_annotation"] +} + +} diff --git a/man/sldsc_postprocessing_pipeline.Rd b/man/sldsc_postprocessing_pipeline.Rd new file mode 100644 index 00000000..fd61169b --- /dev/null +++ b/man/sldsc_postprocessing_pipeline.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldsc_wrapper.R +\name{sldsc_postprocessing_pipeline} +\alias{sldsc_postprocessing_pipeline} +\title{End-to-end S-LDSC post-processing across traits, single + joint in one pass} +\usage{ +sldsc_postprocessing_pipeline( + trait_single_prefixes, + trait_joint_prefix, + target_anno_dir, + frqfile_dir = NULL, + plink_name = "ADSP_chr", + maf_cutoff = 0.05, + target_categories = NULL +) +} +\arguments{ +\item{trait_single_prefixes}{Named list. For each trait, a character vector +of length \eqn{N} giving the polyfun output prefixes for the \eqn{N} +single-target runs (order must match `target_categories`).} + +\item{trait_joint_prefix}{Named character. For each trait, the polyfun output +prefix for the joint run. Pass `NA` (or `""`) for a trait without a joint run.} + +\item{target_anno_dir}{Character. Directory of target `.annot.gz` files used +for `sd_C` and binary detection (typically the joint-mode dir).} + +\item{frqfile_dir}{Character or NULL.} + +\item{plink_name}{Character. Default `"ADSP_chr"`.} + +\item{maf_cutoff}{Numeric, default `0.05`.} + +\item{target_categories}{Character vector or NULL. Auto-detected from the +first available run if NULL.} +} +\value{ +List with `per_trait`, `meta` (three frames), `params`. +} +\description{ +Top-level orchestration. Reads polyfun outputs (one single-target + run per target plus, when available, one joint run per trait), standardizes + both modes, and runs the default random-effects meta across all traits. +} diff --git a/man/standardize_sldsc_trait.Rd b/man/standardize_sldsc_trait.Rd new file mode 100644 index 00000000..dd66cb20 --- /dev/null +++ b/man/standardize_sldsc_trait.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldsc_wrapper.R +\name{standardize_sldsc_trait} +\alias{standardize_sldsc_trait} +\title{Standardize tau and compute EnrichStat for one polyfun run} +\usage{ +standardize_sldsc_trait( + trait_data, + sd_annot, + M_ref, + target_categories = NULL, + mode = c("single", "joint") +) +} +\arguments{ +\item{trait_data}{List from \code{\link{read_sldsc_trait}}.} + +\item{sd_annot}{Named numeric vector from \code{\link{compute_sldsc_annot_sd}}.} + +\item{M_ref}{Scalar from \code{\link{compute_sldsc_M_ref}}.} + +\item{target_categories}{Character vector or NULL. If NULL, intersects +`trait_data$categories` with `names(sd_annot)`.} + +\item{mode}{Character: `"single"` or `"joint"`.} +} +\value{ +A list with `summary` (data frame), `tau_star_blocks` (matrix), + `h2g`, `n_blocks`, `mode`. +} +\description{ +Applies the Gazal standardization + \eqn{\tau^*_C = \tau_C \cdot sd_C \cdot M_{ref} / h^2_g} to the point and + to each jackknife block. For `mode = "single"`, additionally computes + EnrichStat and back-solves its standard error from polyfun's reported + `Enrichment_p` using \eqn{|Z| = \Phi^{-1}(1 - p/2)}. +}