diff --git a/DESCRIPTION b/DESCRIPTION
index b5c93850..092ef34c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -77,3 +77,4 @@ LinkingTo:
NeedsCompilation: yes
VignetteBuilder: knitr
Config/roxygen2/version: 8.0.0
+RoxygenNote: 7.3.3
diff --git a/NAMESPACE b/NAMESPACE
index ced6d96a..ec610e43 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,6 +20,8 @@ export(coloc_wrapper)
export(colocboost_analysis_pipeline)
export(compute_LD)
export(compute_qtl_enrichment)
+export(compute_sldsc_M_ref)
+export(compute_sldsc_annot_sd)
export(ctwas_bimfile_loader)
export(dentist)
export(dentist_single_window)
@@ -49,6 +51,7 @@ export(glmnet_weights)
export(harmonize_gwas)
export(harmonize_twas)
export(invert_minmax_scaling)
+export(is_binary_sldsc_annot)
export(l0learn_weights)
export(lasso_weights)
export(lassosum_rss)
@@ -79,6 +82,7 @@ export(match_ref_panel)
export(mcp_weights)
export(merge_mash_data)
export(merge_sumstats_matrices)
+export(meta_sldsc_random)
export(mr_analysis)
export(mr_ash_rss_weights)
export(mr_format)
@@ -97,6 +101,7 @@ export(prs_cs)
export(prs_cs_weights)
export(raiss)
export(read_afreq)
+export(read_sldsc_trait)
export(region_to_df)
export(rss_analysis_pipeline)
export(rss_basic_qc)
@@ -104,7 +109,9 @@ export(scad_weights)
export(sdpr)
export(sdpr_weights)
export(slalom)
+export(sldsc_postprocessing_pipeline)
export(standardise_sumstats_columns)
+export(standardize_sldsc_trait)
export(summary_stats_qc)
export(susie_ash_weights)
export(susie_inf_weights)
@@ -138,6 +145,7 @@ importFrom(IRanges,start)
importFrom(S4Vectors,queryHits)
importFrom(S4Vectors,subjectHits)
importFrom(coloc,coloc.bf_bf)
+importFrom(data.table,fread)
importFrom(doFuture,registerDoFuture)
importFrom(dplyr,across)
importFrom(dplyr,all_of)
@@ -198,6 +206,7 @@ importFrom(stats,optim)
importFrom(stats,pchisq)
importFrom(stats,pnorm)
importFrom(stats,predict)
+importFrom(stats,qnorm)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(stats,var)
diff --git a/R/sldsc_wrapper.R b/R/sldsc_wrapper.R
index b8e17c80..6afb3f05 100644
--- a/R/sldsc_wrapper.R
+++ b/R/sldsc_wrapper.R
@@ -546,6 +546,11 @@ sldsc_postprocessing_pipeline <- function(trait_single_prefixes,
message("[sldsc] Detecting binary vs continuous annotations...")
is_binary_full <- is_binary_sldsc_annot(target_anno_dir = target_anno_dir)
+ # Polyfun renames target columns to `
_0` (file_idx=0 in --ref-ld-chr);
+ # mirror that suffix so intersect() with pivot_run$categories matches.
+ names(sd_annot_full) <- paste0(names(sd_annot_full), "_0")
+ names(is_binary_full) <- paste0(names(is_binary_full), "_0")
+
# Auto-detect target categories from a representative run.
if (is.null(target_categories)) {
pivot_run <- NULL
@@ -564,6 +569,33 @@ sldsc_postprocessing_pipeline <- function(trait_single_prefixes,
if (is.null(pivot_run))
stop("sldsc_postprocessing_pipeline: cannot auto-detect target_categories.")
target_categories <- intersect(pivot_run$categories, names(sd_annot_full))
+ # Fallback when .annot.gz names + "_0" do not match polyfun's .results
+ # Category. Happens when the pipeline ran with --snp-list, since
+ # ldsc.py --l2 hardcodes the LD score column to "L2" (single annot) or
+ # "L2" (joint), instead of preserving the .annot.gz names.
+ # Trust polyfun's invariant that target categories occupy the first
+ # length(sd_annot_full) rows of .results, then rename positionally.
+ if (length(target_categories) == 0L) {
+ n_target <- length(sd_annot_full)
+ n_baseline <- length(pivot_run$categories) - n_target
+ old_names <- names(sd_annot_full)
+ target_categories <- pivot_run$categories[seq_len(n_target)]
+ names(sd_annot_full) <- target_categories
+ names(is_binary_full) <- target_categories
+ baseline_preview <- if (n_baseline > 0L)
+ paste(utils::head(pivot_run$categories[-seq_len(n_target)], 3), collapse = ", ")
+ else "(none)"
+ message(sprintf(paste0(
+ "[sldsc] sd_annot/is_binary names did not match polyfun .results categories;\n",
+ " falling back to positional rename (target = first %d rows of .results)\n",
+ " target (%d): %s -> %s\n",
+ " baseline (%d): %s%s"),
+ n_target,
+ n_target, paste(old_names, collapse = ", "),
+ paste(target_categories, collapse = ", "),
+ n_baseline, baseline_preview,
+ if (n_baseline > 3L) ", ..." else ""))
+ }
message(sprintf("[sldsc] Auto-detected %d target categories", length(target_categories)))
}
diff --git a/man/compute_sldsc_M_ref.Rd b/man/compute_sldsc_M_ref.Rd
new file mode 100644
index 00000000..a3b45530
--- /dev/null
+++ b/man/compute_sldsc_M_ref.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sldsc_wrapper.R
+\name{compute_sldsc_M_ref}
+\alias{compute_sldsc_M_ref}
+\title{Reference-panel SNP count at a given MAF cutoff}
+\usage{
+compute_sldsc_M_ref(
+ target_anno_dir = NULL,
+ frqfile_dir = NULL,
+ plink_name = "ADSP_chr",
+ maf_cutoff = 0.05
+)
+}
+\arguments{
+\item{target_anno_dir}{Character or NULL. Directory of `.l2.ldscore` files
+produced by polyfun's `compute_ldscores.py`. Required when
+`maf_cutoff == 0`.}
+
+\item{frqfile_dir}{Character or NULL. Directory of PLINK `.frq` files.
+Required when `maf_cutoff > 0`.}
+
+\item{plink_name}{Character. Filename prefix of `.frq` files.}
+
+\item{maf_cutoff}{Numeric, default `0.05`.}
+}
+\value{
+Scalar integer.
+}
+\description{
+Returns the number of SNPs in the reference panel above the MAF
+ cutoff. When `maf_cutoff > 0`, counts MAF > cutoff entries across all
+ `.frq` files. When `maf_cutoff == 0`, counts rows of `.l2.ldscore`
+ files in `target_anno_dir`.
+}
diff --git a/man/compute_sldsc_annot_sd.Rd b/man/compute_sldsc_annot_sd.Rd
new file mode 100644
index 00000000..f39c90f7
--- /dev/null
+++ b/man/compute_sldsc_annot_sd.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sldsc_wrapper.R
+\name{compute_sldsc_annot_sd}
+\alias{compute_sldsc_annot_sd}
+\title{Compute per-annotation standard deviation, MAF-restricted}
+\usage{
+compute_sldsc_annot_sd(
+ target_anno_dir,
+ frqfile_dir = NULL,
+ plink_name = "ADSP_chr",
+ maf_cutoff = 0.05,
+ annot_cols = NULL
+)
+}
+\arguments{
+\item{target_anno_dir}{Character. Directory containing target annotation files
+(one per chromosome) in polyfun's `.annot.gz` format.}
+
+\item{frqfile_dir}{Character or NULL. Directory containing PLINK `.frq` files
+for the reference panel. Required when `maf_cutoff > 0`; the function
+errors if missing.}
+
+\item{plink_name}{Character. Filename prefix of the `.frq` files
+(e.g. `"ADSP_chr"`). Files are expected at `{plink_name}{chr}.frq`.}
+
+\item{maf_cutoff}{Numeric, default `0.05`.}
+
+\item{annot_cols}{Character or integer vector, default NULL. Annotation columns
+to compute sd for. If NULL, all annotation columns are used.}
+}
+\value{
+Named numeric vector of \eqn{sd_C} values, one per annotation.
+}
+\description{
+Computes the standard deviation of each annotation column in the
+ target annotation files, restricted to SNPs above a MAF cutoff via PLINK
+ `.frq` files. Required for internal consistency with polyfun's regression,
+ which operates on MAF > cutoff SNPs by default.
+}
diff --git a/man/is_binary_sldsc_annot.Rd b/man/is_binary_sldsc_annot.Rd
new file mode 100644
index 00000000..d45269f0
--- /dev/null
+++ b/man/is_binary_sldsc_annot.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sldsc_wrapper.R
+\name{is_binary_sldsc_annot}
+\alias{is_binary_sldsc_annot}
+\title{Detect whether each annotation is binary or continuous}
+\usage{
+is_binary_sldsc_annot(target_anno_dir, annot_cols = NULL)
+}
+\arguments{
+\item{target_anno_dir}{Character. Directory containing the target `.annot.gz`
+files (one per chromosome).}
+
+\item{annot_cols}{Character or integer vector, default NULL.}
+}
+\value{
+Named logical vector: TRUE for binary, FALSE for continuous.
+}
+\description{
+Inspects each annotation column and returns whether its values
+ lie in \{0, 1\} (binary) or take other values (continuous).
+}
diff --git a/man/meta_sldsc_random.Rd b/man/meta_sldsc_random.Rd
new file mode 100644
index 00000000..542acd02
--- /dev/null
+++ b/man/meta_sldsc_random.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sldsc_wrapper.R
+\name{meta_sldsc_random}
+\alias{meta_sldsc_random}
+\title{Random-effects meta-analysis of S-LDSC quantities across traits}
+\usage{
+meta_sldsc_random(
+ per_trait_estimates,
+ category,
+ quantity = c("tau_star", "enrichment", "enrichstat")
+)
+}
+\arguments{
+\item{per_trait_estimates}{Named list of per-trait results (each with a
+`summary` data frame).}
+
+\item{category}{Character. Annotation name to meta-analyze.}
+
+\item{quantity}{Character: `"tau_star"`, `"enrichment"`, or `"enrichstat"`.}
+}
+\value{
+List with `mean`, `se`, `p`, `n_traits`, `traits_used`, `tau2`.
+}
+\description{
+DerSimonian-Laird random-effects meta-analysis of one S-LDSC
+ quantity for one annotation across multiple traits.
+}
+\details{
+Per-trait \eqn{SE_i} sources:
+ - `quantity = "tau_star"`: jackknife SE from per-block \eqn{\tau^*}.
+ - `quantity = "enrichment"`: polyfun-reported `Enrichment_std_error`.
+ - `quantity = "enrichstat"`: back-solved SE from polyfun's `Enrichment_p`.
+}
diff --git a/man/read_sldsc_trait.Rd b/man/read_sldsc_trait.Rd
new file mode 100644
index 00000000..0d5e49bb
--- /dev/null
+++ b/man/read_sldsc_trait.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sldsc_wrapper.R
+\name{read_sldsc_trait}
+\alias{read_sldsc_trait}
+\title{Read S-LDSC outputs from polyfun for one trait/run}
+\usage{
+read_sldsc_trait(prefix)
+}
+\arguments{
+\item{prefix}{Character. Path prefix to the polyfun outputs for one trait/run.
+The function appends `.results`, `.log`, and `.part_delete` to this prefix.
+Example: `"/path/to/cwd/CAD_META.filtered.sumstats.gz"`.}
+}
+\value{
+A named list. See `sldsc_postprocessing_pipeline` for components.
+}
+\description{
+Reads the regression outputs produced by `polyfun/ldsc.py` for a
+ single polyfun run (one trait, one annotation set) and returns them as a
+ tidy list ready for downstream standardization. Hides the underlying file
+ formats; downstream code consumes only modeling quantities.
+}
+\examples{
+\dontrun{
+run <- read_sldsc_trait("/output/CAD_META.filtered.sumstats.gz")
+run$tau["my_target_annotation"]
+}
+
+}
diff --git a/man/sldsc_postprocessing_pipeline.Rd b/man/sldsc_postprocessing_pipeline.Rd
new file mode 100644
index 00000000..fd61169b
--- /dev/null
+++ b/man/sldsc_postprocessing_pipeline.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sldsc_wrapper.R
+\name{sldsc_postprocessing_pipeline}
+\alias{sldsc_postprocessing_pipeline}
+\title{End-to-end S-LDSC post-processing across traits, single + joint in one pass}
+\usage{
+sldsc_postprocessing_pipeline(
+ trait_single_prefixes,
+ trait_joint_prefix,
+ target_anno_dir,
+ frqfile_dir = NULL,
+ plink_name = "ADSP_chr",
+ maf_cutoff = 0.05,
+ target_categories = NULL
+)
+}
+\arguments{
+\item{trait_single_prefixes}{Named list. For each trait, a character vector
+of length \eqn{N} giving the polyfun output prefixes for the \eqn{N}
+single-target runs (order must match `target_categories`).}
+
+\item{trait_joint_prefix}{Named character. For each trait, the polyfun output
+prefix for the joint run. Pass `NA` (or `""`) for a trait without a joint run.}
+
+\item{target_anno_dir}{Character. Directory of target `.annot.gz` files used
+for `sd_C` and binary detection (typically the joint-mode dir).}
+
+\item{frqfile_dir}{Character or NULL.}
+
+\item{plink_name}{Character. Default `"ADSP_chr"`.}
+
+\item{maf_cutoff}{Numeric, default `0.05`.}
+
+\item{target_categories}{Character vector or NULL. Auto-detected from the
+first available run if NULL.}
+}
+\value{
+List with `per_trait`, `meta` (three frames), `params`.
+}
+\description{
+Top-level orchestration. Reads polyfun outputs (one single-target
+ run per target plus, when available, one joint run per trait), standardizes
+ both modes, and runs the default random-effects meta across all traits.
+}
diff --git a/man/standardize_sldsc_trait.Rd b/man/standardize_sldsc_trait.Rd
new file mode 100644
index 00000000..dd66cb20
--- /dev/null
+++ b/man/standardize_sldsc_trait.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sldsc_wrapper.R
+\name{standardize_sldsc_trait}
+\alias{standardize_sldsc_trait}
+\title{Standardize tau and compute EnrichStat for one polyfun run}
+\usage{
+standardize_sldsc_trait(
+ trait_data,
+ sd_annot,
+ M_ref,
+ target_categories = NULL,
+ mode = c("single", "joint")
+)
+}
+\arguments{
+\item{trait_data}{List from \code{\link{read_sldsc_trait}}.}
+
+\item{sd_annot}{Named numeric vector from \code{\link{compute_sldsc_annot_sd}}.}
+
+\item{M_ref}{Scalar from \code{\link{compute_sldsc_M_ref}}.}
+
+\item{target_categories}{Character vector or NULL. If NULL, intersects
+`trait_data$categories` with `names(sd_annot)`.}
+
+\item{mode}{Character: `"single"` or `"joint"`.}
+}
+\value{
+A list with `summary` (data frame), `tau_star_blocks` (matrix),
+ `h2g`, `n_blocks`, `mode`.
+}
+\description{
+Applies the Gazal standardization
+ \eqn{\tau^*_C = \tau_C \cdot sd_C \cdot M_{ref} / h^2_g} to the point and
+ to each jackknife block. For `mode = "single"`, additionally computes
+ EnrichStat and back-solves its standard error from polyfun's reported
+ `Enrichment_p` using \eqn{|Z| = \Phi^{-1}(1 - p/2)}.
+}