From d89e3918a10ea4c81de1960b2565fd83aa23cb8a Mon Sep 17 00:00:00 2001 From: al4225 Date: Fri, 8 May 2026 10:46:52 -0400 Subject: [PATCH 1/2] sldsc_enrichment: normalize annot dataframe for ldsc.py --print-snps in snp-list mode When --snp-list is set, Step C invokes polyfun's ldsc.py --l2 --print-snps (rather than compute_ldscores.py). ldsc.py strict-positionally reads cols 0..3 as CHR/BP/SNP/CM and cols 4+ as numeric annotations, and requires the .annot SNP set to equal the .bim SNP set in identical order. Add a small normalize_for_ldsc() helper, applied to both single and joint annot dataframes before fwrite when use_print_snps is true. No-op otherwise. Without this, snp-list-flavored settings (ADSP allm_snplist / m50_snplist, 1000G allm_snplist / m50_snplist) failed Step C with TypeError on 1000G (A1/A2 strings parsed as numeric) or ValueError on ADSP (annot vs bim shape mismatch). Co-Authored-By: Claude Opus 4.7 (1M context) --- code/enrichment/sldsc_enrichment.ipynb | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/code/enrichment/sldsc_enrichment.ipynb b/code/enrichment/sldsc_enrichment.ipynb index 1bf98ce85..0fff76a9a 100644 --- a/code/enrichment/sldsc_enrichment.ipynb +++ b/code/enrichment/sldsc_enrichment.ipynb @@ -836,12 +836,36 @@ "\n", " emit_single_local <- ${\"TRUE\" if emit_single else \"FALSE\"}\n", " emit_joint_local <- ${\"TRUE\" if emit_joint else \"FALSE\"}\n", + " use_print_snps_local <- ${\"TRUE\" if use_print_snps else \"FALSE\"}\n", + " bfile_prefix <- \"${_input[-1]:na}\"\n", + "\n", + " # Reshape annot to match .bim panel for ldsc.py --l2 --print-snps\n", + " # (drop A1/A2/MAF, expand to .bim rows filling 0, take CM from .bim).\n", + " normalize_for_ldsc <- function(df) {\n", + " if (!use_print_snps_local) return(df)\n", + " df <- df[, !names(df) %in% c(\"A1\", \"A2\", \"MAF\", \"CM\"), drop = FALSE]\n", + " annot_cols <- setdiff(names(df), c(\"CHR\", \"BP\", \"SNP\"))\n", + " bim <- as.data.frame(fread(paste0(bfile_prefix, \".bim\"), header = FALSE,\n", + " col.names = c(\"CHR\", \"SNP\", \"CM\", \"BP\", \"A1\", \"A2\")))\n", + " bim$CHR <- as.character(bim$CHR); df$CHR <- as.character(df$CHR)\n", + " idx <- match(bim$SNP, df$SNP)\n", + " out <- data.frame(CHR = bim$CHR, BP = bim$BP, SNP = bim$SNP, CM = bim$CM,\n", + " stringsAsFactors = FALSE)\n", + " for (col in annot_cols) {\n", + " v <- rep(0, nrow(bim))\n", + " non_na <- !is.na(idx)\n", + " v[non_na] <- df[[col]][idx[non_na]]\n", + " out[[col]] <- v\n", + " }\n", + " out\n", + " }\n", "\n", " # ---- Write N single-target .annot files (when requested) ----\n", " if (emit_single_local) {\n", " for (i in seq_len(N_local)) {\n", " out_anno <- ref_anno\n", " out_anno$ANNOT <- score_list[[i]]\n", + " out_anno <- normalize_for_ldsc(out_anno)\n", " name <- paste0(\"${annotation_name}\", \"_single_\", i)\n", " out_path_gz <- file.path(\"${cwd:a}\", name, paste0(name, \".${input_chroms[_index]}.annot.gz\"))\n", " out_path_tsv <- sub(\"\\\\.gz$\", \"\", out_path_gz)\n", @@ -856,6 +880,7 @@ " for (i in seq_len(N_local)) {\n", " joint_anno[[paste0(\"ANNOT_\", i)]] <- score_list[[i]]\n", " }\n", + " joint_anno <- normalize_for_ldsc(joint_anno)\n", " joint_name <- paste0(\"${annotation_name}\", \"_joint\")\n", " joint_out_gz <- file.path(\"${cwd:a}\", joint_name, paste0(joint_name, \".${input_chroms[_index]}.annot.gz\"))\n", " joint_out_tsv <- sub(\"\\\\.gz$\", \"\", joint_out_gz)\n", From c08438dbf070f9aeab848d00d3a1a7d337ac4a35 Mon Sep 17 00:00:00 2001 From: al4225 Date: Fri, 8 May 2026 10:46:53 -0400 Subject: [PATCH 2/2] sldsc_enrichment [meta_subset]: split tau_star into single/joint via view helper postprocess writes per_trait[i]$summary with wide names (tau_star_single, tau_star_joint, ...) so a single per-trait list can hold both modes. But meta_sldsc_random looks up bare names (tau_star, tau_star_se), so passing subset_per_trait directly returned NULLs for all 96 traits, leaving meta output empty. Project subset_per_trait through pecotmr:::.sldsc_view_for_meta() once per mode (single | joint) before calling meta_sldsc_random. Output structure now mirrors postprocess for the "all" group: tau_star_single, tau_star_joint, enrichment, enrichstat. Co-Authored-By: Claude Opus 4.7 (1M context) --- code/enrichment/sldsc_enrichment.ipynb | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/code/enrichment/sldsc_enrichment.ipynb b/code/enrichment/sldsc_enrichment.ipynb index 0fff76a9a..f94d40fb5 100644 --- a/code/enrichment/sldsc_enrichment.ipynb +++ b/code/enrichment/sldsc_enrichment.ipynb @@ -1256,10 +1256,15 @@ "\n", " subset_per_trait <- res$per_trait[subset_traits]\n", "\n", + " # Map wide names (tau_star_single/joint) to bare names meta_sldsc_random expects.\n", + " view_single <- pecotmr:::.sldsc_view_for_meta(subset_per_trait, \"single\")\n", + " view_joint <- pecotmr:::.sldsc_view_for_meta(subset_per_trait, \"joint\")\n", + "\n", " out <- list(\n", - " tau_star = setNames(lapply(target_cats, function(c) meta_sldsc_random(subset_per_trait, c, \"tau_star\")), target_cats),\n", - " enrichment = setNames(lapply(target_cats, function(c) meta_sldsc_random(subset_per_trait, c, \"enrichment\")), target_cats),\n", - " enrichstat = setNames(lapply(target_cats, function(c) meta_sldsc_random(subset_per_trait, c, \"enrichstat\")), target_cats)\n", + " tau_star_single = setNames(lapply(target_cats, function(c) meta_sldsc_random(view_single, c, \"tau_star\")), target_cats),\n", + " tau_star_joint = setNames(lapply(target_cats, function(c) meta_sldsc_random(view_joint, c, \"tau_star\")), target_cats),\n", + " enrichment = setNames(lapply(target_cats, function(c) meta_sldsc_random(view_single, c, \"enrichment\")), target_cats),\n", + " enrichstat = setNames(lapply(target_cats, function(c) meta_sldsc_random(view_single, c, \"enrichstat\")), target_cats)\n", " )\n", "\n", " saveRDS(out, \"${_output[0]}\")\n",