StatFunGen · al4225 · May 4, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,32 @@
 # mfsusieR (development version)
 
+- susieR 0.16.1 compat: `track_ibss_fit.mf_individual` now delegates
+  to `susieR:::make_track_snapshot` so the per-iteration snapshots
+  satisfy the new `is_compact_track_snapshots` validator that
+  `ibss_finalize -> make_susie_track_history` runs at finalize.
+  Without this, `mfsusie(..., track_fit = TRUE)` errored with
+  "fit$trace is not a compact SuSiE track" against susieR >= 0.16.1.
+  The snapshot records mfsusieR's `sigma2` (a `list[M]`) as
+  `NA_real_` because susieR's helper assumes a scalar; the real
+  per-iteration values remain available on `fit$elbo` and
+  `fit$sigma2`.
+- New argument `save_mu_method = c("complete", "alpha_collapsed",
+  "lead")` on `mfsusie()` and the `fsusie()` wrapper. Default
+  `"complete"` is unchanged; `"alpha_collapsed"` and `"lead"`
+  shrink `fit$mu` and `fit$mu2` by roughly factor `p`. Under
+  `"alpha_collapsed"`, `coef.mfsusie` and `mf_post_smooth` are
+  numerically equivalent to `"complete"` (tolerance 1e-12), via
+  a precomputed `fit$coef_wavelet[[l]][[m]]` for the raw-X coef
+  path. Under `"lead"`, the same accessors return a cheap
+  lead-variable summary biased toward `j* = which.max(alpha[l, ])`
+  and the fit also carries `fit$top_index[l]`. Both 1D modes
+  error on `predict.mfsusie(newx)`, the plot per-variant
+  clfsr matrix, and `model_init` warm-start; refit with
+  `"complete"` for those operations.
+- New helper `mf_thin(fit, method)` performs the same trim as
+  `save_mu_method` but as a post-fit operation, so callers can
+  keep a complete checkpoint for warm-starts and a thinned copy
+  for distribution.
 - `pip` now incorporates the `V[l] > prior_tol` filter. The
   per-effect effective slab variance (mean over (m, s) of
   `sum_k pi[l, m, s, k] * var_k`) populates `model$V[l]` after

diff --git a/R/ibss_methods.R b/R/ibss_methods.R
@@ -231,21 +231,41 @@ ibss_initialize.mf_individual <- function(data, params) {
 #' IBSS iteration tracking (no-op by default)
 #'
 #' Tracking adds runtime overhead and large-state copies. mfsusieR
-#' Tracking is supported only when `params$track_fit` is `TRUE`,
+#' supports tracking only when `params$track_fit` is `TRUE`,
 #' otherwise this is a no-op (matches the susieR pattern but with
 #' a smaller default footprint, since per-effect curves are
 #' large).
 #'
+#' Snapshot format: each element of `tracking` is built by
+#' `susieR:::make_track_snapshot(model, iteration)` so the list
+#' satisfies the `is_compact_track_snapshots` validator that
+#' `ibss_finalize -> make_susie_track_history` runs at finalize
+#' (introduced in susieR 0.16.1). `model$sigma2` is sanitised to
+#' `NA_real_` for the snapshot because mfsusieR's `sigma2` is a
+#' `list[M]` whereas susieR's helper assumes a scalar; the real
+#' per-iteration values remain on `fit$sigma2` after finalize.
+#'
 #' @keywords internal
 #' @noRd
 track_ibss_fit.mf_individual <- function(data, params, model, tracking, iter, elbo, ...) {
   if (!isTRUE(params$track_fit)) return(tracking)
-  tracking[[iter]] <- list(
-    alpha  = model$alpha,
-    sigma2 = model$sigma2,
-    pi_V   = model$pi_V,
-    elbo   = elbo[iter]
-  )
+  # Delegate to susieR's `make_track_snapshot` so the snapshot list
+  # passes the `is_compact_track_snapshots` validator that
+  # `ibss_finalize -> make_susie_track_history` runs at finalize.
+  # The helper's three data.frames (`alpha`, `effect`, `iteration`)
+  # are populated from `model$alpha`, `model$V`, `model$lbf`,
+  # `model$lbf_variable`, all of which mfsusieR carries with
+  # the same shape as susieR.
+  #
+  # mfsusieR's `model$sigma2` is `list[M]` (one entry per outcome,
+  # each a scalar or length-S vector) whereas `make_track_snapshot ->
+  # track_scalar` expects a length-1 numeric. Pass a sanitised copy
+  # with `sigma2 = NA_real_` so the snapshot records NA rather than
+  # crashing on `as.numeric(list)`. The per-iteration ELBO is still
+  # available on `fit$elbo`.
+  m_compact <- model
+  m_compact$sigma2 <- NA_real_
+  tracking[[iter]] <- make_track_snapshot(m_compact, iter - 1L)
   tracking
 }
 

diff --git a/R/individual_data_methods.R b/R/individual_data_methods.R
@@ -801,8 +801,9 @@ post_loglik_prior_hook.mf_individual <- function(data, params, model, ser_stats,
         pi_warm_start  = pi_prev)
       model$G_prior[[m]][[s]]$fitted_g$pi <- new_pi
       model$pi_V[[l]][[m]][s, ]            <- new_pi
-      model$fitted_g_per_effect[[l]][[m]][[s]] <-
-        model$G_prior[[m]][[s]]$fitted_g
+      if (!is.null(model$fitted_g_per_effect))
+        model$fitted_g_per_effect[[l]][[m]][[s]] <-
+          model$G_prior[[m]][[s]]$fitted_g
     }
   }
   model
@@ -840,7 +841,8 @@ post_loglik_prior_hook.mf_individual <- function(data, params, model, ser_stats,
       fit <- do.call(ebnm_fn, args)
       model$G_prior[[m]][[s]]$fitted_g <- fit$fitted_g
       model$pi_V[[l]][[m]][s, ]         <- fit$fitted_g$pi
-      model$fitted_g_per_effect[[l]][[m]][[s]] <- fit$fitted_g
+      if (!is.null(model$fitted_g_per_effect))
+        model$fitted_g_per_effect[[l]][[m]][[s]] <- fit$fitted_g
     }
   }
   model

diff --git a/R/mfsusie.R b/R/mfsusie.R
@@ -178,6 +178,16 @@
 #'   correction acts on variable selection probabilities only;
 #'   posterior moments given inclusion are unchanged. Default
 #'   `FALSE`.
+#' @param cross_iter_prior logical. When `TRUE` (default), the fitted
+#'   mixture weights `pi` for each effect `l` are stored after each
+#'   SER step and reloaded as the starting point for the same effect
+#'   in the next outer IBSS iteration (per-effect warm-start of the
+#'   prior). When `FALSE`, each SER step starts from the shared
+#'   initial prior regardless of previous iterations, which is closer
+#'   to the IBSS theoretical derivation (shared prior across effects).
+#'   Setting `FALSE` may reduce FDR inflation in high-resolution
+#'   wavelet fits where per-effect prior persistence can overfit to
+#'   cross-scale covariance structure in the data.
 #' @param max_inner_em_steps integer, number of M-step + alpha-update
 #'   sweeps run inside the post-loglik prior hook per (effect,
 #'   outer iter), to keep mixture pi and alpha in sync per effect per outer
@@ -202,6 +212,38 @@
 #'   `mf_post_smooth(fit, X = X, Y = Y, ...)` instead; useful
 #'   when sharing fits where the per-individual data should
 #'   not travel with the fit.
+#' @param save_mu_method one of `"complete"` (default),
+#'   `"aggregated"`, or `"lead"`. Controls the storage shape
+#'   of the per-effect posterior moments `fit$mu` and `fit$mu2`
+#'   after the IBSS loop finishes.
+#'
+#'   `"complete"` keeps the full `p x T_basis[m]` per-(effect,
+#'   outcome) moments; this is the only mode that supports
+#'   `model_init` warm-starts, `predict.mfsusie(newx)`, and the
+#'   per-variant lfsr toggle in plots.
+#'
+#'   `"aggregated"` replaces each `p x T` matrix by the
+#'   alpha-weighted 1 x T summary
+#'   `mu[[l]][[m]] = sum_j alpha[l, j] * mu_full[l, j, ]` (and
+#'   the analogous second-moment summary). Storage shrinks by
+#'   roughly factor `p`. The post-fit consumers `coef.mfsusie`,
+#'   `mf_post_smooth`, `summary`, `print`, and the alpha-aggregated
+#'   plot views are numerically equivalent to `"complete"`. To
+#'   keep `coef` lossless on the raw-X scale, the fit also
+#'   carries `coef_wavelet[[l]][[m]] = sum_j alpha[l, j] *
+#'   mu_full[l, j, ] / csd_X[j]` as a separate 1 x T summary.
+#'
+#'   `"lead"` keeps only the lead variable per effect:
+#'   `mu[[l]][[m]] = mu_full[l, j*, ]` where
+#'   `j* = which.max(alpha[l, ])`, plus `fit$top_index[l] = j*`.
+#'   This is a cheap single-variable coefficient summary, biased
+#'   toward the lead. `coef.mfsusie` and `mf_post_smooth` work but
+#'   are not the alpha-weighted posterior mean; document accordingly.
+#'
+#'   Both 1D modes (`"aggregated"`, `"lead"`) error on
+#'   `predict.mfsusie(newx)`, the plot per-variant lfsr toggle,
+#'   and `model_init`. Use `mf_thin()` to thin a complete fit
+#'   after the fact while keeping the original for warm-starts.
 #'
 #' @return A list of class `c("mfsusie", "susie")` carrying:
 #' \describe{
@@ -279,15 +321,31 @@ mfsusie <- function(X, Y,
                     alpha_thin_eps            = 5e-5,
                     model_init                = NULL,
                     small_sample_correction   = FALSE,
+                    cross_iter_prior          = TRUE,
                     max_inner_em_steps            = 5L,
-                    attach_smoothing_inputs   = TRUE) {
+                    attach_smoothing_inputs   = TRUE,
+                    save_mu_method            = c("complete",
+                                                  "aggregated",
+                                                  "lead")) {
   if (!is.logical(small_sample_correction) ||
       length(small_sample_correction) != 1L ||
       is.na(small_sample_correction)) {
     stop("`small_sample_correction` must be `TRUE` or `FALSE`.")
   }
   prior_variance_scope    <- match.arg(prior_variance_scope)
   residual_variance_scope <- match.arg(residual_variance_scope)
+  save_mu_method          <- match.arg(save_mu_method)
+
+  # Reject thinned fits as model_init: the IBSS warm-start needs the
+  # full p x T_basis[m] mu / mu2, and 1D modes have already discarded
+  # the per-variant axis. The error references the storage mode of
+  # the supplied fit, not the new call's requested mode.
+  if (!is.null(model_init)) {
+    init_mode <- attr(model_init, "save_mu_method") %||% "complete"
+    if (init_mode != "complete") {
+      stop_save_mu_method_combo("model_init", init_mode)
+    }
+  }
 
   if (!is.null(mixture_null_weight) &&
       prior_variance_scope %in% c("per_scale_normal",
@@ -384,6 +442,7 @@ mfsusie <- function(X, Y,
     small_sample_correction    = small_sample_correction,
     small_sample_df            = if (small_sample_correction) data$n - 1L
                                  else NULL,
+    cross_iter_prior           = isTRUE(cross_iter_prior),
     max_inner_em_steps             = as.integer(max_inner_em_steps)
   )
 
@@ -448,6 +507,11 @@ mfsusie <- function(X, Y,
     outcome_names   = names(data$D) %||% names(Y)
   )
 
+  # 7. Apply save_mu_method storage policy. Default "complete" leaves
+  #    fit unchanged. Non-complete modes shrink mu/mu2 by factor p
+  #    and disable predict(newx), per-variant lfsr, and model_init.
+  fit <- mf_apply_save_mu_method(fit, save_mu_method)
+
   fit
 }
 

diff --git a/R/mfsusie_methods.R b/R/mfsusie_methods.R
@@ -45,13 +45,17 @@ mf_invert_per_outcome <- function(coef_wavelet, m, dwt_meta,
 }
 
 # Per-effect coefficient matrix (p x T_basis) for effect l, outcome m,
-# on the standardized X scale: alpha_lj * mu_lj_t.
+# on the standardized X scale: alpha_lj * mu_lj_t. Only meaningful when
+# the fit carries per-variant mu (save_mu_method = "complete"); the only
+# caller (mf_total_wavelet, used by predict.mfsusie(newx)) is gated by
+# predict's save_mu_method guard, so this need not branch.
 mf_effect_wavelet <- function(fit, l, m) {
   fit$alpha[l, ] * fit$mu[[l]][[m]]
 }
 
 # Coefficient sum across all L effects per outcome, on standardized
-# X scale, in the wavelet domain.
+# X scale, in the wavelet domain. Only callable for fits with
+# save_mu_method = "complete"; predict.mfsusie enforces this upstream.
 mf_total_wavelet <- function(fit, m) {
   p     <- ncol(fit$alpha)
   T_pad <- fit$dwt_meta$T_basis[m]
@@ -92,6 +96,10 @@ predict.mfsusie <- function(object, newx = NULL, ...) {
       "`newx` has %d columns but the fit was trained on X with %d columns.",
       ncol(newx), ncol(object$alpha)))
   }
+  mode <- mf_save_mu_method(object)
+  if (mode != "complete") {
+    stop_save_mu_method_combo("predict.mfsusie(newx)", mode)
+  }
 
   meta <- object$dwt_meta
   M    <- meta$M
@@ -162,18 +170,17 @@ coef.mfsusie <- function(object, smooth_method = NULL, ...) {
   meta <- object$dwt_meta
   L    <- nrow(object$alpha)
   M    <- meta$M
-  X_scale <- meta$X_scale
   out  <- vector("list", M)
   for (m in seq_len(M)) {
     coef_l_wavelet <- matrix(0, nrow = L, ncol = meta$T_basis[m])
     for (l in seq_len(L)) {
-      # Per-variable unscale of mu: mu lives in (X-standardized,
-      # Y-standardized-then-DWT'd) units; dividing by `X_scale[j]`
-      # converts it to (X-raw, Y-standardized-then-DWT'd) units so
-      # the alpha-weighted sum is the per-effect coefficient on the
-      # raw X scale.
-      mu_raw_X <- sweep(object$mu[[l]][[m]], 1L, X_scale, "/")
-      coef_l_wavelet[l, ] <- colSums(object$alpha[l, ] * mu_raw_X)
+      # Per-effect raw-X wavelet coefficient curve. Helper dispatches
+      # on save_mu_method: "complete" runs sum_j alpha_lj mu_lj /
+      # csd_X[j]; "aggregated" reads the precomputed
+      # fit$coef_wavelet[[l]][[m]] (the per-j scaling cannot be
+      # recovered after alpha-collapse); "lead" returns mu[j*, ] /
+      # csd_X[j*].
+      coef_l_wavelet[l, ] <- get_coef_wavelet_curve(object, l, m)
     }
     # Inverse DWT with `column_center = 0` so the Y intercept
     # `cm_Y` is NOT added back to a per-effect estimate. The `csd_Y`
@@ -698,14 +705,23 @@ mf_post_smooth <- function(fit,
       next
     }
 
+    has_per_variant_mu <- mf_has_per_variant_mu(fit)
     for (l in seq_len(L)) {
       out <- kernel(fit, l, m, T_m, level)
       effect_curves[[m]][[l]]  <- out$effect_estimate
       credible_bands[[m]][[l]] <- out$credible_band
       lfsr_curves[[m]][[l]]    <- out$lfsr
-      clfsr_curves[[m]][[l]]   <- lfsr_from_gaussian(
-        fit$mu[[l]][[m]],
-        sqrt(pmax(fit$mu2[[l]][[m]] - fit$mu[[l]][[m]]^2, 0)))
+      # Per-variant clfsr requires the full p x T mu / mu2. Under
+      # save_mu_method = "aggregated" or "lead" we only have a
+      # 1 x T summary, which would degenerate to the alpha-aggregated
+      # lfsr already in lfsr_curves; leave clfsr_curves NULL so the
+      # plot per-variant toggle errors instead of silently displaying
+      # the wrong shape.
+      clfsr_curves[[m]][[l]] <- if (has_per_variant_mu)
+        lfsr_from_gaussian(
+          fit$mu[[l]][[m]],
+          sqrt(pmax(fit$mu2[[l]][[m]] - fit$mu[[l]][[m]]^2, 0)))
+      else NULL
     }
   }
 
@@ -722,12 +738,10 @@ mf_post_smooth <- function(fit,
   meta   <- fit$dwt_meta
   kernel <- function(fit, l, m, T_m, level) {
     # Alpha-weighted aggregate across variables (no lead-variant
-    # tie-break). Mean: sum_j alpha[l, j] * mu[l, j, t]; second
-    # moment: sum_j alpha[l, j] * mu2[l, j, t]. Variance via the
-    # law of total variance.
-    mean_w <- as.numeric(fit$alpha[l, ] %*% fit$mu[[l]][[m]])
-    mu2_w  <- as.numeric(fit$alpha[l, ] %*% fit$mu2[[l]][[m]])
-    var_w  <- pmax(mu2_w - mean_w^2, 0)
+    # tie-break). Helper handles all three save_mu_method modes.
+    mom    <- get_effect_wavelet_moments(fit, l, m)
+    mean_w <- mom$mean_w
+    var_w  <- mom$var_w
 
     shrunk_w <- if (T_m == 1L) mean_w else
       scalewise_soft_threshold(mean_w, sd = sqrt(var_w),
@@ -804,8 +818,9 @@ mf_post_smooth <- function(fit,
   for (l in seq_len(L)) {
     if (l == exclude) next
     # Alpha-weighted aggregate per-position wavelet coefficient
-    # across variables. `mu[[l]][[m]]` is p x T_basis.
-    mu_w_agg <- as.numeric(fit$alpha[l, ] %*% fit$mu[[l]][[m]])
+    # across variables. Helper handles complete (p x T_basis) and
+    # the 1D save_mu_method modes uniformly.
+    mu_w_agg <- get_effect_wavelet_moments(fit, l, m)$mean_w
     eff_pos  <- dwt_invert_packed(matrix(mu_w_agg, nrow = 1L),
                                       meta, m)
     if (length(eff_pos) > T_pos) eff_pos <- eff_pos[seq_len(T_pos)]

diff --git a/R/mfsusie_plot.R b/R/mfsusie_plot.R
@@ -89,6 +89,11 @@ compute_clfsr_matrix <- function(fit, l, m, smoothed = NULL) {
       !is.null(smoothed$clfsr_curves[[m]][[l]])) {
     return(smoothed$clfsr_curves[[m]][[l]])
   }
+  mode <- mf_save_mu_method(fit)
+  if (mode != "complete") {
+    stop_save_mu_method_combo(
+      "Per-variant clfsr (compute_clfsr_matrix)", mode)
+  }
   mu <- fit$mu[[l]][[m]]
   sd <- sqrt(pmax(fit$mu2[[l]][[m]] - mu^2, 0))
   lfsr_from_gaussian(mu, sd)

diff --git a/R/model_class.R b/R/model_class.R
@@ -72,7 +72,7 @@ initialize_susie_model.mf_individual <- function(data, params, var_y, ...) {
   G_prior            <- if (is.null(prior)) NULL else prior$G_prior
   pi_V <- if (is.null(prior)) NULL else
     lapply(seq_len(L), function(.) prior$pi)
-  fitted_g_per_effect <- if (is.null(prior)) NULL else
+  fitted_g_per_effect <- if (is.null(prior) || !isTRUE(params$cross_iter_prior)) NULL else
     lapply(seq_len(L), function(.) {
       lapply(prior$G_prior, function(G_m)
         lapply(G_m, function(g_s) g_s$fitted_g))