Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1,122 changes: 548 additions & 574 deletions Evolutionary Selection Analysis.Rmd

Large diffs are not rendered by default.

3,016 changes: 873 additions & 2,143 deletions Evolutionary-Selection-Analysis.html

Large diffs are not rendered by default.

Binary file modified R/.DS_Store
Binary file not shown.
483 changes: 168 additions & 315 deletions R/case_studies/Caribbean_pupfishes.md

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions R/case_studies/Medium_ground_finch.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,34 @@ Univariate analyses were conducted for beak size across years to assess temporal

---


--- Sample size by group ---
> cat("Binary grouped data:\n")
Binary grouped data:
> print(table(prepared_binary_group[[GROUP]]))

Crescent Pond Little Lake
892 973
>
> cat("\nContinuous grouped data:\n")

Continuous grouped data:
> print(table(prepared_continuous_group[[GROUP]]))

Crescent Pond Little Lake
892 973


# 样本量几乎相等
total <- 892 + 973 # 1865
weight_Crescent <- 892 / 1865 # 0.478
weight_Little <- 973 / 1865 # 0.522

# 权重非常接近 0.5:0.5
# 所以加权平均 ≈ 简单平均



### 2. Setup and Script
`R/scripts/test_birds.R`

Expand Down
114 changes: 70 additions & 44 deletions R/functions/correlated_fitness_surface.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
#
# When group is specified, the function automatically runs separate
# analyses for each group and returns a list of results.
#
# k = NULL (default) triggers adaptive k selection:
# k_2d <- min(30, max(10, floor(sqrt(n_unique1 * n_unique2))))
# k can be overridden by the user for manual control.
# ======================================================

`%||%` <- function(a, b) if (!is.null(a)) a else b
Expand All @@ -23,7 +27,7 @@ correlated_fitness_surface <- function(
method = "auto",
scale_traits = FALSE,
group = NULL,
k = 30
k = NULL
) {
stopifnot(length(trait_cols) == 2L)
need <- c(fitness_col, trait_cols)
Expand All @@ -37,6 +41,20 @@ correlated_fitness_surface <- function(
stop("Group column '", group, "' not found in data")
}

# ======================================================
# Adaptive k selection (before group splitting) based on full dataset unique values for both traits
# ======================================================
if (is.null(k)) {
n_unique1 <- length(unique(data[[trait_cols[1]]][complete.cases(data[[trait_cols[1]]])]))
n_unique2 <- length(unique(data[[trait_cols[2]]][complete.cases(data[[trait_cols[2]]])]))
k <- min(30, max(10, floor(sqrt(n_unique1 * n_unique2))))
message(
"Adaptive k selected: k = ", k,
" (n_unique: ", trait_cols[1], " = ", n_unique1,
", ", trait_cols[2], " = ", n_unique2, ")"
)
}

# ======================================================
# If group is specified, run separate analyses for each group
# ======================================================
Expand All @@ -47,19 +65,17 @@ correlated_fitness_surface <- function(
for (g in groups) {
data_g <- data[data[[group]] == g, ]

# Recursively call without group
res <- correlated_fitness_surface(
data = data_g,
fitness_col = fitness_col,
trait_cols = trait_cols,
grid_n = grid_n,
method = method,
data = data_g,
fitness_col = fitness_col,
trait_cols = trait_cols,
grid_n = grid_n,
method = method,
scale_traits = scale_traits,
group = NULL, # No further grouping
k = k
group = NULL,
k = k
)

# Add group identifier
res$group_value <- g
results_list[[as.character(g)]] <- res
}
Expand All @@ -86,7 +102,7 @@ correlated_fitness_surface <- function(
}

message("IMPORTANT: Traits should already be standardized (mean = 0, SD = 1).")
message(" Do NOT apply scale() again within this function.")
message("Do NOT apply scale() again within this function.")

# Check if traits appear standardized
for (t in trait_cols) {
Expand Down Expand Up @@ -160,7 +176,7 @@ correlated_fitness_surface <- function(

# Prepare data frame
df_fit <- data.frame(
.y = as.numeric(y),
.y = as.numeric(y),
trait1 = as.numeric(x1s),
trait2 = as.numeric(x2s)
)
Expand All @@ -169,26 +185,33 @@ correlated_fitness_surface <- function(

cat("GAM fitting with", nrow(df_fit), "observations\n")

# Build formulas (no group because we're in main analysis)
k_adj <- min(k, nrow(df_fit) - 1)
# ======================================================
# Safety check: cap k at subset-level constraints
# ======================================================
n_unique1_sub <- length(unique(df_fit[[trait_cols[1]]]))
n_unique2_sub <- length(unique(df_fit[[trait_cols[2]]]))
k_cap <- min(30, max(10, floor(sqrt(n_unique1_sub * n_unique2_sub))))
k_adj <- min(k, k_cap, nrow(df_fit) - 1)

if (k_adj < k) {
message("Adjusting k from ", k, " to ", k_adj, " for this subset")
k_use <- k_adj
} else {
k_use <- k
}

# Build formulas with fallbacks
fml <- as.formula(paste(
".y ~ s(", trait_cols[1], ", ", trait_cols[2],
", bs = 'tp', k = ", k_adj, ")"
".y ~ s(", trait_cols[1], ",", trait_cols[2],
", bs = 'tp', k =", k_use, ")"
))
fml_alt1 <- as.formula(paste(
".y ~ s(", trait_cols[1],
", k = min(floor(", k_adj, "/2), nrow(df_fit) - 1)) + s(",
trait_cols[2],
", k = min(floor(", k_adj, "/2), nrow(df_fit) - 1))"
".y ~ s(", trait_cols[1], ", k =", max(5, floor(k_use / 2)), ") +",
"s(", trait_cols[2], ", k =", max(5, floor(k_use / 2)), ")"
))
fml_alt2 <- as.formula(paste(".y ~ ", trait_cols[1], " + ", trait_cols[2]))
fml_alt2 <- as.formula(paste(".y ~", trait_cols[1], "+", trait_cols[2]))

try_formulas <- list(
main = fml,
alt1 = fml_alt1,
alt2 = fml_alt2
)
try_formulas <- list(main = fml, alt1 = fml_alt1, alt2 = fml_alt2)

fit <- NULL
formula_used <- NULL
Expand All @@ -199,7 +222,7 @@ correlated_fitness_surface <- function(
{
cat(" Trying formula:", form_name, "\n")
fit <- mgcv::gam(try_formulas[[form_name]],
data = df_fit,
data = df_fit,
family = fam,
method = "REML"
)
Expand Down Expand Up @@ -239,23 +262,26 @@ correlated_fitness_surface <- function(
grid$.fit[is.na(grid$.fit)] <- mean(grid$.fit, na.rm = TRUE)
}

cat("Success Predictions range:", round(range(grid$.fit), 4), "\n")
cat("Predictions range:", round(range(grid$.fit), 4), "\n")

return(list(
model = fit,
grid = grid,
method = "gam",
model = fit,
grid = grid,
method = "gam",
formula_used = formula_used,
data_type = ifelse(is_binary, "binary", "continuous"),
trait_cols = trait_cols,
fitness_col = fitness_col,
group_used = NULL,
k_used = k_use,
data_type = ifelse(is_binary, "binary", "continuous"),
trait_cols = trait_cols,
fitness_col = fitness_col,
group_used = NULL,
surface_type = "correlated_fitness",
note = "Correlated fitness surface (individual fitness)"
note = "Correlated fitness surface (individual fitness)"
))
}

# ======================================================
# TPS method for continuous data
# ======================================================
if (!requireNamespace("fields", quietly = TRUE)) {
stop("For continuous fitness with tps method, install fields: install.packages('fields')")
}
Expand Down Expand Up @@ -288,14 +314,14 @@ correlated_fitness_surface <- function(
}

return(list(
model = tps_model,
grid = grid,
method = "tps",
data_type = ifelse(is_binary, "binary", "continuous"),
trait_cols = trait_cols,
fitness_col = fitness_col,
group_used = NULL,
model = tps_model,
grid = grid,
method = "tps",
data_type = ifelse(is_binary, "binary", "continuous"),
trait_cols = trait_cols,
fitness_col = fitness_col,
group_used = NULL,
surface_type = "correlated_fitness",
note = "Correlated fitness surface (individual fitness)"
note = "Correlated fitness surface (individual fitness)"
))
}
65 changes: 39 additions & 26 deletions R/functions/univariate_spline.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@
# - Use prepare_selection_data() with standardize = TRUE and optional group
# - The GAM smooth term estimates the shape of the fitness function
# - DO NOT use scale() within this function (double standardization)
# - k = NULL (default) triggers adaptive k selection based on n_unique
# - k can be overridden by the user for manual control
# ======================================================

univariate_spline <- function(data,
fitness_col,
trait_col,
fitness_type = c("binary", "continuous"),
group = NULL,
k = 10) {
k = NULL) {
fitness_type <- match.arg(fitness_type)

# Input validation
Expand All @@ -36,6 +38,15 @@ univariate_spline <- function(data,
stop("`trait_col` must be numeric (standardize upstream if needed).")
}

# ======================================================
# Adaptive k selection (before group splitting) based on full dataset unique values
# ======================================================
if (is.null(k)) {
n_unique_all <- length(unique(data[[trait_col]][complete.cases(data[[trait_col]])]))
k <- min(10, max(5, n_unique_all - 1))
message("Adaptive k selected: k = ", k, " (n_unique = ", n_unique_all, ")")
}

# ======================================================
# If group is specified, run separate analyses for each group
# ======================================================
Expand All @@ -50,14 +61,14 @@ univariate_spline <- function(data,
for (g in groups) {
data_g <- data[data[[group]] == g, ]

# Recursively call without group
# Recursively call without group, passing resolved k
res <- univariate_spline(
data = data_g,
fitness_col = fitness_col,
trait_col = trait_col,
data = data_g,
fitness_col = fitness_col,
trait_col = trait_col,
fitness_type = fitness_type,
group = NULL,
k = k
group = NULL,
k = k # k is now a number, not NULL
)

# Add group identifier
Expand Down Expand Up @@ -123,15 +134,17 @@ univariate_spline <- function(data,
df <- data
df[[".y"]] <- y

# Adjust k based on unique values
# ======================================================
# Safety check: cap k at subset-level unique values
# ======================================================
n_unique <- length(unique(df[[trait_col]][complete.cases(df[[trait_col]])]))
n_obs <- sum(complete.cases(df[[trait_col]], y))

k_adj <- min(k, max(3, n_unique - 1))
k_adj <- min(k, max(5, n_unique - 1))
if (k_adj < k) {
warning(
"Reducing k from ", k, " to ", k_adj,
" (only ", n_unique, " unique values)"
" (only ", n_unique, " unique values in this subset)"
)
k <- k_adj
}
Expand All @@ -143,16 +156,16 @@ univariate_spline <- function(data,
)
}

# Create formula with smooth term (no group here)
# Create formula with smooth term
fml <- stats::as.formula(paste0(".y ~ s(", trait_col, ", k = ", k, ")"))

# Fit GAM
fit <- tryCatch(
{
mgcv::gam(fml,
data = df,
family = fam,
method = "REML",
data = df,
family = fam,
method = "REML",
na.action = stats::na.omit
)
},
Expand All @@ -174,7 +187,7 @@ univariate_spline <- function(data,
grid <- data.frame(seq(rng[1], rng[2], length.out = 200))
names(grid) <- trait_col

# Predict
# Predict on response scale with 95% CI
pr <- stats::predict(fit, newdata = grid, se.fit = TRUE, type = "link")
linkinv <- fit$family$linkinv

Expand All @@ -188,19 +201,19 @@ univariate_spline <- function(data,
}

result <- list(
model = fit,
grid = grid,
trait = trait_col,
model = fit,
grid = grid,
trait = trait_col,
fitness_type = fitness_type,
family = family_name,
k = k,
n_obs = n_obs,
fit_note = fit_note,
group_used = NULL,
trait_mean = z_mean,
trait_sd = z_sd,
family = family_name,
k = k,
n_obs = n_obs,
fit_note = fit_note,
group_used = NULL,
trait_mean = z_mean,
trait_sd = z_sd,
surface_type = "correlated_fitness_univariate",
note = "Univariate correlated fitness function (individual fitness)"
note = "Univariate correlated fitness function (individual fitness)"
)

class(result) <- "univariate_fitness"
Expand Down
Binary file modified R/results/.DS_Store
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Loading