diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index eb0e70f8fb..936fd84480 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -5,7 +5,7 @@ use std::io::{self, IsTerminal, Read, Write}; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; -use std::time::Duration; +use std::time::{Duration, Instant}; use anyhow::{Context, Result, anyhow, bail}; use clap::{Args, CommandFactory, Parser, Subcommand, ValueEnum}; @@ -6470,37 +6470,13 @@ async fn run_interactive( logging::warn(format!("Failed to install system skills: {e}")); } - // Prune stale workspace snapshots from prior sessions (7-day default). - // Non-fatal: a flaky disk, missing `git`, or read-only home should - // never block the TUI from starting. - let snapshots = config.snapshots_config(); - if snapshots.enabled { - session_manager::prune_workspace_snapshots(&workspace, snapshots.max_age()); - } - - // Prune stale tool-output spillover files (#422). Non-fatal: home - // missing or directory unreadable just means nothing got pruned; - // we never block startup. Runs unconditionally because the - // spillover store is created lazily on first write — there's no - // user-facing setting to gate. - match crate::tools::truncate::prune_older_than(crate::tools::truncate::SPILLOVER_MAX_AGE) { - Ok(0) => {} - Ok(n) => tracing::debug!( - target: "spillover", - "boot prune removed {n} spillover file(s)" - ), - Err(err) => tracing::warn!( - target: "spillover", - ?err, - "spillover prune skipped on boot" - ), - } - - // v0.8.44: prune managed sessions on boot to prevent unbounded growth. - // Keeps at most MAX_SESSIONS (50) recent sessions; non-fatal on error. - if let Ok(manager) = session_manager::SessionManager::default_location() { - let _ = manager.cleanup_old_sessions(); + // Snapshot pruning rewrites side-repo refs and can run GC, so keep it + // before the TUI exposes snapshot list/restore commands. + if config.snapshots_config().enabled { + session_manager::prune_workspace_snapshots(&workspace, config.snapshots_config().max_age()); } + let protected_session_token = resume_session_id.clone(); + spawn_interactive_startup_maintenance(workspace.clone(), protected_session_token); // The `deepseek` launcher forwards `--yolo` to this binary via the // DEEPSEEK_YOLO env var (config.yolo), not as a CLI flag. Honour either. @@ -6533,6 +6509,81 @@ async fn run_interactive( .await } +fn spawn_interactive_startup_maintenance( + workspace: PathBuf, + protected_session_token: Option, +) { + let spawn_result = std::thread::Builder::new() + .name("codewhale-startup-maintenance".to_string()) + .spawn(move || { + // Keep the first interactive frame ahead of optional disk cleanup. + std::thread::sleep(Duration::from_millis(500)); + run_interactive_startup_maintenance(&workspace, protected_session_token.as_deref()); + }); + + if let Err(err) = spawn_result { + logging::warn(format!("Startup maintenance skipped: {err}")); + } +} + +fn startup_maintenance_protected_session_id( + resume_session_id: Option<&str>, + workspace: &Path, +) -> Option { + let session_id = resume_session_id?; + if session_id != "latest" { + return Some(session_id.to_string()); + } + + session_manager::SessionManager::default_location() + .ok() + .and_then(|manager| { + manager + .get_latest_session_for_workspace(workspace) + .ok() + .flatten() + }) + .map(|session| session.id) +} + +fn run_interactive_startup_maintenance(workspace: &Path, protected_session_token: Option<&str>) { + let started = Instant::now(); + + // Prune stale tool-output spillover files (#422). Non-fatal: home + // missing or directory unreadable just means nothing got pruned. + match crate::tools::truncate::prune_older_than(crate::tools::truncate::SPILLOVER_MAX_AGE) { + Ok(0) => {} + Ok(n) => tracing::debug!( + target: "spillover", + "boot prune removed {n} spillover file(s)" + ), + Err(err) => tracing::warn!( + target: "spillover", + ?err, + "spillover prune skipped on boot" + ), + } + + // v0.8.44: prune managed sessions to prevent unbounded growth. + // Keeps at most MAX_SESSIONS (50) recent sessions; non-fatal on error. + let protected_session_id = + startup_maintenance_protected_session_id(protected_session_token, workspace); + match session_manager::SessionManager::default_location() { + Ok(manager) => { + if let Err(err) = manager.cleanup_old_sessions_except(protected_session_id.as_deref()) { + tracing::warn!(target: "session", ?err, "session cleanup skipped on boot"); + } + } + Err(err) => tracing::warn!(target: "session", ?err, "session cleanup skipped on boot"), + } + + tracing::debug!( + target: "startup", + elapsed_ms = started.elapsed().as_millis(), + "startup maintenance finished" + ); +} + #[derive(Debug)] struct CliAutoRoute { provider: crate::config::ApiProvider, diff --git a/crates/tui/src/session_manager.rs b/crates/tui/src/session_manager.rs index efeb739d74..56b9631a0c 100644 --- a/crates/tui/src/session_manager.rs +++ b/crates/tui/src/session_manager.rs @@ -489,11 +489,22 @@ impl SessionManager { /// Clean up old sessions to stay within `MAX_SESSIONS` limit. pub fn cleanup_old_sessions(&self) -> std::io::Result<()> { + self.cleanup_old_sessions_except(None) + } + + /// Clean up old sessions while preserving an active/resumed session. + /// + /// Accepts a full id or resume prefix; startup passes the same user-facing + /// resume token that `load_session_by_prefix` accepts. + pub fn cleanup_old_sessions_except(&self, protected_id: Option<&str>) -> std::io::Result<()> { let sessions = self.list_sessions()?; if sessions.len() > MAX_SESSIONS { // Delete oldest sessions for session in sessions.iter().skip(MAX_SESSIONS) { + if protected_id.is_some_and(|id| session.id == id || session.id.starts_with(id)) { + continue; + } let _ = self.delete_session(&session.id); } } @@ -1108,6 +1119,23 @@ mod tests { workspace: &Path, updated_at: DateTime, ) { + let session = make_session_record(id, workspace, updated_at); + manager.save_session(&session).expect("save"); + } + + fn write_session_record_without_cleanup( + manager: &SessionManager, + id: &str, + workspace: &Path, + updated_at: DateTime, + ) { + let session = make_session_record(id, workspace, updated_at); + let path = manager.validated_session_path(id).expect("path"); + let content = serde_json::to_string_pretty(&session).expect("json"); + fs::write(path, content).expect("write session"); + } + + fn make_session_record(id: &str, workspace: &Path, updated_at: DateTime) -> SavedSession { let session = SavedSession { schema_version: CURRENT_SESSION_SCHEMA_VERSION, messages: vec![make_test_message("user", "hi")], @@ -1130,7 +1158,7 @@ mod tests { context_references: Vec::new(), artifacts: Vec::new(), }; - manager.save_session(&session).expect("save"); + session } fn write_empty_session_record( @@ -2184,6 +2212,31 @@ mod tests { assert_eq!(loaded.artifacts, session.artifacts); } + #[test] + fn cleanup_old_sessions_except_preserves_protected_old_session() { + let tmp = tempdir().expect("tempdir"); + let manager = SessionManager::new(tmp.path().join("sessions")).expect("new"); + let now = Utc::now(); + for idx in 0..(MAX_SESSIONS + 2) { + write_session_record_without_cleanup( + &manager, + &format!("session-{idx:02}-abcdef"), + Path::new("/tmp"), + now - chrono::Duration::minutes(idx as i64), + ); + } + + manager + .cleanup_old_sessions_except(Some("session-51")) + .expect("cleanup"); + let sessions = manager.list_sessions().expect("list"); + let ids: Vec<_> = sessions.iter().map(|session| session.id.as_str()).collect(); + + assert!(ids.contains(&"session-51-abcdef")); + assert!(!ids.contains(&"session-50-abcdef")); + assert_eq!(sessions.len(), MAX_SESSIONS + 1); + } + // ---- #406 prune_sessions_older_than ---- // // The helper is a building block for the auto-archive design: it diff --git a/crates/tui/src/snapshot/mod.rs b/crates/tui/src/snapshot/mod.rs index 08cf3e0916..1e68d50b3d 100644 --- a/crates/tui/src/snapshot/mod.rs +++ b/crates/tui/src/snapshot/mod.rs @@ -39,6 +39,7 @@ pub mod repo; #[allow(unused_imports)] pub use paths::{snapshot_dir_for, snapshot_git_dir}; +#[allow(unused_imports)] pub use prune::{DEFAULT_MAX_AGE, prune_older_than}; /// Maximum snapshots kept per workspace side-repo. Oldest are pruned diff --git a/crates/tui/src/snapshot/repo.rs b/crates/tui/src/snapshot/repo.rs index fe016fad3a..3dc3c356e2 100644 --- a/crates/tui/src/snapshot/repo.rs +++ b/crates/tui/src/snapshot/repo.rs @@ -13,6 +13,7 @@ //! directory". use std::collections::HashSet; +use std::fs::OpenOptions; use std::io; use std::path::{Component, Path, PathBuf}; use std::process::Output; @@ -51,6 +52,7 @@ pub struct SnapshotRepo { } const STALE_TMP_PACK_AGE: Duration = Duration::from_secs(60 * 60); +const SNAPSHOT_LOCK_FILE: &str = "codewhale-snapshot.lock"; /// Maximum total snapshot storage in megabytes before pruning kicks in at /// snapshot time. Keeps the side repo from blowing up the user's disk during @@ -305,6 +307,10 @@ impl SnapshotRepo { /// /// Returns the snapshot's commit SHA. pub fn snapshot(&self, label: &str) -> io::Result { + self.with_repo_write_lock(|| self.snapshot_locked(label)) + } + + fn snapshot_locked(&self, label: &str) -> io::Result { // Guard against disk blowup (#1112): if the snapshot directory has // grown beyond the limit, prune aggressively before adding more. if let Ok(current_mb) = dir_size_mb(&self.git_dir) @@ -320,7 +326,7 @@ impl SnapshotRepo { // we're under the target, or until there's nothing left. let mut age = Duration::from_secs(1); for _ in 0..10 { - let _ = self.prune_older_than(age); + let _ = self.prune_older_than_locked(age); if let Ok(new_size) = dir_size_mb(&self.git_dir) && new_size <= PRUNE_TARGET_MB { @@ -343,8 +349,8 @@ impl SnapshotRepo { target: "snapshot", "snapshot storage still over limit after pruning; wiping history" ); - let _ = self.prune_older_than(Duration::ZERO); - let _ = self.prune_unreachable_objects(); + let _ = self.prune_older_than_locked(Duration::ZERO); + let _ = self.prune_unreachable_objects_locked(); } } // Stage every tracked + untracked path the workspace exposes. @@ -419,6 +425,10 @@ impl SnapshotRepo { /// snapshot tree relative to the workspace root. We do NOT touch the /// user's own `.git` — snapshots only contain working-tree files. pub fn restore(&self, id: &SnapshotId) -> io::Result<()> { + self.with_repo_write_lock(|| self.restore_locked(id)) + } + + fn restore_locked(&self, id: &SnapshotId) -> io::Result<()> { let current_paths = self.tree_paths("HEAD")?; let target_paths = self.tree_paths(id.as_str())?; let checkout = run_git( @@ -446,12 +456,14 @@ impl SnapshotRepo { /// again would be a no-op, so the caller should continue scanning /// older snapshots. pub fn work_tree_matches_snapshot(&self, id: &SnapshotId) -> io::Result { - let diff = run_git( - &self.git_dir, - &self.work_tree, - &["diff", "--quiet", id.as_str(), "--", ":/"], - )?; - Ok(diff.status.success()) + self.with_repo_read_lock(|| { + let diff = run_git( + &self.git_dir, + &self.work_tree, + &["diff", "--quiet", id.as_str(), "--", ":/"], + )?; + Ok(diff.status.success()) + }) } fn tree_paths(&self, treeish: &str) -> io::Result> { @@ -506,6 +518,10 @@ impl SnapshotRepo { /// List up to `limit` most-recent snapshots, newest first. pub fn list(&self, limit: usize) -> io::Result> { + self.with_repo_read_lock(|| self.list_locked(limit)) + } + + fn list_locked(&self, limit: usize) -> io::Result> { // `git log -` is the short form of `--max-count=`; if `limit` // is `usize::MAX` (caller asked for "everything") we pass an empty // count so git defaults to no upper bound. @@ -550,13 +566,17 @@ impl SnapshotRepo { /// `git gc --prune=now` to actually reclaim space. Cheap and avoids /// rewriting history when nothing has aged out. pub fn prune_older_than(&self, max_age: Duration) -> io::Result { + self.with_repo_write_lock(|| self.prune_older_than_locked(max_age)) + } + + fn prune_older_than_locked(&self, max_age: Duration) -> io::Result { let now = SystemTime::now() .duration_since(UNIX_EPOCH) .map_err(|e| io_other(format!("clock error: {e}")))? .as_secs() as i64; let cutoff = now - max_age.as_secs() as i64; - let snapshots = self.list(usize::MAX)?; + let snapshots = self.list_locked(usize::MAX)?; if snapshots.is_empty() { return Ok(0); } @@ -637,7 +657,11 @@ impl SnapshotRepo { /// are preserved — only the parent chain to older snapshots is cut. /// Old objects become unreachable and gc reclaims them. pub fn prune_keep_last_n(&self, max_count: usize) -> io::Result { - let snapshots = self.list(usize::MAX)?; + self.with_repo_write_lock(|| self.prune_keep_last_n_locked(max_count)) + } + + fn prune_keep_last_n_locked(&self, max_count: usize) -> io::Result { + let snapshots = self.list_locked(usize::MAX)?; if snapshots.len() <= max_count { return Ok(0); } @@ -712,9 +736,39 @@ impl SnapshotRepo { Ok(removed) } + fn with_repo_write_lock(&self, f: impl FnOnce() -> io::Result) -> io::Result { + let lock_path = self.git_dir.join(SNAPSHOT_LOCK_FILE); + let lock_file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(lock_path)?; + let mut lock = fd_lock::RwLock::new(lock_file); + let _guard = lock.write()?; + f() + } + + fn with_repo_read_lock(&self, f: impl FnOnce() -> io::Result) -> io::Result { + let lock_path = self.git_dir.join(SNAPSHOT_LOCK_FILE); + let lock_file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(lock_path)?; + let lock = fd_lock::RwLock::new(lock_file); + let _guard = lock.read()?; + f() + } + /// Drop unreachable loose objects left behind by interrupted or /// orphaned side-repo operations. pub fn prune_unreachable_objects(&self) -> io::Result<()> { + self.with_repo_write_lock(|| self.prune_unreachable_objects_locked()) + } + + fn prune_unreachable_objects_locked(&self) -> io::Result<()> { let prune = run_git(&self.git_dir, &self.work_tree, &["prune", "--expire=now"])?; if !prune.status.success() { return Err(io_other(format!( diff --git a/crates/tui/src/tools/truncate.rs b/crates/tui/src/tools/truncate.rs index 55306f02b5..58e8c5a92c 100644 --- a/crates/tui/src/tools/truncate.rs +++ b/crates/tui/src/tools/truncate.rs @@ -33,19 +33,16 @@ //! tool-details pager opens the spillover file when the user //! presses the tool-details shortcut on a spilled tool cell. -use std::fs; +use std::fs::{self, FileTimes, OpenOptions}; use std::io; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::time::{Duration, SystemTime}; use crate::tools::spec::ToolResult; -// `Path` is only referenced from helpers gated to test builds. -#[cfg(test)] -use std::path::Path; - /// Name of the spillover directory under the CodeWhale home. pub const SPILLOVER_DIR_NAME: &str = "tool_outputs"; +const SPILLOVER_LOCK_FILE: &str = ".codewhale-tool-outputs.lock"; /// Default threshold above which a tool result is a candidate for /// spillover. Mirrors the `MAX_MEMORY_SIZE` ceiling we use elsewhere @@ -145,14 +142,38 @@ pub fn write_sha_spillover(sha: &str, content: &str) -> io::Result { "sha must be a 64-char lowercase hex digest", ) })?; + let parent = path + .parent() + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidInput, + "could not resolve sha spillover directory", + ) + })? + .to_path_buf(); if path.exists() { - return Ok(path); - } - if let Some(parent) = path.parent() { - fs::create_dir_all(parent)?; + return match with_spillover_write_lock(&parent, || { + if path.exists() { + let _ = refresh_modified(&path); + } else { + crate::utils::write_atomic(&path, content.as_bytes())?; + } + Ok(path.clone()) + }) { + Ok(path) => Ok(path), + Err(_) if path.exists() => Ok(path), + Err(err) => Err(err), + }; } - crate::utils::write_atomic(&path, content.as_bytes())?; - Ok(path) + + with_spillover_write_lock(&parent, || { + if path.exists() { + let _ = refresh_modified(&path); + return Ok(path.clone()); + } + crate::utils::write_atomic(&path, content.as_bytes())?; + Ok(path.clone()) + }) } /// Write `content` to the spillover file for `id`. Creates the @@ -169,11 +190,19 @@ pub fn write_spillover(id: &str, content: &str) -> io::Result { "could not resolve spillover path (empty/invalid id or missing home directory)", ) })?; - if let Some(parent) = path.parent() { - fs::create_dir_all(parent)?; - } - crate::utils::write_atomic(&path, content.as_bytes())?; - Ok(path) + let parent = path + .parent() + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidInput, + "could not resolve spillover directory", + ) + })? + .to_path_buf(); + with_spillover_write_lock(&parent, || { + crate::utils::write_atomic(&path, content.as_bytes())?; + Ok(path.clone()) + }) } /// Drop spillover files older than `max_age`. Returns the number of @@ -187,11 +216,15 @@ pub fn prune_older_than(max_age: Duration) -> io::Result { if !root.exists() { return Ok(0); } + with_spillover_write_lock(&root, || prune_older_than_locked(&root, max_age)) +} + +fn prune_older_than_locked(root: &Path, max_age: Duration) -> io::Result { let cutoff = SystemTime::now() .checked_sub(max_age) .unwrap_or(SystemTime::UNIX_EPOCH); let mut pruned = 0usize; - for entry in fs::read_dir(&root)? { + for entry in fs::read_dir(root)? { let entry = match entry { Ok(e) => e, Err(err) => { @@ -200,6 +233,12 @@ pub fn prune_older_than(max_age: Duration) -> io::Result { } }; let path = entry.path(); + if path + .file_name() + .is_some_and(|name| name == SPILLOVER_LOCK_FILE) + { + continue; + } if !path.is_file() { continue; } @@ -221,6 +260,24 @@ pub fn prune_older_than(max_age: Duration) -> io::Result { Ok(pruned) } +fn refresh_modified(path: &Path) -> io::Result<()> { + let file = OpenOptions::new().write(true).open(path)?; + file.set_times(FileTimes::new().set_modified(SystemTime::now())) +} + +fn with_spillover_write_lock(root: &Path, f: impl FnOnce() -> io::Result) -> io::Result { + fs::create_dir_all(root)?; + let lock_file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .truncate(false) + .open(root.join(SPILLOVER_LOCK_FILE))?; + let mut lock = fd_lock::RwLock::new(lock_file); + let _guard = lock.write()?; + f() +} + /// Convenience for the common "too long? spill it." pattern. If /// `content` is at or below `threshold` bytes, returns `None` and the /// caller keeps the inline content. Above the threshold, writes the @@ -628,6 +685,53 @@ mod tests { }); } + #[test] + fn write_sha_spillover_refreshes_reused_file_mtime() { + let _g = setup(); + let tmp = tempdir().unwrap(); + with_test_home(tmp.path(), || { + let sha = "a".repeat(64); + let path = write_sha_spillover(&sha, "large result").expect("write sha"); + let stale_time = SystemTime::now() - Duration::from_secs(30 * 24 * 60 * 60); + let file = OpenOptions::new() + .write(true) + .open(&path) + .expect("open sha"); + file.set_times(FileTimes::new().set_modified(stale_time)) + .expect("backdate sha"); + + let reused = write_sha_spillover(&sha, "large result").expect("reuse sha"); + let refreshed = fs::metadata(&reused) + .expect("metadata") + .modified() + .expect("modified"); + + assert_eq!(reused, path); + assert!(refreshed > stale_time); + }); + } + + #[test] + fn write_sha_spillover_reuses_existing_file_when_refresh_fails() { + let _g = setup(); + let tmp = tempdir().unwrap(); + with_test_home(tmp.path(), || { + let sha = "b".repeat(64); + let path = write_sha_spillover(&sha, "first").expect("write sha"); + let original_permissions = fs::metadata(&path).expect("metadata").permissions(); + let mut readonly_permissions = original_permissions.clone(); + readonly_permissions.set_readonly(true); + fs::set_permissions(&path, readonly_permissions).expect("make readonly"); + + let reused = write_sha_spillover(&sha, "second"); + fs::set_permissions(&path, original_permissions).expect("restore permissions"); + + let reused = reused.expect("reuse readonly sha"); + assert_eq!(reused, path); + assert_eq!(fs::read_to_string(&path).unwrap(), "first"); + }); + } + #[test] fn maybe_spillover_returns_none_below_threshold() { let _g = setup();