From 1774e76eaf31d6a72e25495863315b408cbbd33a Mon Sep 17 00:00:00 2001 From: avnyu Date: Sun, 14 Dec 2025 20:08:35 +0700 Subject: [PATCH 1/4] refactor: extract into GitSource::fetch_db --- src/cargo/sources/git/source.rs | 115 +++++++++++++++++--------------- 1 file changed, 63 insertions(+), 52 deletions(-) diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 2459b87b99d..db7b98e9e65 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -6,6 +6,7 @@ use crate::core::global_cache_tracker; use crate::core::{Dependency, Package, PackageId}; use crate::sources::IndexSummary; use crate::sources::RecursivePathSource; +use crate::sources::git::utils::GitDatabase; use crate::sources::git::utils::GitRemote; use crate::sources::git::utils::rev_to_oid; use crate::sources::source::MaybePackage; @@ -155,6 +156,67 @@ impl<'gctx> GitSource<'gctx> { }); Ok(()) } + + /// Fetch and return a [`GitDatabase`] with the resolved revision + /// for this source, + /// + /// This won't fetch anything if the required revision is + /// already available locally. + pub(crate) fn fetch_db(&self) -> CargoResult<(GitDatabase, git2::Oid)> { + let db_path = self.gctx.git_db_path().join(&self.ident); + let db_path = db_path.into_path_unlocked(); + + let db = self.remote.db_at(&db_path).ok(); + + let (db, actual_rev) = match (&self.locked_rev, db) { + // If we have a locked revision, and we have a preexisting database + // which has that revision, then no update needs to happen. + (Revision::Locked(oid), Some(db)) if db.contains(*oid) => (db, *oid), + + // If we're in offline mode, we're not locked, and we have a + // database, then try to resolve our reference with the preexisting + // repository. + (Revision::Deferred(git_ref), Some(db)) if !self.gctx.network_allowed() => { + let offline_flag = self + .gctx + .offline_flag() + .expect("always present when `!network_allowed`"); + let rev = db.resolve(&git_ref).with_context(|| { + format!( + "failed to lookup reference in preexisting repository, and \ + can't check for updates in offline mode ({offline_flag})" + ) + })?; + (db, rev) + } + + // ... otherwise we use this state to update the git database. Note + // that we still check for being offline here, for example in the + // situation that we have a locked revision but the database + // doesn't have it. + (locked_rev, db) => { + if let Some(offline_flag) = self.gctx.offline_flag() { + anyhow::bail!( + "can't checkout from '{}': you are in the offline mode ({offline_flag})", + self.remote.url() + ); + } + + if !self.quiet { + self.gctx.shell().status( + "Updating", + format!("git repository `{}`", self.remote.url()), + )?; + } + + trace!("updating git source `{:?}`", self.remote); + + let locked_rev = locked_rev.clone().into(); + self.remote.checkout(&db_path, db, &locked_rev, self.gctx)? + } + }; + Ok((db, actual_rev)) + } } /// Indicates a [Git revision] that might be locked or deferred to be resolved. @@ -286,58 +348,7 @@ impl<'gctx> Source for GitSource<'gctx> { // exists. exclude_from_backups_and_indexing(&git_path); - let db_path = self.gctx.git_db_path().join(&self.ident); - let db_path = db_path.into_path_unlocked(); - - let db = self.remote.db_at(&db_path).ok(); - - let (db, actual_rev) = match (&self.locked_rev, db) { - // If we have a locked revision, and we have a preexisting database - // which has that revision, then no update needs to happen. - (Revision::Locked(oid), Some(db)) if db.contains(*oid) => (db, *oid), - - // If we're in offline mode, we're not locked, and we have a - // database, then try to resolve our reference with the preexisting - // repository. - (Revision::Deferred(git_ref), Some(db)) if !self.gctx.network_allowed() => { - let offline_flag = self - .gctx - .offline_flag() - .expect("always present when `!network_allowed`"); - let rev = db.resolve(&git_ref).with_context(|| { - format!( - "failed to lookup reference in preexisting repository, and \ - can't check for updates in offline mode ({offline_flag})" - ) - })?; - (db, rev) - } - - // ... otherwise we use this state to update the git database. Note - // that we still check for being offline here, for example in the - // situation that we have a locked revision but the database - // doesn't have it. - (locked_rev, db) => { - if let Some(offline_flag) = self.gctx.offline_flag() { - anyhow::bail!( - "can't checkout from '{}': you are in the offline mode ({offline_flag})", - self.remote.url() - ); - } - - if !self.quiet { - self.gctx.shell().status( - "Updating", - format!("git repository `{}`", self.remote.url()), - )?; - } - - trace!("updating git source `{:?}`", self.remote); - - let locked_rev = locked_rev.clone().into(); - self.remote.checkout(&db_path, db, &locked_rev, self.gctx)? - } - }; + let (db, actual_rev) = self.fetch_db()?; // Don’t use the full hash, in order to contribute less to reaching the // path length limit on Windows. See From 9e9d3db952da2c077551aaa0986966381fa3056f Mon Sep 17 00:00:00 2001 From: avnyu Date: Sun, 14 Dec 2025 20:09:03 +0700 Subject: [PATCH 2/4] test: cache git submodule tests --- tests/testsuite/git.rs | 105 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index 502a709d530..dd980406ac0 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -919,6 +919,12 @@ fn dep_with_submodule() { "#]]) .run(); + + let db_paths = glob::glob(paths::cargo_home().join("git/db/dep2-*").to_str().unwrap()) + .unwrap() + .map(Result::unwrap) + .collect::>(); + assert_eq!(db_paths.len(), 0, "submodule db created once"); } #[cargo_test] @@ -989,6 +995,17 @@ fn dep_with_relative_submodule() { "#]]) .run(); + + let db_paths = glob::glob( + paths::cargo_home() + .join("git/db/deployment-*") + .to_str() + .unwrap(), + ) + .unwrap() + .map(Result::unwrap) + .collect::>(); + assert_eq!(db_paths.len(), 0, "submodule db created once"); } #[cargo_test] @@ -1518,6 +1535,12 @@ project2 "#]]) .run(); + let db_paths = glob::glob(paths::cargo_home().join("git/db/dep2-*").to_str().unwrap()) + .unwrap() + .map(Result::unwrap) + .collect::>(); + assert_eq!(db_paths.len(), 0, "submodule db created once"); + git_project.change_file( ".gitmodules", &format!( @@ -1560,6 +1583,12 @@ project2 "#]]) .run(); + let db_paths = glob::glob(paths::cargo_home().join("git/db/dep3-*").to_str().unwrap()) + .unwrap() + .map(Result::unwrap) + .collect::>(); + assert_eq!(db_paths.len(), 0, "submodule db created once"); + println!("last run"); p.cargo("run") .with_stderr_data(str![[r#" @@ -4269,3 +4298,79 @@ src/main.rs "#]]) .run(); } + +#[cargo_test] +fn dep_with_cached_submodule() { + let project = project(); + let git_project = git::new("dep1", |project| { + project.file("Cargo.toml", &basic_manifest("dep1", "0.5.0")) + }); + let git_project2 = git::new("dep2", |project| { + project.file("Cargo.toml", &basic_manifest("dep2", "0.5.0")) + }); + let git_project3 = git::new("dep3", |project| project.file("lib.rs", "pub fn dep() {}")); + + let url = git_project3.root().to_url().to_string(); + + let repo = git2::Repository::open(&git_project.root()).unwrap(); + git::add_submodule(&repo, &url, Path::new("src")); + git::commit(&repo); + + let repo2 = git2::Repository::open(&git_project2.root()).unwrap(); + git::add_submodule(&repo2, &url, Path::new("src")); + git::commit(&repo2); + + let project = project + .file( + "Cargo.toml", + &format!( + r#" + [package] + + name = "foo" + version = "0.5.0" + edition = "2015" + authors = ["wycats@example.com"] + + [dependencies.dep1] + git = '{}' + + [dependencies.dep2] + git = '{}' + + "#, + git_project.url(), + git_project2.url(), + ), + ) + .file( + "src/lib.rs", + r#" + extern crate dep1; pub fn foo() { dep1::dep() } + extern crate dep2; pub fn bar() { dep2::dep() } + "#, + ) + .build(); + + project + .cargo("check") + .with_stderr_data(str![[r#" +[UPDATING] git repository `[ROOTURL]/dep1` +[UPDATING] git submodule `[ROOTURL]/dep3` +[UPDATING] git repository `[ROOTURL]/dep2` +[UPDATING] git submodule `[ROOTURL]/dep3` +[LOCKING] 2 packages to latest compatible versions +[CHECKING] dep[..] v0.5.0 ([ROOTURL]/dep[..]#[..]) +[CHECKING] dep[..] v0.5.0 ([ROOTURL]/dep[..]#[..]) +[CHECKING] foo v0.5.0 ([ROOT]/foo) +[FINISHED] `dev` profile [unoptimized + debuginfo] target(s) in [ELAPSED]s + +"#]]) + .run(); + + let db_paths = glob::glob(paths::cargo_home().join("git/db/dep3-*").to_str().unwrap()) + .unwrap() + .map(Result::unwrap) + .collect::>(); + assert_eq!(db_paths.len(), 0, "submodule db created once"); +} From 4f02e479629287bf3d53132d02e45235bd83e93d Mon Sep 17 00:00:00 2001 From: avnyu Date: Sun, 14 Dec 2025 20:11:52 +0700 Subject: [PATCH 3/4] feat: recursive cache git submodules fetch each submodules using git database then checkout --- src/cargo/sources/git/utils.rs | 32 +++++++++++++++++--------------- tests/testsuite/git.rs | 15 +++++++++------ 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index fa5438a9909..cd69d434075 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -1,10 +1,12 @@ //! Utilities for handling git repositories, mainly around //! authentication/cloning. -use crate::core::{GitReference, Verbosity}; +use crate::core::{GitReference, SourceId, Verbosity}; use crate::sources::git::fetch::RemoteKind; use crate::sources::git::oxide; use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides; +use crate::sources::git::source::GitSource; +use crate::sources::source::Source as _; use crate::util::HumanBytes; use crate::util::errors::{CargoResult, GitCliError}; use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network}; @@ -447,7 +449,7 @@ impl<'a> GitCheckout<'a> { let target = repo.head()?.target(); Ok((target, repo)) }); - let mut repo = match head_and_repo { + let repo = match head_and_repo { Ok((head, repo)) => { if child.head_id() == head { return update_submodules(&repo, gctx, &child_remote_url); @@ -460,25 +462,25 @@ impl<'a> GitCheckout<'a> { init(&path, false)? } }; - // Fetch data from origin and reset to the head commit + // Fetch submodule database and checkout to target revision let reference = GitReference::Rev(head.to_string()); gctx.shell() .status("Updating", format!("git submodule `{child_remote_url}`"))?; - fetch( - &mut repo, - &child_remote_url, - &reference, - gctx, - RemoteKind::GitDependency, - ) - .with_context(|| { + + // GitSource created from SourceId without git precise will result to + // locked_rev being Deferred and fetch_db always try to fetch if online + let source_id = SourceId::for_git(&child_remote_url.into_url()?, reference)? + .with_git_precise(Some(head.to_string())); + + let mut source = GitSource::new(source_id, gctx)?; + source.set_quiet(true); + + let (db, actual_rev) = source.fetch_db().with_context(|| { let name = child.name().unwrap_or(""); format!("failed to fetch submodule `{name}` from {child_remote_url}",) })?; - - let obj = repo.find_object(head, None)?; - reset(&repo, &obj, gctx)?; - update_submodules(&repo, gctx, &child_remote_url) + db.copy_to(actual_rev, repo.path(), gctx)?; + Ok(()) } } } diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index dd980406ac0..4ef2e44c078 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -924,7 +924,7 @@ fn dep_with_submodule() { .unwrap() .map(Result::unwrap) .collect::>(); - assert_eq!(db_paths.len(), 0, "submodule db created once"); + assert_eq!(db_paths.len(), 1, "submodule db created once"); } #[cargo_test] @@ -1005,7 +1005,7 @@ fn dep_with_relative_submodule() { .unwrap() .map(Result::unwrap) .collect::>(); - assert_eq!(db_paths.len(), 0, "submodule db created once"); + assert_eq!(db_paths.len(), 1, "submodule db created once"); } #[cargo_test] @@ -1077,7 +1077,10 @@ Caused by: failed to update submodule `src` Caused by: - object not found - no match for id ([..]); class=Odb (9); code=NotFound (-3) + failed to fetch submodule `src` from [ROOTURL]/dep2 + +Caused by: + revspec '[..]' not found; class=Reference (4); code=NotFound (-3) "#]]; @@ -1539,7 +1542,7 @@ project2 .unwrap() .map(Result::unwrap) .collect::>(); - assert_eq!(db_paths.len(), 0, "submodule db created once"); + assert_eq!(db_paths.len(), 1, "submodule db created once"); git_project.change_file( ".gitmodules", @@ -1587,7 +1590,7 @@ project2 .unwrap() .map(Result::unwrap) .collect::>(); - assert_eq!(db_paths.len(), 0, "submodule db created once"); + assert_eq!(db_paths.len(), 1, "submodule db created once"); println!("last run"); p.cargo("run") @@ -4372,5 +4375,5 @@ fn dep_with_cached_submodule() { .unwrap() .map(Result::unwrap) .collect::>(); - assert_eq!(db_paths.len(), 0, "submodule db created once"); + assert_eq!(db_paths.len(), 1, "submodule db created once"); } From cf022a5752379e517d666ecc491298752b6190a0 Mon Sep 17 00:00:00 2001 From: avnyu Date: Sun, 14 Dec 2025 20:31:57 +0700 Subject: [PATCH 4/4] fix: remove extra 'update git submodule' when the submodule is cached --- src/cargo/sources/git/source.rs | 18 +++++++++++------- src/cargo/sources/git/utils.rs | 33 ++++++++++++++++++--------------- tests/testsuite/git.rs | 1 - 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index db7b98e9e65..d6dbe03e4c0 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -162,7 +162,7 @@ impl<'gctx> GitSource<'gctx> { /// /// This won't fetch anything if the required revision is /// already available locally. - pub(crate) fn fetch_db(&self) -> CargoResult<(GitDatabase, git2::Oid)> { + pub(crate) fn fetch_db(&self, is_submodule: bool) -> CargoResult<(GitDatabase, git2::Oid)> { let db_path = self.gctx.git_db_path().join(&self.ident); let db_path = db_path.into_path_unlocked(); @@ -203,10 +203,14 @@ impl<'gctx> GitSource<'gctx> { } if !self.quiet { - self.gctx.shell().status( - "Updating", - format!("git repository `{}`", self.remote.url()), - )?; + let scope = if is_submodule { + "submodule" + } else { + "repository" + }; + self.gctx + .shell() + .status("Updating", format!("git {scope} `{}`", self.remote.url()))?; } trace!("updating git source `{:?}`", self.remote); @@ -348,7 +352,7 @@ impl<'gctx> Source for GitSource<'gctx> { // exists. exclude_from_backups_and_indexing(&git_path); - let (db, actual_rev) = self.fetch_db()?; + let (db, actual_rev) = self.fetch_db(false)?; // Don’t use the full hash, in order to contribute less to reaching the // path length limit on Windows. See @@ -364,7 +368,7 @@ impl<'gctx> Source for GitSource<'gctx> { .join(&self.ident) .join(short_id.as_str()); let checkout_path = checkout_path.into_path_unlocked(); - db.copy_to(actual_rev, &checkout_path, self.gctx)?; + db.copy_to(actual_rev, &checkout_path, self.gctx, self.quiet)?; let source_id = self .source_id diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index cd69d434075..74bd59555b0 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -171,6 +171,7 @@ impl GitDatabase { rev: git2::Oid, dest: &Path, gctx: &GlobalContext, + quiet: bool, ) -> CargoResult> { // If the existing checkout exists, and it is fresh, use it. // A non-fresh checkout can happen if the checkout operation was @@ -184,7 +185,7 @@ impl GitDatabase { Some(co) => co, None => { let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?; - checkout.update_submodules(gctx)?; + checkout.update_submodules(gctx, quiet)?; guard.mark_ok()?; checkout } @@ -386,24 +387,27 @@ impl<'a> GitCheckout<'a> { /// Submodules set to `none` won't be fetched. /// /// [^1]: - fn update_submodules(&self, gctx: &GlobalContext) -> CargoResult<()> { - return update_submodules(&self.repo, gctx, self.remote_url().as_str()); + fn update_submodules(&self, gctx: &GlobalContext, quiet: bool) -> CargoResult<()> { + return update_submodules(&self.repo, gctx, quiet, self.remote_url().as_str()); /// Recursive helper for [`GitCheckout::update_submodules`]. fn update_submodules( repo: &git2::Repository, gctx: &GlobalContext, + quiet: bool, parent_remote_url: &str, ) -> CargoResult<()> { debug!("update submodules for: {:?}", repo.workdir().unwrap()); for mut child in repo.submodules()? { - update_submodule(repo, &mut child, gctx, parent_remote_url).with_context(|| { - format!( - "failed to update submodule `{}`", - child.name().unwrap_or("") - ) - })?; + update_submodule(repo, &mut child, gctx, quiet, parent_remote_url).with_context( + || { + format!( + "failed to update submodule `{}`", + child.name().unwrap_or("") + ) + }, + )?; } Ok(()) } @@ -413,6 +417,7 @@ impl<'a> GitCheckout<'a> { parent: &git2::Repository, child: &mut git2::Submodule<'_>, gctx: &GlobalContext, + quiet: bool, parent_remote_url: &str, ) -> CargoResult<()> { child.init(false)?; @@ -452,7 +457,7 @@ impl<'a> GitCheckout<'a> { let repo = match head_and_repo { Ok((head, repo)) => { if child.head_id() == head { - return update_submodules(&repo, gctx, &child_remote_url); + return update_submodules(&repo, gctx, quiet, &child_remote_url); } repo } @@ -464,8 +469,6 @@ impl<'a> GitCheckout<'a> { }; // Fetch submodule database and checkout to target revision let reference = GitReference::Rev(head.to_string()); - gctx.shell() - .status("Updating", format!("git submodule `{child_remote_url}`"))?; // GitSource created from SourceId without git precise will result to // locked_rev being Deferred and fetch_db always try to fetch if online @@ -473,13 +476,13 @@ impl<'a> GitCheckout<'a> { .with_git_precise(Some(head.to_string())); let mut source = GitSource::new(source_id, gctx)?; - source.set_quiet(true); + source.set_quiet(quiet); - let (db, actual_rev) = source.fetch_db().with_context(|| { + let (db, actual_rev) = source.fetch_db(true).with_context(|| { let name = child.name().unwrap_or(""); format!("failed to fetch submodule `{name}` from {child_remote_url}",) })?; - db.copy_to(actual_rev, repo.path(), gctx)?; + db.copy_to(actual_rev, repo.path(), gctx, quiet)?; Ok(()) } } diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index 4ef2e44c078..a32ffacdff1 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -4361,7 +4361,6 @@ fn dep_with_cached_submodule() { [UPDATING] git repository `[ROOTURL]/dep1` [UPDATING] git submodule `[ROOTURL]/dep3` [UPDATING] git repository `[ROOTURL]/dep2` -[UPDATING] git submodule `[ROOTURL]/dep3` [LOCKING] 2 packages to latest compatible versions [CHECKING] dep[..] v0.5.0 ([ROOTURL]/dep[..]#[..]) [CHECKING] dep[..] v0.5.0 ([ROOTURL]/dep[..]#[..])