Skip to content

Commit 0101bde

Browse files
authored
Cache submodule into git db (#16246)
### What does this PR try to resolve? My attempt to continue #10279. This: - Moves the code for creating git db into `GitSource::fetch_db` - Creates GitSource for each submodules - Replaces fetch inside update_submodule by `GitSource::fetch_db` and `db.copy_to` - Removes recursive update_submodules calls cos `db.copy_to` already recursive. Fixes #7987. ### How to test and review this PR? I tested using the original pull method: ``` ~/.cargo/target/debug/cargo update -p boring --precise 46787b7b6909cadf81cf3a8cd9dc351c9efdfdbd ~/.cargo/target/debug/cargo update -p boring --precise c037a438f8d7b91533524570237afcfeffffe496 ``` and confirmed that the time to do the second update is negligible. Also test if it can fetch submodule offline using the downloaded git db ``` git clone https://github.com/pop-os/cosmic-files && cd cosmic-files ~/.cargo/target/debug/cargo fetch --locked --target=$(rustc --print host-tuple) rm -rf ~/.cargo/git/checkout ~/.cargo/target/debug/cargo fetch --locked --target=$(rustc --print host-tuple) --offline ```
2 parents e91b2ba + cf022a5 commit 0101bde

File tree

3 files changed

+208
-81
lines changed

3 files changed

+208
-81
lines changed

src/cargo/sources/git/source.rs

Lines changed: 68 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::core::global_cache_tracker;
66
use crate::core::{Dependency, Package, PackageId};
77
use crate::sources::IndexSummary;
88
use crate::sources::RecursivePathSource;
9+
use crate::sources::git::utils::GitDatabase;
910
use crate::sources::git::utils::GitRemote;
1011
use crate::sources::git::utils::rev_to_oid;
1112
use crate::sources::source::MaybePackage;
@@ -155,6 +156,71 @@ impl<'gctx> GitSource<'gctx> {
155156
});
156157
Ok(())
157158
}
159+
160+
/// Fetch and return a [`GitDatabase`] with the resolved revision
161+
/// for this source,
162+
///
163+
/// This won't fetch anything if the required revision is
164+
/// already available locally.
165+
pub(crate) fn fetch_db(&self, is_submodule: bool) -> CargoResult<(GitDatabase, git2::Oid)> {
166+
let db_path = self.gctx.git_db_path().join(&self.ident);
167+
let db_path = db_path.into_path_unlocked();
168+
169+
let db = self.remote.db_at(&db_path).ok();
170+
171+
let (db, actual_rev) = match (&self.locked_rev, db) {
172+
// If we have a locked revision, and we have a preexisting database
173+
// which has that revision, then no update needs to happen.
174+
(Revision::Locked(oid), Some(db)) if db.contains(*oid) => (db, *oid),
175+
176+
// If we're in offline mode, we're not locked, and we have a
177+
// database, then try to resolve our reference with the preexisting
178+
// repository.
179+
(Revision::Deferred(git_ref), Some(db)) if !self.gctx.network_allowed() => {
180+
let offline_flag = self
181+
.gctx
182+
.offline_flag()
183+
.expect("always present when `!network_allowed`");
184+
let rev = db.resolve(&git_ref).with_context(|| {
185+
format!(
186+
"failed to lookup reference in preexisting repository, and \
187+
can't check for updates in offline mode ({offline_flag})"
188+
)
189+
})?;
190+
(db, rev)
191+
}
192+
193+
// ... otherwise we use this state to update the git database. Note
194+
// that we still check for being offline here, for example in the
195+
// situation that we have a locked revision but the database
196+
// doesn't have it.
197+
(locked_rev, db) => {
198+
if let Some(offline_flag) = self.gctx.offline_flag() {
199+
anyhow::bail!(
200+
"can't checkout from '{}': you are in the offline mode ({offline_flag})",
201+
self.remote.url()
202+
);
203+
}
204+
205+
if !self.quiet {
206+
let scope = if is_submodule {
207+
"submodule"
208+
} else {
209+
"repository"
210+
};
211+
self.gctx
212+
.shell()
213+
.status("Updating", format!("git {scope} `{}`", self.remote.url()))?;
214+
}
215+
216+
trace!("updating git source `{:?}`", self.remote);
217+
218+
let locked_rev = locked_rev.clone().into();
219+
self.remote.checkout(&db_path, db, &locked_rev, self.gctx)?
220+
}
221+
};
222+
Ok((db, actual_rev))
223+
}
158224
}
159225

160226
/// Indicates a [Git revision] that might be locked or deferred to be resolved.
@@ -286,58 +352,7 @@ impl<'gctx> Source for GitSource<'gctx> {
286352
// exists.
287353
exclude_from_backups_and_indexing(&git_path);
288354

289-
let db_path = self.gctx.git_db_path().join(&self.ident);
290-
let db_path = db_path.into_path_unlocked();
291-
292-
let db = self.remote.db_at(&db_path).ok();
293-
294-
let (db, actual_rev) = match (&self.locked_rev, db) {
295-
// If we have a locked revision, and we have a preexisting database
296-
// which has that revision, then no update needs to happen.
297-
(Revision::Locked(oid), Some(db)) if db.contains(*oid) => (db, *oid),
298-
299-
// If we're in offline mode, we're not locked, and we have a
300-
// database, then try to resolve our reference with the preexisting
301-
// repository.
302-
(Revision::Deferred(git_ref), Some(db)) if !self.gctx.network_allowed() => {
303-
let offline_flag = self
304-
.gctx
305-
.offline_flag()
306-
.expect("always present when `!network_allowed`");
307-
let rev = db.resolve(&git_ref).with_context(|| {
308-
format!(
309-
"failed to lookup reference in preexisting repository, and \
310-
can't check for updates in offline mode ({offline_flag})"
311-
)
312-
})?;
313-
(db, rev)
314-
}
315-
316-
// ... otherwise we use this state to update the git database. Note
317-
// that we still check for being offline here, for example in the
318-
// situation that we have a locked revision but the database
319-
// doesn't have it.
320-
(locked_rev, db) => {
321-
if let Some(offline_flag) = self.gctx.offline_flag() {
322-
anyhow::bail!(
323-
"can't checkout from '{}': you are in the offline mode ({offline_flag})",
324-
self.remote.url()
325-
);
326-
}
327-
328-
if !self.quiet {
329-
self.gctx.shell().status(
330-
"Updating",
331-
format!("git repository `{}`", self.remote.url()),
332-
)?;
333-
}
334-
335-
trace!("updating git source `{:?}`", self.remote);
336-
337-
let locked_rev = locked_rev.clone().into();
338-
self.remote.checkout(&db_path, db, &locked_rev, self.gctx)?
339-
}
340-
};
355+
let (db, actual_rev) = self.fetch_db(false)?;
341356

342357
// Don’t use the full hash, in order to contribute less to reaching the
343358
// path length limit on Windows. See
@@ -353,7 +368,7 @@ impl<'gctx> Source for GitSource<'gctx> {
353368
.join(&self.ident)
354369
.join(short_id.as_str());
355370
let checkout_path = checkout_path.into_path_unlocked();
356-
db.copy_to(actual_rev, &checkout_path, self.gctx)?;
371+
db.copy_to(actual_rev, &checkout_path, self.gctx, self.quiet)?;
357372

358373
let source_id = self
359374
.source_id

src/cargo/sources/git/utils.rs

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
//! Utilities for handling git repositories, mainly around
22
//! authentication/cloning.
33
4-
use crate::core::{GitReference, Verbosity};
4+
use crate::core::{GitReference, SourceId, Verbosity};
55
use crate::sources::git::fetch::RemoteKind;
66
use crate::sources::git::oxide;
77
use crate::sources::git::oxide::cargo_config_to_gitoxide_overrides;
8+
use crate::sources::git::source::GitSource;
9+
use crate::sources::source::Source as _;
810
use crate::util::HumanBytes;
911
use crate::util::errors::{CargoResult, GitCliError};
1012
use crate::util::{GlobalContext, IntoUrl, MetricsCounter, Progress, network};
@@ -169,6 +171,7 @@ impl GitDatabase {
169171
rev: git2::Oid,
170172
dest: &Path,
171173
gctx: &GlobalContext,
174+
quiet: bool,
172175
) -> CargoResult<GitCheckout<'_>> {
173176
// If the existing checkout exists, and it is fresh, use it.
174177
// A non-fresh checkout can happen if the checkout operation was
@@ -182,7 +185,7 @@ impl GitDatabase {
182185
Some(co) => co,
183186
None => {
184187
let (checkout, guard) = GitCheckout::clone_into(dest, self, rev, gctx)?;
185-
checkout.update_submodules(gctx)?;
188+
checkout.update_submodules(gctx, quiet)?;
186189
guard.mark_ok()?;
187190
checkout
188191
}
@@ -384,24 +387,27 @@ impl<'a> GitCheckout<'a> {
384387
/// Submodules set to `none` won't be fetched.
385388
///
386389
/// [^1]: <https://git-scm.com/docs/git-submodule#Documentation/git-submodule.txt-none>
387-
fn update_submodules(&self, gctx: &GlobalContext) -> CargoResult<()> {
388-
return update_submodules(&self.repo, gctx, self.remote_url().as_str());
390+
fn update_submodules(&self, gctx: &GlobalContext, quiet: bool) -> CargoResult<()> {
391+
return update_submodules(&self.repo, gctx, quiet, self.remote_url().as_str());
389392

390393
/// Recursive helper for [`GitCheckout::update_submodules`].
391394
fn update_submodules(
392395
repo: &git2::Repository,
393396
gctx: &GlobalContext,
397+
quiet: bool,
394398
parent_remote_url: &str,
395399
) -> CargoResult<()> {
396400
debug!("update submodules for: {:?}", repo.workdir().unwrap());
397401

398402
for mut child in repo.submodules()? {
399-
update_submodule(repo, &mut child, gctx, parent_remote_url).with_context(|| {
400-
format!(
401-
"failed to update submodule `{}`",
402-
child.name().unwrap_or("")
403-
)
404-
})?;
403+
update_submodule(repo, &mut child, gctx, quiet, parent_remote_url).with_context(
404+
|| {
405+
format!(
406+
"failed to update submodule `{}`",
407+
child.name().unwrap_or("")
408+
)
409+
},
410+
)?;
405411
}
406412
Ok(())
407413
}
@@ -411,6 +417,7 @@ impl<'a> GitCheckout<'a> {
411417
parent: &git2::Repository,
412418
child: &mut git2::Submodule<'_>,
413419
gctx: &GlobalContext,
420+
quiet: bool,
414421
parent_remote_url: &str,
415422
) -> CargoResult<()> {
416423
child.init(false)?;
@@ -447,10 +454,10 @@ impl<'a> GitCheckout<'a> {
447454
let target = repo.head()?.target();
448455
Ok((target, repo))
449456
});
450-
let mut repo = match head_and_repo {
457+
let repo = match head_and_repo {
451458
Ok((head, repo)) => {
452459
if child.head_id() == head {
453-
return update_submodules(&repo, gctx, &child_remote_url);
460+
return update_submodules(&repo, gctx, quiet, &child_remote_url);
454461
}
455462
repo
456463
}
@@ -460,25 +467,23 @@ impl<'a> GitCheckout<'a> {
460467
init(&path, false)?
461468
}
462469
};
463-
// Fetch data from origin and reset to the head commit
470+
// Fetch submodule database and checkout to target revision
464471
let reference = GitReference::Rev(head.to_string());
465-
gctx.shell()
466-
.status("Updating", format!("git submodule `{child_remote_url}`"))?;
467-
fetch(
468-
&mut repo,
469-
&child_remote_url,
470-
&reference,
471-
gctx,
472-
RemoteKind::GitDependency,
473-
)
474-
.with_context(|| {
472+
473+
// GitSource created from SourceId without git precise will result to
474+
// locked_rev being Deferred and fetch_db always try to fetch if online
475+
let source_id = SourceId::for_git(&child_remote_url.into_url()?, reference)?
476+
.with_git_precise(Some(head.to_string()));
477+
478+
let mut source = GitSource::new(source_id, gctx)?;
479+
source.set_quiet(quiet);
480+
481+
let (db, actual_rev) = source.fetch_db(true).with_context(|| {
475482
let name = child.name().unwrap_or("");
476483
format!("failed to fetch submodule `{name}` from {child_remote_url}",)
477484
})?;
478-
479-
let obj = repo.find_object(head, None)?;
480-
reset(&repo, &obj, gctx)?;
481-
update_submodules(&repo, gctx, &child_remote_url)
485+
db.copy_to(actual_rev, repo.path(), gctx, quiet)?;
486+
Ok(())
482487
}
483488
}
484489
}

0 commit comments

Comments
 (0)