Skip to content

Commit 727c825

Browse files
author
Thomas Desveaux
committed
base_commands: improve download_fileset._find_matching_artifact for git artifacts
1 parent 99a29a4 commit 727c825

File tree

3 files changed

+285
-54
lines changed

3 files changed

+285
-54
lines changed

nimp/artifacts.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424

2525
from __future__ import annotations
2626

27-
import copy
2827
import datetime
2928
import hashlib
3029
import json
@@ -66,15 +65,18 @@
6665

6766
class Artifact(TypedDict):
6867
revision: str
69-
sortable_revision: str
7068
uri: str
7169

7270

7371
def _is_http_url(string: str) -> bool:
7472
return re.match(r'^http[s]?:\/\/.*$', string) is not None
7573

7674

77-
def list_artifacts(artifact_pattern: str, format_arguments: Mapping[str, Any], api_context) -> list[Artifact]:
75+
def list_artifacts(
76+
artifact_pattern: str,
77+
format_arguments: Mapping[str, Any],
78+
api_context_: nimp.utils.git.GitApiContext | None,
79+
) -> list[Artifact]:
7880
'''List all artifacts and their revision using the provided pattern after formatting'''
7981

8082
artifact_pattern = artifact_pattern.format_map(
@@ -103,17 +105,12 @@ def list_artifacts(artifact_pattern: str, format_arguments: Mapping[str, Any], a
103105
continue
104106

105107
group_revision = artifact_match.group('revision')
106-
sortable_revision = copy.deepcopy(group_revision)
107-
if api_context:
108-
sortable_revision = nimp.utils.git.get_gitea_commit_timestamp(api_context, group_revision)
109-
if sortable_revision is not None:
110-
all_artifacts.append(
111-
{
112-
'revision': group_revision,
113-
'sortable_revision': sortable_revision,
114-
'uri': file_uri,
115-
}
116-
)
108+
all_artifacts.append(
109+
{
110+
'revision': group_revision,
111+
'uri': file_uri,
112+
}
113+
)
117114
return all_artifacts
118115

119116

nimp/base_commands/download_fileset.py

Lines changed: 194 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,27 @@
2222

2323
'''Downloads a previously uploaded fileset to the local workspace'''
2424

25+
from __future__ import annotations
26+
2527
import copy
2628
import logging
2729
import os
2830
import shutil
31+
import subprocess
32+
import tempfile
33+
from pathlib import Path
2934
from pathlib import PurePosixPath
35+
from typing import TYPE_CHECKING
36+
from typing import Iterator
3037

3138
import nimp.artifacts
3239
import nimp.command
3340
import nimp.system
41+
from nimp.environment import Environment
42+
from nimp.utils import git
43+
44+
if TYPE_CHECKING:
45+
from giteapy.models.repository import Repository
3446

3547

3648
class DownloadFileset(nimp.command.Command):
@@ -59,31 +71,35 @@ def configure_arguments(self, env, parser):
5971
def is_available(self, env):
6072
return True, ''
6173

62-
def run(self, env):
63-
api_context = nimp.utils.git.initialize_gitea_api_context(env)
74+
def run(self, env: Environment) -> bool:
75+
api_context = git.initialize_gitea_api_context(env)
6476

65-
artifacts_source = env.artifact_repository_source
77+
artifacts_source: str = env.artifact_repository_source
6678
if env.prefer_http:
6779
artifacts_http_source = getattr(env, 'artifact_http_repository_source', None)
6880
if artifacts_http_source:
6981
artifacts_source = artifacts_http_source
7082
else:
7183
logging.warning('prefer-http provided but no artifact_http_repository_source in configuration')
7284

73-
artifact_uri_pattern = artifacts_source.rstrip('/') + '/' + env.artifact_collection[env.fileset]
85+
artifact_uri_pattern: str = artifacts_source.rstrip('/') + '/' + str(env.artifact_collection[env.fileset])
7486

7587
install_directory = env.root_dir
7688
if env.destination:
7789
install_directory = str(PurePosixPath(install_directory) / env.format(env.destination))
7890

7991
format_arguments = copy.deepcopy(vars(env))
80-
format_arguments['revision'] = '*'
81-
logging.info('Searching %s', artifact_uri_pattern.format(**format_arguments))
92+
logging.info('Searching %s', artifact_uri_pattern.format_map({**format_arguments, 'revision': '*'}))
8293
all_artifacts = nimp.system.try_execute(
83-
lambda: nimp.artifacts.list_artifacts(artifact_uri_pattern, format_arguments, api_context), OSError
94+
lambda: nimp.artifacts.list_artifacts(artifact_uri_pattern, format_arguments, api_context),
95+
OSError,
8496
)
8597
artifact_to_download = DownloadFileset._find_matching_artifact(
86-
all_artifacts, env.revision, env.min_revision, env.max_revision, api_context
98+
all_artifacts,
99+
env.revision,
100+
env.min_revision,
101+
env.max_revision,
102+
api_context,
87103
)
88104

89105
logging.info('Downloading %s%s', artifact_to_download['uri'], ' (simulation)' if env.dry_run else '')
@@ -123,39 +139,177 @@ def run(self, env):
123139

124140
return True
125141

126-
# TODO: Handle revision comparison when identified by a hash
127142
@staticmethod
128-
def _find_matching_artifact(all_artifacts, exact_revision, minimum_revision, maximum_revision, api_context):
129-
all_artifacts = sorted(all_artifacts, key=lambda artifact: int(artifact['sortable_revision'], 16), reverse=True)
130-
has_revision_input = exact_revision or minimum_revision or maximum_revision
131-
132-
if api_context:
133-
exact_revision = nimp.utils.git.get_gitea_commit_timestamp(api_context, exact_revision)
134-
minimum_revision = nimp.utils.git.get_gitea_commit_timestamp(api_context, minimum_revision)
135-
maximum_revision = nimp.utils.git.get_gitea_commit_timestamp(api_context, maximum_revision)
136-
revision_not_found = not exact_revision and not minimum_revision and not maximum_revision
137-
if has_revision_input and revision_not_found:
138-
raise ValueError('Searched commit not found on gitea repo')
139-
140-
if not api_context and (has_revision_input is not None and not has_revision_input.isdigit()):
141-
raise ValueError(
142-
'Revision seems to be a git commit hash but missing gitea api information. Please check project_branches in project configuration.'
143+
def _find_matching_artifact(
144+
all_artifacts: list[nimp.artifacts.Artifact],
145+
exact_revision: str | None,
146+
minimum_revision: str | None,
147+
maximum_revision: str | None,
148+
api_context: git.GitApiContext | None,
149+
) -> nimp.artifacts.Artifact:
150+
# fastpath for exact_revision
151+
if exact_revision is not None:
152+
if (artifact := next((a for a in all_artifacts if a['revision'] == exact_revision), None)) is not None:
153+
return artifact
154+
raise ValueError('Matching artifact not found')
155+
156+
# fastpath for maximum_revision
157+
if maximum_revision is not None:
158+
if (artifact := next((a for a in all_artifacts if a['revision'] == maximum_revision), None)) is not None:
159+
return artifact
160+
161+
if (
162+
any(git.maybe_git_revision(a['revision']) for a in all_artifacts)
163+
or (minimum_revision is not None and git.maybe_git_revision(minimum_revision))
164+
or (maximum_revision is not None and git.maybe_git_revision(maximum_revision))
165+
):
166+
logging.debug("might be looking at git revisions")
167+
if (
168+
newest_rev := DownloadFileset._get_git_newest_revision(
169+
revisions=[a['revision'] for a in all_artifacts],
170+
minimum_revision=minimum_revision,
171+
maximum_revision=maximum_revision,
172+
api_context=api_context,
173+
)
174+
) is not None:
175+
return next(a for a in all_artifacts if a['revision'] == newest_rev)
176+
177+
probably_p4_rev = all(a['revision'].isdigit() for a in all_artifacts)
178+
if probably_p4_rev:
179+
iter_: Iterator[int] = iter(int(a['revision']) for a in all_artifacts)
180+
if minimum_revision:
181+
minimum_revision_int = int(minimum_revision)
182+
iter_ = filter(lambda rev: rev >= minimum_revision_int, iter_)
183+
184+
if maximum_revision:
185+
maximum_revision_int = int(maximum_revision)
186+
iter_ = filter(lambda rev: rev <= maximum_revision_int, iter_)
187+
188+
if (revision := max(iter_, default=None)) is not None:
189+
revision_str = str(revision)
190+
return next(a for a in all_artifacts if a['revision'] == revision_str)
191+
192+
raise ValueError('Matching artifact not found')
193+
194+
@staticmethod
195+
def _get_git_newest_revision(
196+
revisions: list[str],
197+
minimum_revision: str | None,
198+
maximum_revision: str | None,
199+
api_context: git.GitApiContext | None,
200+
) -> str | None:
201+
remote: str | None = None
202+
if api_context is not None:
203+
repo: Repository = api_context['instance'].repo_get(
204+
owner=api_context['repo_owner'],
205+
repo=api_context['repo_name'],
143206
)
207+
remote = repo.clone_url
208+
logging.debug("Using remote %s from api_context", remote)
209+
210+
cwd_git_dir = git.get_git_dir()
211+
logging.debug("CWD git-dir: %s", cwd_git_dir)
144212

145-
try:
146-
if exact_revision is not None:
147-
return next(a for a in all_artifacts if a['sortable_revision'] == exact_revision)
148-
if minimum_revision is not None and maximum_revision is not None:
149-
return next(
150-
a
151-
for a in all_artifacts
152-
if int(a['sortable_revision']) >= int(minimum_revision)
153-
and int(a['sortable_revision']) <= int(maximum_revision)
213+
if remote is not None:
214+
with tempfile.TemporaryDirectory(prefix="nimp_git_") as tmp_git_dir:
215+
Path(tmp_git_dir).mkdir(parents=True, exist_ok=True)
216+
subprocess.check_call(['git', 'init', '--bare'], cwd=tmp_git_dir)
217+
218+
subprocess.check_call(['git', 'remote', 'add', 'origin', remote], cwd=tmp_git_dir)
219+
220+
# if current workdir contains a git repo, use it as alternate to prevent unnecessary burden on remote
221+
if cwd_git_dir is not None and git.is_shallow_repository(cwd_git_dir) is False:
222+
logging.debug("Add CWD git as bare repository alternate")
223+
git.add_alternates(cwd_git_dir, cwd=tmp_git_dir)
224+
225+
return DownloadFileset._find_git_newest_revision(
226+
tmp_git_dir,
227+
revisions=revisions,
228+
minimum_revision=minimum_revision,
229+
maximum_revision=maximum_revision,
154230
)
155-
if minimum_revision is not None:
156-
return next(a for a in all_artifacts if int(a['sortable_revision']) >= int(minimum_revision))
157-
if maximum_revision is not None:
158-
return next(a for a in all_artifacts if int(a['sortable_revision']) <= int(maximum_revision))
159-
return next(a for a in all_artifacts)
160-
except StopIteration:
161-
raise ValueError('Matching artifact not found')
231+
232+
elif cwd_git_dir is not None:
233+
# no remote, fallback to current git
234+
return DownloadFileset._find_git_newest_revision(
235+
cwd_git_dir,
236+
revisions=revisions,
237+
minimum_revision=minimum_revision,
238+
maximum_revision=maximum_revision,
239+
)
240+
241+
# no current git. Can't find revisions information
242+
return None
243+
244+
@staticmethod
245+
def _find_git_newest_revision(
    git_dir: str,
    revisions: list[str],
    minimum_revision: str | None,
    maximum_revision: str | None,
) -> str | None:
    '''Find the newest of `revisions` inside the git repository at `git_dir`.

    Every revision (and bound) is first fetched from each remote of the repository,
    best effort, then resolved with `git.rev_parse_verify`. Revisions that cannot be
    resolved are ignored. Ordering is decided by `git rev-list --topo-order`.

    Args:
        git_dir: directory in which the git commands are run.
        revisions: candidate revisions.
        minimum_revision: optional inclusive lower bound.
        maximum_revision: optional inclusive upper bound.

    Returns:
        The newest candidate within the bounds, in the form returned by
        `git.rev_parse_verify` (so possibly longer than the string given in
        `revisions`), or None when no candidate qualifies.

    Raises:
        subprocess.CalledProcessError: when `git rev-list` fails.
    '''
    logging.debug("Find newest revisions in %s", git_dir)
    logging.debug("\trevisions: %s", revisions)

    remotes = git.get_remotes(git_dir)
    logging.debug("Found remote %s in repository %s", remotes, git_dir)

    to_fetch = [*revisions]
    if minimum_revision is not None:
        logging.debug("Filter newest revisions with minimum %s", minimum_revision)
        to_fetch.append(minimum_revision)
    if maximum_revision is not None:
        logging.debug("Filter newest revisions with maximum %s", maximum_revision)
        to_fetch.append(maximum_revision)

    fetch_base_cmd = ['git', 'fetch', '--no-recurse-submodules', '--no-progress']
    for remote in remotes:
        logging.debug("Fetch revision from remote %s", remote)
        if subprocess.call([*fetch_base_cmd, remote, *to_fetch], cwd=git_dir) != 0:
            logging.debug("Failed to fetch revisions from %s", remote)
            # might have failed due to one (or more) unknown ref,
            # try one-by-one and ignore failures
            for rev in to_fetch:
                if subprocess.call([*fetch_base_cmd, remote, rev], cwd=git_dir) != 0:
                    logging.debug("\tFailed to fetch revision %s", rev)

    if minimum_revision is not None:
        minimum_revision = git.rev_parse_verify(minimum_revision, cwd=git_dir)
        logging.debug("Resolved minimum revision to %s", minimum_revision)
        # NOTE(review): an unresolved minimum is ignored from here on, as before.
    if maximum_revision is not None:
        maximum_revision = git.rev_parse_verify(maximum_revision, cwd=git_dir)
        logging.debug("Resolved maximum revision to %s", maximum_revision)
        if maximum_revision is None:
            # An upper bound was requested but cannot be located: nothing can be
            # proven to be at or below it, so no candidate qualifies.
            logging.debug("Maximum revision could not be resolved. No revision can match.")
            return None

    rev_list_base_cmd = ['git', 'rev-list', '--ignore-missing', '--max-count=1', '--topo-order']

    def _get_newest_between(rev_left: str, rev_right: str | None) -> str:
        '''Return whichever of the two revisions `git rev-list` lists first.'''
        if rev_right is None:
            return rev_left
        # Must run inside git_dir: that is where the revisions were fetched, and it is
        # not necessarily the process working directory.
        return subprocess.check_output(
            [*rev_list_base_cmd, rev_left, rev_right],
            cwd=git_dir,
            text=True,
        ).strip()

    newest_revision: str | None = None
    for revision in revisions:
        logging.debug("Look at revision %s", revision)
        # filter revisions by existing in repo and get the full rev if a short one was provided
        resolved_revision = git.rev_parse_verify(revision, cwd=git_dir)
        logging.debug("\tResolved to %s", resolved_revision)
        if resolved_revision is None:
            continue

        # Bounds are inclusive: a revision equal to a bound is kept. Without a
        # maximum there is nothing to compare against, so nothing is skipped here.
        if (
            maximum_revision is not None
            and resolved_revision != maximum_revision
            and _get_newest_between(resolved_revision, maximum_revision) == resolved_revision
        ):
            logging.debug(
                "\trevision %s is NEWER than maximum %s. Skip it.", resolved_revision, maximum_revision
            )
            continue

        if (
            minimum_revision is not None
            and resolved_revision != minimum_revision
            and _get_newest_between(resolved_revision, minimum_revision) == minimum_revision
        ):
            logging.debug(
                "\trevision %s is OLDER than minimum %s. Skip it.", resolved_revision, minimum_revision
            )
            continue

        if _get_newest_between(resolved_revision, newest_revision) == resolved_revision:
            newest_revision = resolved_revision
        logging.debug("newest revision is %s", newest_revision)

    return newest_revision

0 commit comments

Comments
 (0)