Skip to content

Commit e5a1f2e

Browse files
Steboss and olupton authored
Triage tool deals with non-linear history (#1538)
If the passing and failing commits do not lie on the same linear git history, the tool would use `git fetch` because it couldn't find a direct path between them. These changes introduce new logic to find the correct bisection range: - apply `git merge-base` to identify the common ancestors of the passing and failing commits - at the build-and-run stage, check out the base commit from the main branch and cherry-pick all of the relevant new changes on top of it --------- Co-authored-by: Olli Lupton <[email protected]>
1 parent b3d3dc8 commit e5a1f2e

File tree

12 files changed

+1239
-583
lines changed

12 files changed

+1239
-583
lines changed

.github/triage/jax_toolbox_triage/args.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,23 @@ def parse_version_argument(s: str) -> typing.Dict[str, str]:
2626
return ret
2727

2828

29+
def parse_override_remotes(s: str) -> typing.Dict[str, str]:
    """Parse the value of ``--override-remotes`` into a mapping.

    Inputs:
        s: (str) comma-separated ``software:url`` pairs, e.g.
            jax:https://<token>@host/repo.git,xla:https://<token>@host/repo.git
            Only the first ``:`` of each pair separates the software name from
            the URL, so the URL itself may contain colons.

    Returns:
        ret: (typing.Dict[str,str]) Dictionary with software as key and git-url as value.
            An empty (or whitespace-only) string yields an empty dictionary.

    Raises:
        argparse.ArgumentTypeError: if a pair is not of the form
            ``software:url`` or if a software name is given more than once.
    """
    ret: typing.Dict[str, str] = {}
    if not s.strip():
        return ret
    for part in s.split(","):
        sw, sep, url = part.partition(":")
        if not sep or not sw or not url:
            # Do not echo the offending text: the URLs may embed auth tokens.
            raise argparse.ArgumentTypeError(
                "expected comma-separated software:url pairs"
            )
        if sw in ret:
            # A real exception instead of `assert`, so the check survives
            # `python -O` and argparse can report it as a usage error.
            raise argparse.ArgumentTypeError(
                f"remote for {sw!r} was specified more than once"
            )
        ret[sw] = url
    return ret
44+
45+
2946
def parse_args(args=None) -> argparse.Namespace:
3047
parser = argparse.ArgumentParser(
3148
description="""
@@ -208,6 +225,14 @@ def parse_args(args=None) -> argparse.Namespace:
208225
in question has different versions at the endpoints of the bisection range.
209226
""",
210227
)
228+
version_search_args.add_argument(
229+
"--override-remotes",
230+
type=parse_override_remotes,
231+
default={},
232+
help="""Remote URLs to be used for fetching, including auth token. E.g.:
233+
jax:https://<token>@host/repo.git,xla:https://<token>@host/repo.git
234+
""",
235+
)
211236
parser.add_argument(
212237
"-v",
213238
"--container-mount",
@@ -225,10 +250,18 @@ def parse_args(args=None) -> argparse.Namespace:
225250
help="Container runtime used, can be docker, pyxis, or local.",
226251
type=lambda s: s.lower(),
227252
)
228-
args = parser.parse_args(args=args)
229-
assert args.container_runtime in {"docker", "pyxis", "local"}, (
230-
args.container_runtime
253+
parser.add_argument(
254+
"--main-branch",
255+
type=str,
256+
default="main",
257+
help="The name of the main branch (e.g. main) to derive cherry-picks from",
231258
)
259+
args = parser.parse_args(args=args)
260+
assert args.container_runtime in {
261+
"docker",
262+
"pyxis",
263+
"local",
264+
}, args.container_runtime
232265
# --{passing,failing}-commits are deprecated aliases for --{passing,failing}-versions.
233266
for prefix in ["passing", "failing"]:
234267
commits = getattr(args, f"{prefix}_commits")
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import datetime
2+
import subprocess
3+
4+
5+
def get_commit_history(
    worker,
    package,
    start,
    end,
    dir,
    main_branch,
    logger=None,
    args=None,
):
    """
    Get the commit history for a given package between two commits.

    Args:
        worker (Container): The container worker to execute commands. It must
            provide ``exec`` (result exposes ``returncode``) and ``check_exec``
            (result exposes ``stdout``).
        package (str): The name of the package.
        start (str): The starting commit hash.
        end (str): The ending commit hash.
        dir (str): The directory where the git repository is located.
        main_branch (str): The main branch name, used to find the bisection
            range when ``start`` is not an ancestor of ``end``.
        logger (Logger, optional): Logger for debug information. Defaults to
            None, in which case nothing is logged.
        args: Parsed arguments; if it has an ``override_remotes`` mapping, the
            entry for ``package`` is used as the remote to fetch from.
            Defaults to None, in which case ``origin`` is used.

    Returns:
        data: list of ``(commit hash, commit date in UTC)`` tuples, in the
            order printed by ``git log --reverse``
        cherry_pick_range: dict mapping ``package`` to a ``<base>..<end>``
            commit range to cherry-pick; empty if the history is linear
    """

    def log_info(message):
        # `logger` is optional; logging must never be the reason we crash.
        if logger is not None:
            logger.info(message)

    # In particular the end commit might not already be known if the older,
    # passing, container is being used for triage.
    commits_known = worker.exec(
        [
            "sh",
            "-c",
            f"git cat-file commit {start} && git cat-file commit {end}",
        ],
        policy="once_per_container",
        workdir=dir,
    )
    if commits_known.returncode != 0:
        # `args` is optional, so fall back to the default remote when no
        # overrides are available.
        override_remotes = getattr(args, "override_remotes", None) or {}
        worker.check_exec(
            ["git", "fetch", override_remotes.get(package, "origin"), start, end],
            workdir=dir,
        )

    # detect non-linear history: `start` must be an ancestor of `end`
    is_ancestor_result = worker.exec(
        ["git", "merge-base", "--is-ancestor", start, end],
        workdir=dir,
    )
    is_linear = is_ancestor_result.returncode == 0
    cherry_pick_range = {}

    if not is_linear:
        log_info(f"Using non-linear history logic with branch {main_branch}")

        # 1. find the linear range on the main branch; the two merge-base
        #    calls print one commit hash each, on separate lines
        passing_and_failing_cmd = worker.check_exec(
            [
                "sh",
                "-c",
                f"git merge-base {start} {end} && git merge-base {end} {main_branch}",
            ],
            workdir=dir,
        ).stdout.strip()
        passing_main_commit, failing_main_commit = passing_and_failing_cmd.splitlines()

        # 2. find commits to cherry-pick from the failing branch
        # TODO: as an alternative approach we may need to consider `{passing_main_commit}..{start}`
        cherry_pick_range[package] = f"{failing_main_commit}..{end}"

        # 3. now we can use the main branch commits for bisection
        start = passing_main_commit
        end = failing_main_commit

    log_info(f"cherry_pick_range: {cherry_pick_range}, start: {start}, end: {end}")

    # now create the right git command to retrieve the history between start..end
    # (`{start}^..{end}` so that `start` itself is part of the output)
    result = worker.check_exec(
        [
            "git",
            "log",
            "--first-parent",
            "--reverse",
            "--format=%H %cI",
            f"{start}^..{end}",
        ],
        policy="once",
        stderr=subprocess.PIPE,
        workdir=dir,
    )

    data = []
    for line in result.stdout.splitlines():
        commit, date = line.split()
        # for python < 3.11 we need to fix a trailing "Z", which
        # datetime.fromisoformat does not accept there:
        if date.endswith("Z"):
            date = date[:-1] + "+00:00"
        date = datetime.datetime.fromisoformat(date).astimezone(datetime.timezone.utc)
        data.append((commit, date))

    return data, cherry_pick_range
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import logging
2+
from .container import Container
3+
from .docker import DockerContainer
4+
from .pyxis import PyxisContainer
5+
from .local import LocalContainer
6+
7+
8+
def make_container(
    runtime: str, url: str, mounts: list, logger: logging.Logger, **kwargs
) -> Container:
    """
    Build the container object that matches the requested runtime.

    Args:
        runtime (str): Container runtime to use: 'local' and 'docker' select
            their own implementations, any other value selects Pyxis.
        url (str): The URL of the container (not used for 'local').
        mounts (list): Mounts to be used in the container (not used for 'local').
        logger (logging.Logger): Logger instance for logging messages.
        **kwargs: Extra keyword arguments forwarded to the Docker/Pyxis
            container constructor (not used for 'local').

    Returns:
        Container: A container instance for the specified runtime.
    """
    if runtime != "local":
        # Only 'docker' is singled out; everything else is handled by Pyxis.
        if runtime == "docker":
            backend = DockerContainer
        else:
            backend = PyxisContainer
        return backend(url, logger=logger, mounts=mounts, **kwargs)

    # The local runtime needs neither an image URL nor mounts.
    return LocalContainer(logger=logger)

0 commit comments

Comments
 (0)