Skip to content

script updates #6913

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/github-analytics-daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ jobs:
--remote origin \
--branch "${BRANCH:-release/2.8}" \
--analyze-missing-reverts-from-branch
--fork-cut-date 2024-06-24


- name: Show outstanding milestone issues
env:
Expand All @@ -79,4 +81,3 @@ jobs:
--remote origin \
--branch "${BRANCH:-release/2.8}" \
--milestone-id "${MILESTONE:-53}" \
--missing-in-branch
134 changes: 112 additions & 22 deletions tools/analytics/github_analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def get_ghf_revert_revision(commit: GitCommit) -> Optional[str]:
rc is not None,
]
):
return rc.group(1)
return rc.group(1) if rc else None
return None


Expand Down Expand Up @@ -139,6 +139,7 @@ def parse_medium_format(lines: Union[str, List[str]]) -> GitCommit:
author_date=datetime.fromtimestamp(int(lines[2].split(":", 1)[1].strip())),
title=lines[4].strip(),
body="\n".join(lines[5:]),
pr_url="",
)


Expand Down Expand Up @@ -553,77 +554,156 @@ def extract_commit_hash_from_revert(text):
Returns:
str or None: The extracted commit hash, or None if not found
"""
# Pattern to match "This reverts commit <hash>."
pattern = r"This reverts commit ([0-9a-f]+)\."

match = re.search(pattern, text)
if match:
return match.group(1)
# Enhanced patterns to match various PyTorch revert formats
patterns = [
r"This reverts commit ([0-9a-f]{40})\.", # Full 40-char hash
r"This reverts commit ([0-9a-f]{7,40})\.", # Variable length hash
r"reverts commit ([0-9a-f]{7,40})", # Case insensitive
r"Revert.*commit ([0-9a-f]{7,40})", # Flexible revert format
r"Back out.*([0-9a-f]{7,40})", # Back out format
]

for pattern in patterns:
match = re.search(pattern, text, re.IGNORECASE)
if match:
return match.group(1)
return None


def analyze_reverts_missing_from_branch(repo: GitRepo, branch: str) -> None:
def analyze_reverts_missing_from_branch(
repo: GitRepo, branch: str, fork_cut_date: Optional[datetime] = None
) -> None:
"""
Analyze reverts applied to main branch but not applied to specified branch.
This identifies potential missing revert commits that may need to be cherry-picked
to the release branch. Also detects if reverted commits from main were cherry-picked
to the branch.
to maintain parity with main. Enhanced to validate fork branch contents and timing.

Args:
repo: GitRepo instance
branch: Target branch to analyze (e.g., release branch)
fork_cut_date: Optional date when fork was created from main
"""
# Get commits from main that are not in the specified branch
main_only_commits = build_commit_dict(repo.get_commit_list(branch, "main"))

# Get commits from the specified branch that are not in main
branch_only_commits = build_commit_dict(repo.get_commit_list("main", branch))

# Get all commits in the branch to check original PR presence
all_branch_commits = build_commit_dict(
repo._run_git_log(f"{repo.remote}/orig/{branch}")
)

branch_only_reverts = set()
branch_commit_hashes = set(all_branch_commits.keys())

print(f"Analyzing reverts in main branch not present in {branch} branch")
if fork_cut_date:
print(f"Filtering for reverts after fork cut date: {fork_cut_date}")
print(f"Total commits in main but not in {branch}: {len(main_only_commits)}")
print(f"Total commits in {branch} but not in main: {len(branch_only_commits)}")
print(f"Total commits in {branch}: {len(all_branch_commits)}")
print()

# Build set of reverted commits in branch and reverts in branch
for commit_hash, commit in branch_only_commits.items():
revert_hash = extract_commit_hash_from_revert(commit.body)
if revert_hash != None:
if revert_hash:
branch_only_reverts.add(revert_hash)
if is_revert(commit):
branch_only_reverts.add(commit_hash)

# Also check all branch commits for existing reverts
for commit_hash, commit in all_branch_commits.items():
revert_hash = extract_commit_hash_from_revert(commit.body)
if revert_hash:
branch_only_reverts.add(revert_hash)

# Find reverts in main that are not in the specified branch
reverts_missing_from_branch = []
reverts_needing_cherry_pick = []

for commit_hash, commit in main_only_commits.items():
if is_revert(commit):
# Apply fork cut date filter if specified
commit_date = commit.commit_date or commit.author_date
if fork_cut_date and commit_date < fork_cut_date:
continue

reverts_missing_from_branch.append(commit)

# Check if the original reverted commit exists in the branch
reverted_hash = extract_commit_hash_from_revert(commit.body)
if not reverted_hash:
reverted_hash = get_ghf_revert_revision(commit)

if reverted_hash and reverted_hash in branch_commit_hashes:
reverts_needing_cherry_pick.append(
{
"revert_commit": commit,
"reverted_commit_hash": reverted_hash,
"reason": f"Original commit {reverted_hash[:8]} exists in {branch}",
}
)

if not reverts_missing_from_branch:
print(f"No reverts found in main branch that are missing from {branch} branch.")
return

print(
f"Found {len(reverts_missing_from_branch)} revert(s) in main branch not present in {branch} branch:"
)
print(
f"Of these, {len(reverts_needing_cherry_pick)} revert(s) need cherry-picking for parity:"
)
print("=" * 80)

# First show reverts that definitely need cherry-picking
if reverts_needing_cherry_pick:
print("🔴 REVERTS REQUIRING CHERRY-PICK (original commit exists in branch):")
print("=" * 80)

for revert_info in reverts_needing_cherry_pick:
commit = revert_info["revert_commit"]
print(f"✅ CHERRY-PICK NEEDED: {commit.commit_hash}")
print(f" Command: git cherry-pick {commit.commit_hash}")
print(f" Date: {commit.commit_date or commit.author_date}")
print(f" Author: {commit.author}")
print(f" Title: {commit.title}")
print(f" Reverted: {revert_info['reverted_commit_hash']}")
print(f" Reason: {revert_info['reason']}")
if commit.pr_url:
print(f" PR URL: {commit.pr_url}")
print("-" * 80)

print()
print("🟡 OTHER REVERTS (may not need cherry-picking):")
print("=" * 80)

# Show remaining reverts (those not requiring cherry-pick)
already_shown = {
r["revert_commit"].commit_hash for r in reverts_needing_cherry_pick
}

for commit in reverts_missing_from_branch:
if commit.commit_hash in already_shown:
continue

# Try to identify what was reverted
revert_revision = get_revert_revision(commit)
ghf_revert_revision = get_ghf_revert_revision(commit)
reverted_commit_hash = extract_commit_hash_from_revert(commit.body)

reverted_commit_hash = None
if revert_revision:
print(f"Reverted Phabricator Diff: {revert_revision}")
elif ghf_revert_revision:
print(f"Reverted GitHub Commit: {ghf_revert_revision}")
reverted_commit_hash = ghf_revert_revision

# Check if the reverted commit was cherry-picked to the branch
cherry_picked_to_branch = False
if reverted_commit_hash:
if reverted_commit_hash in branch_only_reverts:
cherry_picked_to_branch = True
print(
f"✅ DETECTED: The reverted commit {reverted_commit_hash} was cherry-picked to {branch}"
)
cherry_picked_to_branch = (
reverted_commit_hash and reverted_commit_hash in branch_commit_hashes
)

print(f"Commit Hash: {commit.commit_hash}")
print(f"Author: {commit.author}")
Expand All @@ -632,9 +712,13 @@ def analyze_reverts_missing_from_branch(repo: GitRepo, branch: str) -> None:
if commit.pr_url:
print(f"PR URL: {commit.pr_url}")

if not cherry_picked_to_branch:
if cherry_picked_to_branch and reverted_commit_hash:
print(
f"⚠️ WARNING: Original commit {reverted_commit_hash[:8]} exists in {branch} - consider cherry-picking this revert"
)
else:
print(
f"⚠️ STATUS: The reverted commit does not appear to be in {branch}, so this revert may not be needed."
f"ℹ️ INFO: Original commit not found in {branch} - revert may not be needed"
)

print(
Expand Down Expand Up @@ -668,6 +752,11 @@ def parse_arguments():
action="store_true",
help="Analyze reverts applied to main branch but not applied to specified branch",
)
parser.add_argument(
"--fork-cut-date",
type=lambda d: datetime.strptime(d, "%Y-%m-%d"),
help="Date when fork branch was cut from main (YYYY-MM-DD format). Only reverts after this date will be considered.",
)
parser.add_argument("--date", type=lambda d: datetime.strptime(d, "%Y-%m-%d"))
parser.add_argument("--issue-num", type=int)
return parser.parse_args()
Expand Down Expand Up @@ -698,7 +787,8 @@ def main():
"Error: --branch argument is required for --analyze-missing-reverts-from-branch"
)
return
analyze_reverts_missing_from_branch(repo, args.branch)
fork_cut_date = getattr(args, "fork_cut_date", None)
analyze_reverts_missing_from_branch(repo, args.branch, fork_cut_date)
return

# Use milestone idx or search it along milestone titles
Expand Down
Loading