Skip to content

Commit 1e5eaa6

Browse files
committed
decorator filtering working: Strip ANSI escape codes from Griffe paths
Signed-off-by: Pablo Garay <[email protected]>
1 parent 378985a commit 1e5eaa6

File tree

1 file changed

+124
-20
lines changed

1 file changed

+124
-20
lines changed

scripts/check_api_backwards_compatibility.py

Lines changed: 124 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import argparse
1313
import os
14+
import re
1415
import sys
1516
from collections import Counter
1617

@@ -40,19 +41,36 @@ def has_exempt_decorator(obj: Object) -> bool:
4041
"""Check if object has any exempt decorator."""
4142
if not hasattr(obj, 'decorators'):
4243
return False
44+
if not obj.decorators:
45+
return False
4346
for decorator in obj.decorators:
44-
dec_str = str(decorator)
45-
if any(exempt in dec_str for exempt in EXEMPT_DECORATORS):
47+
# Get the actual decorator name from the value attribute
48+
dec_value = str(getattr(decorator, 'value', ''))
49+
if any(exempt in dec_value for exempt in EXEMPT_DECORATORS):
4650
return True
4751
return False
4852

4953

50-
def get_filtered_paths(package: Object) -> set:
54+
def get_filtered_paths(package: Object, package_name: str) -> set:
5155
"""Get all paths that should be filtered (have exempt decorators)."""
5256
filtered = set()
57+
visited = set()
5358

54-
def visit(obj, path):
55-
current_path = f"{path}.{obj.name}" if path else obj.name
59+
def visit(obj, path, depth=0, is_root=False):
60+
# Prevent infinite recursion
61+
if depth > 20 or id(obj) in visited:
62+
return
63+
visited.add(id(obj))
64+
65+
# For root object, use the provided path; for children, append obj.name
66+
if is_root:
67+
current_path = path
68+
else:
69+
current_path = f"{path}.{obj.name}" if path else obj.name
70+
71+
# Skip aliases (imported objects)
72+
if hasattr(obj, 'is_alias') and obj.is_alias:
73+
return
5674

5775
# Skip private members
5876
if obj.name.startswith('_') and not obj.name.startswith('__'):
@@ -66,12 +84,22 @@ def visit(obj, path):
6684
# Visit children
6785
if hasattr(obj, 'members'):
6886
for member in obj.members.values():
69-
visit(member, current_path)
87+
visit(member, current_path, depth + 1, is_root=False)
7088

71-
visit(package, "")
89+
# Start with the full package name (e.g., "megatron.core")
90+
visit(package, package_name, is_root=True)
7291
return filtered
7392

7493

94+
def strip_ansi_codes(text):
95+
"""Remove ANSI escape codes from text."""
96+
if not text:
97+
return text
98+
# Pattern to match ANSI escape codes
99+
ansi_escape = re.compile(r'\x1b\[[0-9;]*m')
100+
return ansi_escape.sub('', text)
101+
102+
75103
def get_object_path(change) -> str:
76104
"""Extract the object path from a breaking change."""
77105
# Try different attributes
@@ -80,33 +108,90 @@ def get_object_path(change) -> str:
80108
getattr(change, 'path', None))
81109

82110
if path:
83-
return path
111+
return strip_ansi_codes(path)
84112

85113
# Try from values
86114
if hasattr(change, 'new_value') and change.new_value:
87-
return getattr(change.new_value, 'path', None)
115+
path = getattr(change.new_value, 'path', None)
116+
if path:
117+
return strip_ansi_codes(path)
118+
88119
if hasattr(change, 'old_value') and change.old_value:
89-
return getattr(change.old_value, 'path', None)
120+
path = getattr(change.old_value, 'path', None)
121+
if path:
122+
return strip_ansi_codes(path)
123+
124+
# Last resort: parse from explanation
125+
# Format: "filepath:line: object_path: description"
126+
# Example: "megatron/core/model_parallel_config.py:338: ModelParallelConfig.cpu_offloading_weights: Attribute value was changed"
127+
try:
128+
explanation = change.explain()
129+
# Split by ": " and get the second part (object path)
130+
parts = explanation.split(': ')
131+
if len(parts) >= 2:
132+
# Get the part after "filepath:line" but before the description
133+
# It's usually the second part
134+
object_path = parts[1]
135+
136+
# Extract the module path from file path (first part)
137+
file_part = parts[0].split(':')[0] # Get just the file path, remove line number
138+
139+
# Convert file path to module path
140+
# e.g., "megatron/core/model_parallel_config.py" -> "megatron.core.model_parallel_config"
141+
module_path = file_part.replace('/', '.').replace('\\', '.').replace('.py', '')
142+
143+
# If object_path doesn't start with module, prepend it
144+
if not object_path.startswith(module_path):
145+
full_path = f"{module_path}.{object_path}"
146+
else:
147+
full_path = object_path
148+
149+
return strip_ansi_codes(full_path)
150+
except Exception:
151+
pass
90152

91153
return None
92154

93155

94-
def should_skip_change(change, filtered_paths: set) -> bool:
156+
def should_skip_change(change, filtered_paths: set, debug=False) -> bool:
95157
"""Check if a breaking change should be skipped based on filters."""
96158
path = get_object_path(change)
97159
if not path:
98160
return False
99161

162+
# Strip parameter names from path for matching
163+
# e.g., "Class.__init__(param)" -> "Class.__init__"
164+
clean_path = path.split('(')[0] if '(' in path else path
165+
166+
# Debug specific paths
167+
if debug and ('ModelParallelConfig' in path or 'DistributedDataParallel' in path):
168+
print(f"\n 🔍 DEBUG matching:", file=sys.stderr)
169+
print(f" Original path: {path}", file=sys.stderr)
170+
print(f" Clean path: {clean_path}", file=sys.stderr)
171+
print(f" Filtered paths: {filtered_paths}", file=sys.stderr)
172+
100173
# Check exact match
101-
if path in filtered_paths:
174+
if clean_path in filtered_paths or path in filtered_paths:
175+
if debug and ('ModelParallelConfig' in path or 'DistributedDataParallel' in path):
176+
print(f" ✓ EXACT MATCH!", file=sys.stderr)
102177
return True
103178

104179
# Check if it's a child of a filtered object
105-
# e.g., MyClass.__init__ is child of MyClass
180+
# e.g., MyClass.__init__ is child of MyClass, MyClass.attr is child of MyClass
106181
for filtered_path in filtered_paths:
107-
if path.startswith(filtered_path + '.') or path.startswith(filtered_path + '('):
182+
if clean_path.startswith(filtered_path + '.'):
183+
if debug and ('ModelParallelConfig' in path or 'DistributedDataParallel' in path):
184+
print(f" ✓ CHILD MATCH with: {filtered_path}", file=sys.stderr)
185+
return True
186+
# Also check the original path in case parameter names matter
187+
if path.startswith(filtered_path + '.'):
188+
if debug and ('ModelParallelConfig' in path or 'DistributedDataParallel' in path):
189+
print(f" ✓ CHILD MATCH (orig) with: {filtered_path}", file=sys.stderr)
108190
return True
109191

192+
if debug and ('ModelParallelConfig' in path or 'DistributedDataParallel' in path):
193+
print(f" ✗ NO MATCH", file=sys.stderr)
194+
110195
return False
111196

112197

@@ -118,9 +203,6 @@ def main():
118203
parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
119204
args = parser.parse_args()
120205

121-
# Clean up stale worktrees before starting
122-
cleanup_git_worktrees()
123-
124206
try:
125207
package_name = args.package
126208

@@ -147,24 +229,46 @@ def main():
147229

148230
# Get filtered paths from CURRENT version only
149231
print(f"\n🔍 Finding exempt objects in current version...", file=sys.stderr)
150-
filtered_paths = get_filtered_paths(current)
232+
filtered_paths = get_filtered_paths(current, package_name)
151233
print(f" Found {len(filtered_paths)} exempt objects", file=sys.stderr)
152234

153235
# Find breaking changes
154236
print(f"\n🔍 Comparing versions...", file=sys.stderr)
155237
all_changes = list(griffe.find_breaking_changes(baseline, current))
156238
print(f" Found {len(all_changes)} potential breaking changes", file=sys.stderr)
157239

158-
# Filter out exempt changes
240+
# Filter out exempt changes
159241
breaking_changes = []
160242
skipped_count = 0
243+
244+
# DEBUG: Print first 5 breaking changes to STDOUT for debugging
245+
print("\n===TEST DEBUG (first 5 changes)===", flush=True)
246+
print(f"Filtered paths: {filtered_paths}", flush=True)
247+
for i, change in enumerate(all_changes[:5]):
248+
path = get_object_path(change)
249+
clean_path = path.split('(')[0] if path and '(' in path else path
250+
print(f"\nChange {i+1}: {path}", flush=True)
251+
print(f" Clean: {clean_path}", flush=True)
252+
print(f" Clean repr: {repr(clean_path)}", flush=True)
253+
254+
# Test matching
255+
matched = False
256+
for fpath in filtered_paths:
257+
if clean_path and (clean_path == fpath or clean_path.startswith(fpath + '.')):
258+
print(f" ✓ MATCH with: {fpath}", flush=True)
259+
matched = True
260+
break
261+
if not matched:
262+
print(f" ✗ NO MATCH", flush=True)
263+
print("\n===END TEST DEBUG===\n", flush=True)
264+
161265
for change in all_changes:
162266
if should_skip_change(change, filtered_paths):
163267
skipped_count += 1
164268
else:
165269
breaking_changes.append(change)
166270

167-
print(f" Skipped {skipped_count} exempt | Reporting {len(breaking_changes)} breaking changes", file=sys.stderr)
271+
print(f"\n Skipped {skipped_count} exempt | Reporting {len(breaking_changes)} breaking changes", file=sys.stderr)
168272

169273
# Print results
170274
if not breaking_changes:

0 commit comments

Comments
 (0)