Commit 95ad3a7

ghinks and claude committed
feat: implement SQLite-based caching for GitHub API responses
- Add comprehensive SQLite cache backend with TTL support
- Implement cache manager with test isolation and rate limiting integration
- Create cache key generation utilities for consistent key management
- Integrate caching into get_reviewers_with_comments_for_pull_requests
- Add automatic cache disabling during pytest runs
- Support forever caching for closed PRs, short TTL for open PRs
- Include cache statistics, cleanup, and debugging capabilities

Performance improvement: Significantly reduces GitHub API calls for repeated queries by caching closed PR review data persistently.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
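The cache manager itself lives in another file in this commit, but the "forever caching for closed PRs, short TTL for open PRs" bullet can be sketched against `is_pr_closed_cache_key` from `cache_keys.py` below. The `choose_ttl` helper and the 15-minute value here are hypothetical illustrations, not code from this commit:

```python
# Hypothetical sketch of the TTL policy described above; the real
# cache manager is in one of the other changed files.
from __future__ import annotations

from reviewtally.cache.cache_keys import is_pr_closed_cache_key

OPEN_PR_TTL_SECONDS = 15 * 60  # assumed short TTL for open PRs


def choose_ttl(cache_key: str, pr_states: dict[int, str]) -> int | None:
    """Return None for 'cache forever', else a TTL in seconds (hypothetical)."""
    if is_pr_closed_cache_key(cache_key, pr_states):
        return None  # closed-PR data never changes, so it never expires
    return OPEN_PR_TTL_SECONDS
```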
1 parent b1537fc commit 95ad3a7

File tree

5 files changed: 651 additions, 0 deletions


reviewtally/cache/__init__.py

Lines changed: 1 addition & 0 deletions

```python
# Cache module for persistent storage of GitHub API responses
```

reviewtally/cache/cache_keys.py

Lines changed: 194 additions & 0 deletions
```python
"""Cache key generation utilities for GitHub API responses."""

from __future__ import annotations

import hashlib
from typing import Any


def generate_pr_reviews_cache_key(
    owner: str,
    repo: str,
    pull_numbers: list[int],
) -> str:
    """
    Generate cache key for PR reviews data.

    Args:
        owner: Repository owner/organization
        repo: Repository name
        pull_numbers: List of PR numbers

    Returns:
        Cache key string

    """
    # Sort PR numbers for consistent key generation
    sorted_prs = sorted(pull_numbers)
    pr_list = ",".join(map(str, sorted_prs))

    # Create base key
    base_key = f"pr_reviews:{owner}:{repo}:{pr_list}"

    # Hash if too long (SQLite key limit considerations)
    max_key_length = 200
    if len(base_key) > max_key_length:
        key_hash = hashlib.sha256(base_key.encode()).hexdigest()[:16]
        return f"pr_reviews:{owner}:{repo}:hash_{key_hash}"

    return base_key
```
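Sorting plus the 200-character guard keeps keys deterministic and bounded. A quick sketch of both paths, with made-up inputs:

```python
generate_pr_reviews_cache_key("octocat", "hello-world", [42, 7])
# -> "pr_reviews:octocat:hello-world:7,42"  (numbers sorted before joining)

# A batch of ~60 four-digit PR numbers pushes the key past max_key_length,
# so the PR list collapses to a 16-hex-character SHA-256 prefix instead:
generate_pr_reviews_cache_key("octocat", "hello-world", list(range(1000, 1060)))
# -> "pr_reviews:octocat:hello-world:hash_<16 hex chars>"
```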
```python
def generate_pr_comments_cache_key(
    owner: str,
    repo: str,
    pull_number: int,
    review_id: int,
) -> str:
    """
    Generate cache key for PR review comments.

    Args:
        owner: Repository owner/organization
        repo: Repository name
        pull_number: PR number
        review_id: Review ID

    Returns:
        Cache key string

    """
    return f"pr_comments:{owner}:{repo}:{pull_number}:{review_id}"
```
```python
def generate_repos_cache_key(org_name: str, languages: list[str]) -> str:
    """
    Generate cache key for repository lists.

    Args:
        org_name: Organization name
        languages: List of programming languages

    Returns:
        Cache key string

    """
    # Sort languages for consistent key generation
    sorted_langs = sorted(languages) if languages else ["all"]
    lang_str = ",".join(sorted_langs)

    return f"repos:{org_name}:{lang_str}"
```
```python
def generate_prs_cache_key(
    owner: str,
    repo: str,
    start_date: str,
    end_date: str,
) -> str:
    """
    Generate cache key for PR lists within date range.

    Args:
        owner: Repository owner/organization
        repo: Repository name
        start_date: Start date string (ISO format)
        end_date: End date string (ISO format)

    Returns:
        Cache key string

    """
    return f"prs:{owner}:{repo}:{start_date}:{end_date}"
```
```python
def parse_cache_key(cache_key: str) -> dict[str, Any]:
    """
    Parse cache key to extract components.

    Args:
        cache_key: Cache key to parse

    Returns:
        Dictionary with parsed components

    """
    parts = cache_key.split(":")
    min_parts = 2
    if len(parts) < min_parts:
        return {"type": "unknown", "key": cache_key}

    cache_type = parts[0]

    if cache_type == "pr_reviews":
        min_pr_review_parts = 4
        if len(parts) >= min_pr_review_parts:
            return {
                "type": "pr_reviews",
                "owner": parts[1],
                "repo": parts[2],
                "pull_numbers": parts[3],
                "key": cache_key,
            }
    elif cache_type == "pr_comments":
        min_pr_comment_parts = 5
        if len(parts) >= min_pr_comment_parts:
            return {
                "type": "pr_comments",
                "owner": parts[1],
                "repo": parts[2],
                "pull_number": parts[3],
                "review_id": parts[4],
                "key": cache_key,
            }
    elif cache_type == "repos":
        min_repos_parts = 3
        if len(parts) >= min_repos_parts:
            return {
                "type": "repos",
                "org_name": parts[1],
                "languages": parts[2],
                "key": cache_key,
            }
    elif cache_type == "prs":
        min_prs_parts = 5
        if len(parts) >= min_prs_parts:
            return {
                "type": "prs",
                "owner": parts[1],
                "repo": parts[2],
                "start_date": parts[3],
                "end_date": parts[4],
                "key": cache_key,
            }

    return {"type": cache_type, "key": cache_key}
```
```python
def is_pr_closed_cache_key(
    cache_key: str, pr_state_lookup: dict[int, str] | None = None,
) -> bool:
    """
    Determine if a cache key is for closed PR data (never expires).

    Args:
        cache_key: Cache key to check
        pr_state_lookup: Optional lookup dict for PR states

    Returns:
        True if this is closed PR data that should never expire

    """
    parsed = parse_cache_key(cache_key)

    # PR reviews and comments for closed PRs never change
    if parsed["type"] in ("pr_reviews", "pr_comments"):
        # If we have state lookup, use it
        if pr_state_lookup and parsed["type"] == "pr_comments":
            pr_number = int(parsed.get("pull_number", 0))
            return pr_state_lookup.get(pr_number) == "closed"

        # Otherwise, assume we should check when caching
        return True  # Conservative approach - cache forever for now

    return False
```
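This is the hook the commit's TTL policy hangs off. With a state lookup, `pr_comments` keys are checked against the actual PR state; without one, review and comment data is conservatively treated as cache-forever (inputs made up):

```python
states = {42: "closed", 43: "open"}

is_pr_closed_cache_key("pr_comments:octocat:hello-world:42:98765", states)
# -> True: PR 42 is closed, so the entry never expires

is_pr_closed_cache_key("pr_comments:octocat:hello-world:43:98766", states)
# -> False: PR 43 is open, so the short TTL applies

is_pr_closed_cache_key("repos:my-org:Python")
# -> False: repository lists always expire on the normal schedule
```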

0 commit comments
