From 75b2c8d091bf954eb659c90d7008c4463a6b587c Mon Sep 17 00:00:00 2001 From: trucodd <135946016+trucodd@users.noreply.github.com> Date: Fri, 25 Jul 2025 00:49:30 +0000 Subject: [PATCH 1/5] refactor:remove PLR0912 ignore and fix 4 violations --- .../commands/ai_create_chapter_chunks.py | 67 ++-- .../commands/ai_create_project_chunks.py | 135 ++++---- backend/apps/github/common.py | 287 +++++++++--------- backend/apps/github/models/repository.py | 94 +++--- backend/pyproject.toml | 1 - 5 files changed, 293 insertions(+), 291 deletions(-) diff --git a/backend/apps/ai/management/commands/ai_create_chapter_chunks.py b/backend/apps/ai/management/commands/ai_create_chapter_chunks.py index 8b73079e64..da20ffbf04 100644 --- a/backend/apps/ai/management/commands/ai_create_chapter_chunks.py +++ b/backend/apps/ai/management/commands/ai_create_chapter_chunks.py @@ -100,14 +100,15 @@ def extract_chapter_content(self, chapter: Chapter) -> tuple[str, str]: prose_parts = [] metadata_parts = [] - if chapter.description: - prose_parts.append(f"Description: {chapter.description}") - - if chapter.summary: - prose_parts.append(f"Summary: {chapter.summary}") - - if hasattr(chapter, "owasp_repository") and chapter.owasp_repository: - repo = chapter.owasp_repository + # Prose content + for field, label in [("description", "Description"), ("summary", "Summary")]: + value = getattr(chapter, field, None) + if value: + prose_parts.append(f"{label}: {value}") + + # Repository content + repo = getattr(chapter, "owasp_repository", None) + if repo: if repo.description: prose_parts.append(f"Repository Description: {repo.description}") if repo.topics: @@ -116,34 +117,34 @@ def extract_chapter_content(self, chapter: Chapter) -> tuple[str, str]: if chapter.name: metadata_parts.append(f"Chapter Name: {chapter.name}") - location_parts = [] - if chapter.country: - location_parts.append(f"Country: {chapter.country}") - if chapter.region: - location_parts.append(f"Region: {chapter.region}") - if chapter.postal_code: - location_parts.append(f"Postal Code: {chapter.postal_code}") - if chapter.suggested_location: - location_parts.append(f"Location: {chapter.suggested_location}") - + # Location information - combine into single operation + location_parts = [ + f"{label}: {value}" + for value, label in [ + (chapter.country, "Country"), + (chapter.region, "Region"), + (chapter.postal_code, "Postal Code"), + (chapter.suggested_location, "Location"), + ] + if value + ] if location_parts: metadata_parts.append(f"Location Information: {', '.join(location_parts)}") - if chapter.currency: - metadata_parts.append(f"Currency: {chapter.currency}") - - if chapter.meetup_group: - metadata_parts.append(f"Meetup Group: {chapter.meetup_group}") - - if chapter.tags: - metadata_parts.append(f"Tags: {', '.join(chapter.tags)}") - - if chapter.topics: - metadata_parts.append(f"Topics: {', '.join(chapter.topics)}") - - if chapter.leaders_raw: - metadata_parts.append(f"Chapter Leaders: {', '.join(chapter.leaders_raw)}") - + # Simple and list-based metadata fields + for field, label in [ + ("currency", "Currency"), + ("meetup_group", "Meetup Group"), + ("tags", "Tags"), + ("topics", "Topics"), + ("leaders_raw", "Chapter Leaders"), + ]: + value = getattr(chapter, field, None) + if value: + display_value = ", ".join(value) if isinstance(value, list) else value + metadata_parts.append(f"{label}: {display_value}") + + # Related URLs with validation if chapter.related_urls: valid_urls = [ url diff --git a/backend/apps/ai/management/commands/ai_create_project_chunks.py b/backend/apps/ai/management/commands/ai_create_project_chunks.py index d472ea9589..c79caacd2f 100644 --- a/backend/apps/ai/management/commands/ai_create_project_chunks.py +++ b/backend/apps/ai/management/commands/ai_create_project_chunks.py @@ -87,64 +87,59 @@ def create_chunks(self, project: Project) -> list[Chunk]: ) def extract_project_content(self, project: Project) -> tuple[str, str]: - prose_parts = [] - metadata_parts = [] - - if project.name: - metadata_parts.append(f"Project Name: {project.name}") - - if project.description: - prose_parts.append(f"Description: {project.description}") - - if project.summary: - prose_parts.append(f"Summary: {project.summary}") - - if project.level: - metadata_parts.append(f"Project Level: {project.level}") - - if project.type: - metadata_parts.append(f"Project Type: {project.type}") - - if hasattr(project, "owasp_repository") and project.owasp_repository: - repo = project.owasp_repository + prose_parts: list[str] = [] + metadata_parts: list[str] = [] + + # Basic project information + for value, label, target_list in [ + (project.name, "Project Name", metadata_parts), + (project.description, "Description", prose_parts), + (project.summary, "Summary", prose_parts), + (project.level, "Project Level", metadata_parts), + (project.type, "Project Type", metadata_parts), + ]: + if value: + target_list.append(f"{label}: {value}") + + # Repository content + repo = getattr(project, "owasp_repository", None) + if repo: if repo.description: prose_parts.append(f"Repository Description: {repo.description}") if repo.topics: metadata_parts.append(f"Repository Topics: {', '.join(repo.topics)}") - if project.languages: - metadata_parts.append(f"Programming Languages: {', '.join(project.languages)}") - - if project.topics: - metadata_parts.append(f"Topics: {', '.join(project.topics)}") - - if project.licenses: - metadata_parts.append(f"Licenses: {', '.join(project.licenses)}") - - if project.tags: - metadata_parts.append(f"Tags: {', '.join(project.tags)}") - - if project.custom_tags: - metadata_parts.append(f"Custom Tags: {', '.join(project.custom_tags)}") - - stats_parts = [] - if project.stars_count > 0: - stats_parts.append(f"Stars: {project.stars_count}") - if project.forks_count > 0: - stats_parts.append(f"Forks: {project.forks_count}") - if project.contributors_count > 0: - stats_parts.append(f"Contributors: {project.contributors_count}") - if project.releases_count > 0: - stats_parts.append(f"Releases: {project.releases_count}") - if project.open_issues_count > 0: - stats_parts.append(f"Open Issues: {project.open_issues_count}") + # Process all metadata fields in groups + self._add_list_metadata( + metadata_parts, + [ + (project.languages, "Programming Languages"), + (project.topics, "Topics"), + (project.licenses, "Licenses"), + (project.tags, "Tags"), + (project.custom_tags, "Custom Tags"), + ], + ) + # Statistics + stats_parts = [ + f"{label}: {count}" + for count, label in [ + (project.stars_count, "Stars"), + (project.forks_count, "Forks"), + (project.contributors_count, "Contributors"), + (project.releases_count, "Releases"), + (project.open_issues_count, "Open Issues"), + ] + if count > 0 + ] if stats_parts: metadata_parts.append("Project Statistics: " + ", ".join(stats_parts)) - if project.leaders_raw: - metadata_parts.append(f"Project Leaders: {', '.join(project.leaders_raw)}") + # Additional metadata and dates + self._add_additional_metadata(metadata_parts, project) + # Related URLs with validation if project.related_urls: valid_urls = [ url @@ -154,25 +149,39 @@ def extract_project_content(self, project: Project) -> tuple[str, str]: if valid_urls: metadata_parts.append(f"Related URLs: {', '.join(valid_urls)}") - if project.created_at: - metadata_parts.append(f"Created: {project.created_at.strftime('%Y-%m-%d')}") + return ( + DELIMITER.join(filter(None, prose_parts)), + DELIMITER.join(filter(None, metadata_parts)), + ) - if project.updated_at: - metadata_parts.append(f"Last Updated: {project.updated_at.strftime('%Y-%m-%d')}") + def _add_list_metadata(self, metadata_parts, field_list): + """Add list-based metadata fields.""" + for value_list, label in field_list: + if value_list: + metadata_parts.append(f"{label}: {', '.join(value_list)}") + + def _add_additional_metadata(self, metadata_parts, project): + """Add additional metadata including dates and final fields.""" + # Leaders + if project.leaders_raw: + metadata_parts.append(f"Project Leaders: {', '.join(project.leaders_raw)}") - if project.released_at: - metadata_parts.append(f"Last Release: {project.released_at.strftime('%Y-%m-%d')}") + # Date fields + for date_value, label in [ + (project.created_at, "Created"), + (project.updated_at, "Last Updated"), + (project.released_at, "Last Release"), + ]: + if date_value: + metadata_parts.append(f"{label}: {date_value.strftime('%Y-%m-%d')}") + # Final metadata if project.health_score is not None: metadata_parts.append(f"Health Score: {project.health_score:.2f}") - metadata_parts.append(f"Active Project: {'Yes' if project.is_active else 'No'}") - - metadata_parts.append( - f"Issue Tracking: {'Enabled' if project.track_issues else 'Disabled'}" - ) - - return ( - DELIMITER.join(filter(None, prose_parts)), - DELIMITER.join(filter(None, metadata_parts)), + metadata_parts.extend( + [ + f"Active Project: {'Yes' if project.is_active else 'No'}", + f"Issue Tracking: {'Enabled' if project.track_issues else 'Disabled'}", + ] ) diff --git a/backend/apps/github/common.py b/backend/apps/github/common.py index 7281963983..86c7607d76 100644 --- a/backend/apps/github/common.py +++ b/backend/apps/github/common.py @@ -39,164 +39,156 @@ def sync_repository( entity_key = gh_repository.name.lower() is_owasp_site_repository = check_owasp_site_repository(entity_key) - # GitHub repository organization. - if organization is None: - gh_organization = gh_repository.organization - if gh_organization is not None: - organization = Organization.update_data(gh_organization) - - # GitHub repository owner. - if user is None: - user = User.update_data(gh_repository.owner) - - # GitHub repository. - commits = gh_repository.get_commits() - contributors = gh_repository.get_contributors() - languages = None if is_owasp_site_repository else gh_repository.get_languages() + # Setup organization and user + organization = organization or ( + Organization.update_data(gh_repository.organization) + if gh_repository.organization + else None + ) + user = user or User.update_data(gh_repository.owner) + # Create repository repository = Repository.update_data( gh_repository, - commits=commits, - contributors=contributors, - languages=languages, + commits=gh_repository.get_commits(), + contributors=gh_repository.get_contributors(), + languages=None if is_owasp_site_repository else gh_repository.get_languages(), organization=organization, user=user, ) + # Process repository content if not archived if not repository.is_archived: - # GitHub repository milestones. - kwargs = { - "direction": "desc", - "sort": "updated", - "state": "all", - } - - until = ( - latest_updated_milestone.updated_at - if (latest_updated_milestone := repository.latest_updated_milestone) - else timezone.now() - td(days=30) + _sync_repository_milestones(gh_repository, repository) + _sync_repository_issues(gh_repository, repository) + _sync_repository_pull_requests(gh_repository, repository) + + _sync_repository_releases(gh_repository, repository, is_owasp_site_repository) + _sync_repository_contributors(gh_repository, repository) + + return organization, repository + + +def _sync_repository_milestones(gh_repository, repository): + """Sync repository milestones.""" + until = ( + repository.latest_updated_milestone.updated_at + if repository.latest_updated_milestone + else timezone.now() - td(days=30) + ) + + for gh_milestone in gh_repository.get_milestones( + direction="desc", sort="updated", state="all" + ): + if gh_milestone.updated_at < until: + break + + milestone = Milestone.update_data( + gh_milestone, + author=User.update_data(gh_milestone.creator), + repository=repository, ) - for gh_milestone in gh_repository.get_milestones(**kwargs): - if gh_milestone.updated_at < until: - break + milestone.labels.clear() + for gh_milestone_label in gh_milestone.get_labels(): + try: + milestone.labels.add(Label.update_data(gh_milestone_label)) + except UnknownObjectException: + logger.exception("Couldn't get GitHub milestone label %s", milestone.url) - milestone = Milestone.update_data( - gh_milestone, - author=User.update_data(gh_milestone.creator), - repository=repository, - ) - # Labels. - milestone.labels.clear() - for gh_milestone_label in gh_milestone.get_labels(): - try: - milestone.labels.add(Label.update_data(gh_milestone_label)) - except UnknownObjectException: - logger.exception("Couldn't get GitHub milestone label %s", milestone.url) - - # GitHub repository issues. - project_track_issues = repository.project.track_issues if repository.project else True - month_ago = timezone.now() - td(days=30) - - if repository.track_issues and project_track_issues: - kwargs = { - "direction": "desc", - "sort": "updated", - "state": "all", - } - until = ( - latest_updated_issue.updated_at - if (latest_updated_issue := repository.latest_updated_issue) - else month_ago +def _sync_repository_issues(gh_repository, repository): + """Sync repository issues.""" + project_track_issues = repository.project.track_issues if repository.project else True + + if not (repository.track_issues and project_track_issues): + logger.info("Skipping issues sync for %s", repository.name) + return + + until = ( + repository.latest_updated_issue.updated_at + if repository.latest_updated_issue + else timezone.now() - td(days=30) + ) + + for gh_issue in gh_repository.get_issues(direction="desc", sort="updated", state="all"): + if gh_issue.pull_request: + continue + if gh_issue.updated_at < until: + break + + milestone = ( + Milestone.update_data( + gh_issue.milestone, + author=User.update_data(gh_issue.milestone.creator), + repository=repository, ) - for gh_issue in gh_repository.get_issues(**kwargs): - if gh_issue.pull_request: # Skip pull requests. - continue - - if gh_issue.updated_at < until: - break - - author = User.update_data(gh_issue.user) - - # Milestone - milestone = None - if gh_issue.milestone: - milestone = Milestone.update_data( - gh_issue.milestone, - author=User.update_data(gh_issue.milestone.creator), - repository=repository, - ) - issue = Issue.update_data( - gh_issue, - author=author, - milestone=milestone, - repository=repository, - ) + if gh_issue.milestone + else None + ) - # Assignees. - issue.assignees.clear() - for gh_issue_assignee in gh_issue.assignees: - if issue_assignee := User.update_data(gh_issue_assignee): - issue.assignees.add(issue_assignee) - - # Labels. - issue.labels.clear() - for gh_issue_label in gh_issue.labels: - try: - issue.labels.add(Label.update_data(gh_issue_label)) - except UnknownObjectException: - logger.exception("Couldn't get GitHub issue label %s", issue.url) - else: - logger.info("Skipping issues sync for %s", repository.name) - - # GitHub repository pull requests. - kwargs = { - "direction": "desc", - "sort": "updated", - "state": "all", - } - until = ( - latest_updated_pull_request.updated_at - if (latest_updated_pull_request := repository.latest_updated_pull_request) - else month_ago + issue = Issue.update_data( + gh_issue, + author=User.update_data(gh_issue.user), + milestone=milestone, + repository=repository, ) - for gh_pull_request in gh_repository.get_pulls(**kwargs): - if gh_pull_request.updated_at < until: - break - author = User.update_data(gh_pull_request.user) + _update_assignees_and_labels(issue, gh_issue.assignees, gh_issue.labels, "issue") - # Milestone - milestone = None - if gh_pull_request.milestone: - milestone = Milestone.update_data( - gh_pull_request.milestone, - author=User.update_data(gh_pull_request.milestone.creator), - repository=repository, - ) - pull_request = PullRequest.update_data( - gh_pull_request, - author=author, - milestone=milestone, + +def _sync_repository_pull_requests(gh_repository, repository): + """Sync repository pull requests.""" + until = ( + repository.latest_updated_pull_request.updated_at + if repository.latest_updated_pull_request + else timezone.now() - td(days=30) + ) + + for gh_pull_request in gh_repository.get_pulls(direction="desc", sort="updated", state="all"): + if gh_pull_request.updated_at < until: + break + + milestone = ( + Milestone.update_data( + gh_pull_request.milestone, + author=User.update_data(gh_pull_request.milestone.creator), repository=repository, ) + if gh_pull_request.milestone + else None + ) + + pull_request = PullRequest.update_data( + gh_pull_request, + author=User.update_data(gh_pull_request.user), + milestone=milestone, + repository=repository, + ) + + _update_assignees_and_labels( + pull_request, gh_pull_request.assignees, gh_pull_request.labels, "pull request" + ) - # Assignees. - pull_request.assignees.clear() - for gh_pull_request_assignee in gh_pull_request.assignees: - if pull_request_assignee := User.update_data(gh_pull_request_assignee): - pull_request.assignees.add(pull_request_assignee) - - # Labels. - pull_request.labels.clear() - for gh_pull_request_label in gh_pull_request.labels: - try: - pull_request.labels.add(Label.update_data(gh_pull_request_label)) - except UnknownObjectException: - logger.exception("Couldn't get GitHub pull request label %s", pull_request.url) - - # GitHub repository releases. + +def _update_assignees_and_labels(item, gh_assignees, gh_labels, item_type): + """Update assignees and labels for issues/pull requests.""" + item.assignees.clear() + for gh_assignee in gh_assignees: + assignee = User.update_data(gh_assignee) + if assignee: + item.assignees.add(assignee) + + item.labels.clear() + for gh_label in gh_labels: + try: + item.labels.add(Label.update_data(gh_label)) + except UnknownObjectException: + logger.exception("Couldn't get GitHub %s label %s", item_type, item.url) + + +def _sync_repository_releases(gh_repository, repository, is_owasp_site_repository): + """Sync repository releases.""" releases = [] if not is_owasp_site_repository: existing_release_node_ids = set( @@ -209,21 +201,20 @@ def sync_repository( if release_node_id in existing_release_node_ids: break - author = User.update_data(gh_release.author) - releases.append(Release.update_data(gh_release, author=author, repository=repository)) + releases.append( + Release.update_data( + gh_release, author=User.update_data(gh_release.author), repository=repository + ) + ) Release.bulk_save(releases) - # GitHub repository contributors. + +def _sync_repository_contributors(gh_repository, repository): + """Sync repository contributors.""" RepositoryContributor.bulk_save( [ - RepositoryContributor.update_data( - gh_contributor, - repository=repository, - user=user, - ) + RepositoryContributor.update_data(gh_contributor, repository=repository, user=user) for gh_contributor in gh_repository.get_contributors() if (user := User.update_data(gh_contributor)) ] ) - - return organization, repository diff --git a/backend/apps/github/models/repository.py b/backend/apps/github/models/repository.py index 2cf299b7bb..6d07edc690 100644 --- a/backend/apps/github/models/repository.py +++ b/backend/apps/github/models/repository.py @@ -210,6 +210,7 @@ def from_github( user (User, optional): The user instance. """ + # Direct field mapping field_mapping = { "created_at": "created_at", "default_branch": "default_branch", @@ -234,75 +235,76 @@ def from_github( "updated_at": "updated_at", "watchers_count": "watchers_count", } - - # Direct fields. for model_field, gh_field in field_mapping.items(): value = getattr(gh_repository, gh_field) if value is not None: setattr(self, model_field, value) - # Key and OWASP repository flags. + # Repository metadata self.key = self.name.lower() self.is_owasp_repository = ( organization is not None and organization.login.lower() == OWASP_LOGIN ) self.is_owasp_site_repository = check_owasp_site_repository(self.key) - # Commits. - if commits is not None: - try: - self.commits_count = commits.totalCount - except GithubException as e: - if e.data["status"] == "409" and "Git Repository is empty" in e.data["message"]: - self.is_empty = True - - # Contributors. - if contributors is not None: - self.contributors_count = contributors.totalCount - - # Languages. - if languages is not None: - total_size = sum(languages.values()) - self.languages = { - language: round(size * 100.0 / total_size, 1) - for language, size in languages.items() - } - - # License. + # Process optional data sources + optional_data = [ + (commits, self._process_commits_data), + (contributors, self._process_contributors_data), + (languages, self._process_languages_data), + ] + for data, processor in optional_data: + if data is not None: + processor(data) + + # License self.license = gh_repository.license.spdx_id if gh_repository.license else "" - # Fetch project metadata from funding.yml file. + # Funding metadata + self._process_funding_data(gh_repository) + + # Foreign keys + self.organization = organization + self.owner = user + + def _process_commits_data(self, commits): + """Process commits data.""" + try: + self.commits_count = commits.totalCount + except GithubException as e: + if e.data["status"] == "409" and "Git Repository is empty" in e.data["message"]: + self.is_empty = True + + def _process_contributors_data(self, contributors): + """Process contributors data.""" + self.contributors_count = contributors.totalCount + + def _process_languages_data(self, languages): + """Process languages data.""" + total_size = sum(languages.values()) + self.languages = { + language: round(size * 100.0 / total_size, 1) for language, size in languages.items() + } + + def _process_funding_data(self, gh_repository): + """Process funding.yml data.""" try: funding_yml = gh_repository.get_contents(".github/FUNDING.yml") yaml_content = b64decode(funding_yml.content).decode() self.funding_yml = yaml.safe_load(yaml_content) self.has_funding_yml = True - # Set funding policy compliance flag. - is_funding_policy_compliant = True - for platform, targets in self.funding_yml.items(): - for target in targets if isinstance(targets, list) else [targets]: - if not target: - continue - is_funding_policy_compliant = check_funding_policy_compliance( - platform, - target, - ) - - if not is_funding_policy_compliant: - break - - if not is_funding_policy_compliant: - break - self.is_funding_policy_compliant = is_funding_policy_compliant + # Check funding policy compliance + self.is_funding_policy_compliant = all( + check_funding_policy_compliance(platform, target) + for platform, targets in self.funding_yml.items() + for target in (targets if isinstance(targets, list) else [targets]) + if target + ) except (AttributeError, GithubException): self.has_funding_yml = False self.is_funding_policy_compliant = True - # FKs. - self.organization = organization - self.owner = user - @staticmethod def update_data( gh_repository, diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 0a6363fc58..dbd90bc0b7 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -131,7 +131,6 @@ ignore = [ "DJ012", # https://docs.astral.sh/ruff/rules/django-unordered-body-content-in-model/ "FIX002", # https://docs.astral.sh/ruff/rules/line-contains-todo/ "PLC0415", # https://docs.astral.sh/ruff/rules/import-outside-top-level/ - "PLR0912", # https://docs.astral.sh/ruff/rules/too-many-branches/ "PLR0913", # https://docs.astral.sh/ruff/rules/too-many-arguments/ "PLR0915", # https://docs.astral.sh/ruff/rules/too-many-statements/ "RUF012", # https://docs.astral.sh/ruff/rules/mutable-class-default/ From 00f1b0918f6ebcb724a559215746a8b08b64f94a Mon Sep 17 00:00:00 2001 From: trucodd <135946016+trucodd@users.noreply.github.com> Date: Sat, 26 Jul 2025 17:47:02 +0000 Subject: [PATCH 2/5] improved readability of the funding compliance check while addressing PLR0912 (too many branches) --- backend/apps/github/models/repository.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/backend/apps/github/models/repository.py b/backend/apps/github/models/repository.py index 6d07edc690..2b3cdbaf43 100644 --- a/backend/apps/github/models/repository.py +++ b/backend/apps/github/models/repository.py @@ -295,16 +295,21 @@ def _process_funding_data(self, gh_repository): self.has_funding_yml = True # Check funding policy compliance - self.is_funding_policy_compliant = all( - check_funding_policy_compliance(platform, target) - for platform, targets in self.funding_yml.items() - for target in (targets if isinstance(targets, list) else [targets]) - if target - ) + self.is_funding_policy_compliant = self._check_all_funding_compliance() except (AttributeError, GithubException): self.has_funding_yml = False self.is_funding_policy_compliant = True + def _check_all_funding_compliance(self) -> bool: + """Check if all funding targets are policy compliant.""" + for platform, targets in self.funding_yml.items(): + # Normalize to list for consistent processing + target_list = targets if isinstance(targets, list) else [targets] + for target in target_list: + if target and not check_funding_policy_compliance(platform, target): + return False + return True + @staticmethod def update_data( gh_repository, From 8840fb2999b33ce53c9fb85c7be1327e0a54276d Mon Sep 17 00:00:00 2001 From: trucodd <135946016+trucodd@users.noreply.github.com> Date: Mon, 28 Jul 2025 07:33:56 +0000 Subject: [PATCH 3/5] refactor: Address reviewer feedback on PLR0912 changes --- backend/apps/github/common.py | 10 +- backend/apps/github/models/repository.py | 151 ++++++++++++++++++++++- backend/apps/github/utils.py | 14 ++- 3 files changed, 162 insertions(+), 13 deletions(-) diff --git a/backend/apps/github/common.py b/backend/apps/github/common.py index 86c7607d76..b2a90a068a 100644 --- a/backend/apps/github/common.py +++ b/backend/apps/github/common.py @@ -59,12 +59,12 @@ def sync_repository( # Process repository content if not archived if not repository.is_archived: - _sync_repository_milestones(gh_repository, repository) - _sync_repository_issues(gh_repository, repository) - _sync_repository_pull_requests(gh_repository, repository) + repository.sync_milestones(gh_repository) + repository.sync_issues(gh_repository) + repository.sync_pull_requests(gh_repository) - _sync_repository_releases(gh_repository, repository, is_owasp_site_repository) - _sync_repository_contributors(gh_repository, repository) + repository.sync_releases(gh_repository, is_owasp_site_repository) + repository.sync_contributors(gh_repository) return organization, repository diff --git a/backend/apps/github/models/repository.py b/backend/apps/github/models/repository.py index 2b3cdbaf43..2ca7195fdc 100644 --- a/backend/apps/github/models/repository.py +++ b/backend/apps/github/models/repository.py @@ -2,22 +2,33 @@ from __future__ import annotations +import logging from base64 import b64decode +from datetime import timedelta as td import yaml from django.db import models -from github.GithubException import GithubException +from django.utils import timezone +from github.GithubException import GithubException, UnknownObjectException from apps.common.models import TimestampedModel from apps.github.constants import OWASP_LOGIN from apps.github.models.common import NodeModel +from apps.github.models.issue import Issue +from apps.github.models.label import Label from apps.github.models.milestone import Milestone from apps.github.models.mixins import RepositoryIndexMixin +from apps.github.models.pull_request import PullRequest +from apps.github.models.release import Release +from apps.github.models.repository_contributor import RepositoryContributor +from apps.github.models.user import User from apps.github.utils import ( check_funding_policy_compliance, check_owasp_site_repository, ) +logger = logging.getLogger(__name__) + IGNORED_LANGUAGES = {"css", "html"} LANGUAGE_PERCENTAGE_THRESHOLD = 1 @@ -303,11 +314,8 @@ def _process_funding_data(self, gh_repository): def _check_all_funding_compliance(self) -> bool: """Check if all funding targets are policy compliant.""" for platform, targets in self.funding_yml.items(): - # Normalize to list for consistent processing - target_list = targets if isinstance(targets, list) else [targets] - for target in target_list: - if target and not check_funding_policy_compliance(platform, target): - return False + if not check_funding_policy_compliance(platform, targets): + return False return True @staticmethod @@ -354,3 +362,134 @@ def update_data( repository.save() return repository + + def sync_milestones(self, gh_repository): + """Sync milestones from GitHub repository.""" + until = ( + self.latest_updated_milestone.updated_at + if self.latest_updated_milestone + else timezone.now() - td(days=30) + ) + for gh_milestone in gh_repository.get_milestones( + direction="desc", sort="updated", state="all" + ): + if gh_milestone.updated_at < until: + break + milestone = Milestone.update_data( + gh_milestone, + author=User.update_data(gh_milestone.creator), + repository=self, + ) + milestone.labels.clear() + for gh_milestone_label in gh_milestone.get_labels(): + try: + milestone.labels.add(Label.update_data(gh_milestone_label)) + except UnknownObjectException: + logger.exception("Couldn't get GitHub milestone label %s", milestone.url) + + def sync_issues(self, gh_repository): + """Sync issues from GitHub repository.""" + project_track_issues = self.project.track_issues if self.project else True + if not (self.track_issues and project_track_issues): + logger.info("Skipping issues sync for %s", self.name) + return + until = ( + self.latest_updated_issue.updated_at + if self.latest_updated_issue + else timezone.now() - td(days=30) + ) + for gh_issue in gh_repository.get_issues(direction="desc", sort="updated", state="all"): + if gh_issue.pull_request: + continue + if gh_issue.updated_at < until: + break + milestone = ( + Milestone.update_data( + gh_issue.milestone, + author=User.update_data(gh_issue.milestone.creator), + repository=self, + ) + if gh_issue.milestone + else None + ) + issue = Issue.update_data( + gh_issue, + author=User.update_data(gh_issue.user), + milestone=milestone, + repository=self, + ) + self._update_assignees_and_labels(issue, gh_issue.assignees, gh_issue.labels, "issue") + + def sync_pull_requests(self, gh_repository): + """Sync pull requests from GitHub repository.""" + until = ( + self.latest_updated_pull_request.updated_at + if self.latest_updated_pull_request + else timezone.now() - td(days=30) + ) + for gh_pull_request in gh_repository.get_pulls( + direction="desc", sort="updated", state="all" + ): + if gh_pull_request.updated_at < until: + break + milestone = ( + Milestone.update_data( + gh_pull_request.milestone, + author=User.update_data(gh_pull_request.milestone.creator), + repository=self, + ) + if gh_pull_request.milestone + else None + ) + pull_request = PullRequest.update_data( + gh_pull_request, + author=User.update_data(gh_pull_request.user), + milestone=milestone, + repository=self, + ) + self._update_assignees_and_labels( + pull_request, gh_pull_request.assignees, gh_pull_request.labels, "pull request" + ) + + def _update_assignees_and_labels(self, item, gh_assignees, gh_labels, item_type): + item.assignees.clear() + for gh_assignee in gh_assignees: + assignee = User.update_data(gh_assignee) + if assignee: + item.assignees.add(assignee) + item.labels.clear() + for gh_label in gh_labels: + try: + item.labels.add(Label.update_data(gh_label)) + except UnknownObjectException: + logger.exception("Couldn't get GitHub %s label %s", item_type, item.url) + + def sync_releases(self, gh_repository, is_owasp_site_repository): + """Sync releases from GitHub repository.""" + releases = [] + if not is_owasp_site_repository: + existing_release_node_ids = set( + Release.objects.filter(repository=self).values_list("node_id", flat=True) + if self.id + else () + ) + for gh_release in gh_repository.get_releases(): + release_node_id = Release.get_node_id(gh_release) + if release_node_id in existing_release_node_ids: + break + releases.append( + Release.update_data( + gh_release, author=User.update_data(gh_release.author), repository=self + ) + ) + Release.bulk_save(releases) + + def sync_contributors(self, gh_repository): + """Sync contributors from GitHub repository.""" + RepositoryContributor.bulk_save( + [ + RepositoryContributor.update_data(gh_contributor, repository=self, user=user) + for gh_contributor in gh_repository.get_contributors() + if (user := User.update_data(gh_contributor)) + ] + ) diff --git a/backend/apps/github/utils.py b/backend/apps/github/utils.py index 647c4fb811..c80f06903d 100644 --- a/backend/apps/github/utils.py +++ b/backend/apps/github/utils.py @@ -33,8 +33,18 @@ def check_owasp_site_repository(key: str) -> bool: ) -def check_funding_policy_compliance(platform: str, target: str) -> bool: - """Check OWASP funding policy compliance. +def check_funding_policy_compliance(platform, targets): + """Check if all funding targets for a platform are policy compliant.""" + if not isinstance(targets, (list, tuple, set)): + targets = [targets] + for target in targets: + if target and not _check_single_funding_policy_compliance(platform, target): + return False + return True + + +def _check_single_funding_policy_compliance(platform, target): + """Check OWASP funding policy compliance for a single target. Args: platform (str): The funding platform (e.g., 'github', 'custom'). From d1c0ff0eaadb8e1b9f1baac12308601b6dd2c88d Mon Sep 17 00:00:00 2001 From: trucodd <135946016+trucodd@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:05:59 +0000 Subject: [PATCH 4/5] another refactor --- .../apps/ai/management/commands/ai_create_chapter_chunks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/apps/ai/management/commands/ai_create_chapter_chunks.py b/backend/apps/ai/management/commands/ai_create_chapter_chunks.py index da20ffbf04..9b95163bea 100644 --- a/backend/apps/ai/management/commands/ai_create_chapter_chunks.py +++ b/backend/apps/ai/management/commands/ai_create_chapter_chunks.py @@ -101,7 +101,11 @@ def extract_chapter_content(self, chapter: Chapter) -> tuple[str, str]: metadata_parts = [] # Prose content - for field, label in [("description", "Description"), ("summary", "Summary")]: + prose_fields = ( + ("description", "Description"), + ("summary", "Summary"), + ) + for field, label in prose_fields: value = getattr(chapter, field, None) if value: prose_parts.append(f"{label}: {value}") From 04fe755f67f5227aaecefd7a63c6fb50eec843e0 Mon Sep 17 00:00:00 2001 From: trucodd <135946016+trucodd@users.noreply.github.com> Date: Sun, 3 Aug 2025 02:16:20 +0000 Subject: [PATCH 5/5] removed code duplication in common.py --- backend/apps/github/common.py | 189 +++------------------------------- 1 file changed, 17 insertions(+), 172 deletions(-) diff --git a/backend/apps/github/common.py b/backend/apps/github/common.py index b2a90a068a..e93a1afdb3 100644 --- a/backend/apps/github/common.py +++ b/backend/apps/github/common.py @@ -3,19 +3,9 @@ from __future__ import annotations import logging -from datetime import timedelta as td -from django.utils import timezone -from github.GithubException import UnknownObjectException - -from apps.github.models.issue import Issue -from apps.github.models.label import Label -from apps.github.models.milestone import Milestone from apps.github.models.organization import Organization -from apps.github.models.pull_request import PullRequest -from apps.github.models.release import Release from apps.github.models.repository import Repository -from apps.github.models.repository_contributor import RepositoryContributor from apps.github.models.user import User from apps.github.utils import check_owasp_site_repository @@ -39,20 +29,26 @@ def sync_repository( entity_key = gh_repository.name.lower() is_owasp_site_repository = check_owasp_site_repository(entity_key) - # Setup organization and user - organization = organization or ( - Organization.update_data(gh_repository.organization) - if gh_repository.organization - else None - ) - user = user or User.update_data(gh_repository.owner) + # GitHub repository organization. + if organization is None: + gh_organization = gh_repository.organization + if gh_organization is not None: + organization = Organization.update_data(gh_organization) + + # GitHub repository owner. + if user is None: + user = User.update_data(gh_repository.owner) + + # GitHub repository. + commits = gh_repository.get_commits() + contributors = gh_repository.get_contributors() + languages = None if is_owasp_site_repository else gh_repository.get_languages() - # Create repository repository = Repository.update_data( gh_repository, - commits=gh_repository.get_commits(), - contributors=gh_repository.get_contributors(), - languages=None if is_owasp_site_repository else gh_repository.get_languages(), + commits=commits, + contributors=contributors, + languages=languages, organization=organization, user=user, ) @@ -67,154 +63,3 @@ def sync_repository( repository.sync_contributors(gh_repository) return organization, repository - - -def _sync_repository_milestones(gh_repository, repository): - """Sync repository milestones.""" - until = ( - repository.latest_updated_milestone.updated_at - if repository.latest_updated_milestone - else timezone.now() - td(days=30) - ) - - for gh_milestone in gh_repository.get_milestones( - direction="desc", sort="updated", state="all" - ): - if gh_milestone.updated_at < until: - break - - milestone = Milestone.update_data( - gh_milestone, - author=User.update_data(gh_milestone.creator), - repository=repository, - ) - - milestone.labels.clear() - for gh_milestone_label in gh_milestone.get_labels(): - try: - milestone.labels.add(Label.update_data(gh_milestone_label)) - except UnknownObjectException: - logger.exception("Couldn't get GitHub milestone label %s", milestone.url) - - -def _sync_repository_issues(gh_repository, repository): - """Sync repository issues.""" - project_track_issues = repository.project.track_issues if repository.project else True - - if not (repository.track_issues and project_track_issues): - logger.info("Skipping issues sync for %s", repository.name) - return - - until = ( - repository.latest_updated_issue.updated_at - if repository.latest_updated_issue - else timezone.now() - td(days=30) - ) - - for gh_issue in gh_repository.get_issues(direction="desc", sort="updated", state="all"): - if gh_issue.pull_request: - continue - if gh_issue.updated_at < until: - break - - milestone = ( - Milestone.update_data( - gh_issue.milestone, - author=User.update_data(gh_issue.milestone.creator), - repository=repository, - ) - if gh_issue.milestone - else None - ) - - issue = Issue.update_data( - gh_issue, - author=User.update_data(gh_issue.user), - milestone=milestone, - repository=repository, - ) - - _update_assignees_and_labels(issue, gh_issue.assignees, gh_issue.labels, "issue") - - -def _sync_repository_pull_requests(gh_repository, repository): - """Sync repository pull requests.""" - until = ( - repository.latest_updated_pull_request.updated_at - if repository.latest_updated_pull_request - else timezone.now() - td(days=30) - ) - - for gh_pull_request in gh_repository.get_pulls(direction="desc", sort="updated", state="all"): - if gh_pull_request.updated_at < until: - break - - milestone = ( - Milestone.update_data( - gh_pull_request.milestone, - author=User.update_data(gh_pull_request.milestone.creator), - repository=repository, - ) - if gh_pull_request.milestone - else None - ) - - pull_request = PullRequest.update_data( - gh_pull_request, - author=User.update_data(gh_pull_request.user), - milestone=milestone, - repository=repository, - ) - - _update_assignees_and_labels( - pull_request, gh_pull_request.assignees, gh_pull_request.labels, "pull request" - ) - - -def _update_assignees_and_labels(item, gh_assignees, gh_labels, item_type): - """Update assignees and labels for issues/pull requests.""" - item.assignees.clear() - for gh_assignee in gh_assignees: - assignee = User.update_data(gh_assignee) - if assignee: - item.assignees.add(assignee) - - item.labels.clear() - for gh_label in gh_labels: - try: - item.labels.add(Label.update_data(gh_label)) - except UnknownObjectException: - logger.exception("Couldn't get GitHub %s label %s", item_type, item.url) - - -def _sync_repository_releases(gh_repository, repository, is_owasp_site_repository): - """Sync repository releases.""" - releases = [] - if not is_owasp_site_repository: - existing_release_node_ids = set( - Release.objects.filter(repository=repository).values_list("node_id", flat=True) - if repository.id - else () - ) - for gh_release in gh_repository.get_releases(): - release_node_id = Release.get_node_id(gh_release) - if release_node_id in existing_release_node_ids: - break - - releases.append( - Release.update_data( - gh_release, author=User.update_data(gh_release.author), repository=repository - ) - ) - Release.bulk_save(releases) - - -def _sync_repository_contributors(gh_repository, repository): - """Sync repository contributors.""" - RepositoryContributor.bulk_save( - [ - RepositoryContributor.update_data(gh_contributor, repository=repository, user=user) - for gh_contributor in gh_repository.get_contributors() - if (user := User.update_data(gh_contributor)) - ] - )