diff --git a/backend/Makefile b/backend/Makefile
index 5422364f1..5f33706c4 100644
--- a/backend/Makefile
+++ b/backend/Makefile
@@ -58,6 +58,10 @@ github-update-project-related-repositories:
 	@echo "Updating OWASP project related GitHub repositories"
 	@CMD="python manage.py github_update_project_related_repositories" $(MAKE) exec-backend-command
 
+github-update-external-repositories:
+	@echo "Updating external OWASP GitHub repositories"
+	@CMD="python manage.py github_update_external_repositories" $(MAKE) exec-backend-command
+
 github-update-users:
 	@echo "Updating GitHub users"
 	@CMD="python manage.py github_update_users" $(MAKE) exec-backend-command
@@ -196,6 +200,7 @@ update-data: \
 	owasp-scrape-committees \
 	owasp-scrape-projects \
 	github-update-project-related-repositories \
+	github-update-external-repositories \
 	github-update-users \
 	owasp-aggregate-projects \
 	owasp-update-events \
diff --git a/backend/apps/github/management/commands/github_update_external_repositories.py b/backend/apps/github/management/commands/github_update_external_repositories.py
new file mode 100644
index 000000000..055188fa0
--- /dev/null
+++ b/backend/apps/github/management/commands/github_update_external_repositories.py
@@ -0,0 +1,103 @@
+"""A command to update external OWASP repositories from GitHub data."""
+
+import logging
+import os
+
+import github
+from django.core.management.base import BaseCommand
+from github.GithubException import BadCredentialsException
+
+from apps.github.common import sync_repository
+from apps.github.constants import GITHUB_ITEMS_PER_PAGE
+from apps.github.models.organization import Organization
+from apps.owasp.constants import OWASP_ORGANIZATION_NAME
+from apps.owasp.models.project import Project
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    """Fetch external OWASP GitHub repositories and update relevant entities."""
+
+    help = "Fetch external OWASP GitHub repositories and update relevant entities."
+
+    def handle(self, *_args, **options) -> None:
+        """Handle the command execution.
+
+        Args:
+            *_args: Variable length argument list.
+            **options: Arbitrary keyword arguments containing command options.
+
+        """
+        github_token = os.getenv("GITHUB_TOKEN")
+        if not github_token:
+            logger.warning(
+                "Github token not found, please update .env file with a valid GITHUB_TOKEN"
+            )
+            return
+
+        gh = github.Github(github_token, per_page=GITHUB_ITEMS_PER_PAGE)
+
+        external_organizations = Organization.objects.filter(
+            is_owasp_related_organization=True
+        ).exclude(login=OWASP_ORGANIZATION_NAME)
+        org_count = external_organizations.count()
+
+        for org_idx, ext_org in enumerate(external_organizations):
+            print(f"Processing organization {org_idx + 1}/{org_count}: {ext_org.login}")
+            try:
+                gh_organization = gh.get_organization(ext_org.login)
+                gh_repositories = gh_organization.get_repos(
+                    type="public",
+                    sort="created",
+                    direction="desc",
+                )
+            except BadCredentialsException:
+                logger.warning("Invalid GitHub token. Please update .env file with a valid token.")
+                return
+            except Exception:
+                logger.exception("Failed fetching GitHub repository for org %s", ext_org.login)
+                continue
+
+            projects = self.sync_organization_repositories(ext_org, gh_repositories)
+            if projects is None:
+                # The token was rejected mid-sync, so every later request would fail too.
+                return
+            if projects:
+                Project.bulk_save(projects)
+
+    def sync_organization_repositories(self, external_org, gh_repositories):
+        """Sync GitHub repositories for a given external organization.
+
+        Args:
+            external_org: The organization whose repositories are being synced.
+            gh_repositories: The organization's GitHub repositories to sync.
+
+        Returns:
+            list | None: The projects linked to the synced repositories, or None
+                when syncing was aborted because GitHub rejected the credentials.
+
+        """
+        gh_repositories_count = gh_repositories.totalCount
+        org_key = external_org.login.lower()
+        projects = []
+        for repo_idx, gh_repository in enumerate(gh_repositories):
+            entity_key = gh_repository.name.lower()
+            repository_url = f"https://github.com/{org_key}/{entity_key}"
+            print(f"{repo_idx + 1}/{gh_repositories_count}: {repository_url}")
+
+            try:
+                _, repository = sync_repository(gh_repository)
+            except BadCredentialsException:
+                logger.warning("Invalid GitHub token. Please update .env file with a valid token.")
+                return None
+            except Exception:
+                logger.exception("Error syncing repository %s", repository_url)
+                continue
+
+            project = repository.project
+            if project:
+                project.repositories.add(repository)
+                projects.append(project)
+
+        return projects
diff --git a/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py b/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py
new file mode 100644
index 000000000..f791709af
--- /dev/null
+++ b/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py
@@ -0,0 +1,182 @@
+from typing import NamedTuple
+from unittest import mock
+
+import pytest
+
+from apps.github.management.commands.github_update_external_repositories import (
+    GITHUB_ITEMS_PER_PAGE,
+    Command,
+    Organization,
+    Project,
+)
+
+
+@pytest.fixture
+def command():
+    return Command()
+
+
+def make_gh_repository():
+    """Return a mock GitHub repository."""
+    repo = mock.Mock()
+    repo.name = "test-repo"
+    repo.html_url = "https://github.com/TestOrg/test-repo"
+    return repo
+
+
+@pytest.fixture
+def mock_gh_repository():
+    return make_gh_repository()
+
+
+def setup_organizations(num_orgs, num_repos_per_org):
+    """Build mock organizations and their matching GitHub organization objects."""
+    orgs = []
+    gh_orgs = []
+    for i in range(num_orgs):
+        org = mock.Mock(spec=Organization)
+        org.login = f"TestOrg{i + 1}"
+        org.is_owasp_related_organization = True
+        orgs.append(org)
+
+        gh_org = mock.Mock()
+        gh_repos = mock.MagicMock()
+        gh_repos.totalCount = num_repos_per_org[i]
+        gh_repos.__iter__.return_value = [make_gh_repository()] * num_repos_per_org[i]
+        gh_org.get_repos.return_value = gh_repos
+        gh_orgs.append(gh_org)
+
+    return (orgs, gh_orgs)
+
+
+class Scenario(NamedTuple):
+    num_orgs: int
+    num_repos_per_org: list[int]
+    expected_sync_calls: int
+    expected_bulk_save_calls: int
+
+
+@pytest.mark.parametrize(
+    "scenario",
+    [
+        Scenario(
+            num_orgs=1, num_repos_per_org=[2], expected_sync_calls=2, expected_bulk_save_calls=1
+        ),  # 1 org with 2 repos
+        Scenario(
+            num_orgs=2, num_repos_per_org=[1, 3], expected_sync_calls=4, expected_bulk_save_calls=2
+        ),  # 2 orgs with 1 and 3 repos
+        Scenario(
+            num_orgs=2, num_repos_per_org=[0, 2], expected_sync_calls=2, expected_bulk_save_calls=1
+        ),  # 1 org with 2 repos, 1 without any repos
+    ],
+)
+@mock.patch.dict("os.environ", {"GITHUB_TOKEN": "valid-token"})
+@mock.patch("apps.github.management.commands.github_update_external_repositories.github.Github")
+@mock.patch("apps.github.management.commands.github_update_external_repositories.sync_repository")
+@mock.patch(
+    "apps.github.management.commands.github_update_external_repositories.Project.bulk_save"
+)
+@mock.patch(
+    "apps.github.management.commands.github_update_external_repositories.Organization.objects.filter"
+)
+def test_handle(
+    mock_org_filter,
+    mock_bulk_save,
+    mock_sync_repository,
+    mock_github,
+    command,
+    scenario,
+):
+    """Test command execution with varying organizations and repositories."""
+    mock_gh = mock_github.return_value
+
+    orgs, gh_orgs = setup_organizations(scenario.num_orgs, scenario.num_repos_per_org)
+
+    qs_mock = mock.MagicMock()
+    qs_mock.count.return_value = scenario.num_orgs
+    qs_mock.__iter__.return_value = orgs
+    mock_org_filter.return_value.exclude.return_value = qs_mock
+
+    mock_gh.get_organization.side_effect = gh_orgs
+
+    mock_project = mock.Mock(spec=Project)
+
+    def sync_side_effect(repo):
+        org_index = hash(repo.name) % len(orgs) if orgs else 0
+        return (orgs[org_index], mock.Mock(project=mock_project))
+
+    mock_sync_repository.side_effect = sync_side_effect
+
+    with mock.patch("builtins.print"):
+        command.handle()
+
+    mock_github.assert_called_once_with("valid-token", per_page=GITHUB_ITEMS_PER_PAGE)
+
+    assert mock_gh.get_organization.call_count == scenario.num_orgs
+    for org in orgs:
+        mock_gh.get_organization.assert_any_call(org.login)
+
+    assert mock_sync_repository.call_count == scenario.expected_sync_calls
+
+    assert mock_bulk_save.call_count == scenario.expected_bulk_save_calls
+
+
+@pytest.mark.parametrize(
+    ("num_repos", "expected_project_count"),
+    [
+        (1, 1),  # 1 repo that successfully syncs to 1 project
+        (3, 3),  # 3 repos that successfully sync to 3 projects
+        (2, 0),  # 2 repos that sync but are not linked to any project
+    ],
+)
+@mock.patch("apps.github.management.commands.github_update_external_repositories.sync_repository")
+def test_sync_organization_repositories(
+    mock_sync_repository, command, mock_gh_repository, num_repos, expected_project_count
+):
+    """Test repository synchronization with varying repository counts."""
+    mock_organization = mock.Mock(spec=Organization)
+    mock_organization.login = "TestOrg"
+
+    mock_repositories = mock.MagicMock()
+    mock_repositories.totalCount = num_repos
+    mock_repositories.__iter__.return_value = [mock_gh_repository] * num_repos
+
+    mock_project = mock.Mock(spec=Project)
+    if expected_project_count > 0:
+        mock_sync_repository.return_value = (mock_organization, mock.Mock(project=mock_project))
+    else:
+        mock_sync_repository.return_value = (mock_organization, mock.Mock(project=None))
+
+    projects = command.sync_organization_repositories(mock_organization, mock_repositories)
+
+    assert len(projects) == expected_project_count
+    assert mock_sync_repository.call_count == num_repos
+
+
+@mock.patch.dict("os.environ", {"GITHUB_TOKEN": "valid-token"})
+@mock.patch("apps.github.management.commands.github_update_external_repositories.github.Github")
+@mock.patch(
+    "apps.github.management.commands.github_update_external_repositories.Project.bulk_save"
+)
+@mock.patch(
+    "apps.github.management.commands.github_update_external_repositories.Organization.objects.filter"
+)
+def test_handle_stops_when_sync_is_aborted(
+    mock_org_filter, mock_bulk_save, mock_github, command
+):
+    """Test that remaining organizations are skipped once a sync is aborted."""
+    orgs, gh_orgs = setup_organizations(2, [1, 1])
+
+    qs_mock = mock.MagicMock()
+    qs_mock.count.return_value = len(orgs)
+    qs_mock.__iter__.return_value = orgs
+    mock_org_filter.return_value.exclude.return_value = qs_mock
+    mock_github.return_value.get_organization.side_effect = gh_orgs
+
+    with mock.patch("builtins.print"), mock.patch.object(
+        command, "sync_organization_repositories", return_value=None
+    ):
+        command.handle()
+
+    mock_github.return_value.get_organization.assert_called_once_with(orgs[0].login)
+    mock_bulk_save.assert_not_called()
+