From 27907d538c983845855c7c5f4ea55ac543ba3de6 Mon Sep 17 00:00:00 2001 From: Rudransh Shrivastava Date: Mon, 23 Jun 2025 21:24:41 +0530 Subject: [PATCH 1/5] feat: add command to sync external OWASP repositories --- backend/Makefile | 5 ++ .../github_update_external_repositories.py | 82 +++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 backend/apps/github/management/commands/github_update_external_repositories.py diff --git a/backend/Makefile b/backend/Makefile index 8310ce66a..c40cd2f48 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -58,6 +58,10 @@ github-update-project-related-repositories: @echo "Updating OWASP project related GitHub repositories" @CMD="python manage.py github_update_project_related_repositories" $(MAKE) exec-backend-command +github-update-external-repositories: + @echo "Updating external OWASP GitHub repositories" + @CMD="python manage.py github_update_external_repositories" $(MAKE) exec-backend-command + github-update-users: @echo "Updating GitHub users" @CMD="python manage.py github_update_users" $(MAKE) exec-backend-command @@ -196,6 +200,7 @@ update-data: \ owasp-scrape-committees \ owasp-scrape-projects \ github-update-project-related-repositories \ + github-update-external-repositories \ github-update-users \ owasp-aggregate-projects \ owasp-update-events \ diff --git a/backend/apps/github/management/commands/github_update_external_repositories.py b/backend/apps/github/management/commands/github_update_external_repositories.py new file mode 100644 index 000000000..dc3741978 --- /dev/null +++ b/backend/apps/github/management/commands/github_update_external_repositories.py @@ -0,0 +1,82 @@ +"""A command to update external OWASP repositories from GitHub data.""" + +import logging +import os + +import github +from django.core.management.base import BaseCommand +from github.GithubException import BadCredentialsException + +from apps.github.common import sync_repository +from apps.github.constants import GITHUB_ITEMS_PER_PAGE +from apps.github.models.organization import Organization +from apps.owasp.constants import OWASP_ORGANIZATION_NAME +from apps.owasp.models.project import Project + +logger: logging.Logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + """Fetch external OWASP GitHub repositories and update relevant entities.""" + + help = "Fetch external OWASP GitHub repositories and update relevant entities." + + def handle(self, *_args, **options) -> None: + """Handle the command execution. + + Args: + *_args: Variable length argument list. + **options: Arbitrary keyword arguments containing command options. + + """ + try: + gh = github.Github(os.getenv("GITHUB_TOKEN"), per_page=GITHUB_ITEMS_PER_PAGE) + except BadCredentialsException: + logger.warning( + "Invalid GitHub token. Please create and update .env file with a valid token." + ) + return + + external_organizations = Organization.objects.filter( + is_owasp_related_organization=True + ).exclude(login=OWASP_ORGANIZATION_NAME) + org_count = external_organizations.count() + + synced_projects = [] + for org_idx, ext_org in enumerate(external_organizations): + print(f"Processing organization {org_idx + 1}/{org_count}: {ext_org.login}") + try: + gh_organization = gh.get_organization(ext_org.login) + except Exception: + logger.exception("Failed fetching GitHub org %s", ext_org.login) + continue + + try: + gh_repositories = gh_organization.get_repos( + type="public", + sort="created", + direction="desc", + ) + gh_repositories_count = gh_repositories.totalCount + except Exception: + logger.exception("Failed fetching GitHub repository for org %s", ext_org.login) + continue + + for repo_idx, gh_repository in enumerate(gh_repositories): + entity_key = gh_repository.name.lower() + org_key = ext_org.login.lower() + repository_url = f"https://github.com/{org_key}/{entity_key}" + print(f"{repo_idx + 1}/{gh_repositories_count}: {repository_url}") + + try: + organization, repository = sync_repository(gh_repository) + except Exception: + logger.exception("Error syncing repository %s", repository_url) + continue + + project = repository.project + if project: + project.repositories.add(repository) + synced_projects.append(project) + + Project.bulk_save(synced_projects) From 1024bc737e80c0f1107aefe1cd27b5775e4fc6ce Mon Sep 17 00:00:00 2001 From: Rudransh Shrivastava Date: Tue, 24 Jun 2025 22:18:44 +0530 Subject: [PATCH 2/5] refactor: better error handling, extract some code into a function --- .../github_update_external_repositories.py | 64 +++++++++++-------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/backend/apps/github/management/commands/github_update_external_repositories.py b/backend/apps/github/management/commands/github_update_external_repositories.py index dc3741978..055188fa0 100644 --- a/backend/apps/github/management/commands/github_update_external_repositories.py +++ b/backend/apps/github/management/commands/github_update_external_repositories.py @@ -29,54 +29,66 @@ def handle(self, *_args, **options) -> None: **options: Arbitrary keyword arguments containing command options. """ - try: - gh = github.Github(os.getenv("GITHUB_TOKEN"), per_page=GITHUB_ITEMS_PER_PAGE) - except BadCredentialsException: + github_token = os.getenv("GITHUB_TOKEN") + if not github_token: logger.warning( - "Invalid GitHub token. Please create and update .env file with a valid token." + "Github token not found, please update .env file with a valid GITHUB_TOKEN" ) return + gh = github.Github(github_token, per_page=GITHUB_ITEMS_PER_PAGE) + external_organizations = Organization.objects.filter( is_owasp_related_organization=True ).exclude(login=OWASP_ORGANIZATION_NAME) org_count = external_organizations.count() - synced_projects = [] for org_idx, ext_org in enumerate(external_organizations): print(f"Processing organization {org_idx + 1}/{org_count}: {ext_org.login}") try: gh_organization = gh.get_organization(ext_org.login) - except Exception: - logger.exception("Failed fetching GitHub org %s", ext_org.login) - continue - - try: gh_repositories = gh_organization.get_repos( type="public", sort="created", direction="desc", ) - gh_repositories_count = gh_repositories.totalCount + except BadCredentialsException: + logger.warning("Invalid GitHub token. Please update .env file with a valid token.") + return except Exception: logger.exception("Failed fetching GitHub repository for org %s", ext_org.login) continue - for repo_idx, gh_repository in enumerate(gh_repositories): - entity_key = gh_repository.name.lower() - org_key = ext_org.login.lower() - repository_url = f"https://github.com/{org_key}/{entity_key}" - print(f"{repo_idx + 1}/{gh_repositories_count}: {repository_url}") + projects = self.sync_organization_repositories(ext_org, gh_repositories) + if projects: + Project.bulk_save(projects) + + def sync_organization_repositories(self, external_org, gh_repositories): + """Sync GitHub repositories for a given external organization. - try: - organization, repository = sync_repository(gh_repository) - except Exception: - logger.exception("Error syncing repository %s", repository_url) - continue + Sync repositories and return the updated projects. + + """ + gh_repositories_count = gh_repositories.totalCount + projects = [] + for repo_idx, gh_repository in enumerate(gh_repositories): + entity_key = gh_repository.name.lower() + org_key = external_org.login.lower() + repository_url = f"https://github.com/{org_key}/{entity_key}" + print(f"{repo_idx + 1}/{gh_repositories_count}: {repository_url}") + + try: + organization, repository = sync_repository(gh_repository) + except BadCredentialsException: + logger.warning("Invalid GitHub token. Please update .env file with a valid token.") + return None + except Exception: + logger.exception("Error syncing repository %s", repository_url) + continue - project = repository.project - if project: - project.repositories.add(repository) - synced_projects.append(project) + project = repository.project + if project: + project.repositories.add(repository) + projects.append(project) - Project.bulk_save(synced_projects) + return projects From b66a3bf9c77a0bf09aa43dd57d92d4dcf5810ceb Mon Sep 17 00:00:00 2001 From: Rudransh Shrivastava Date: Wed, 25 Jun 2025 21:11:50 +0530 Subject: [PATCH 3/5] feat: add tests for github_update_external_repositories --- ...ithub_update_external_repositories_test.py | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 backend/tests/apps/github/management/commands/github_update_external_repositories_test.py diff --git a/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py b/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py new file mode 100644 index 000000000..06f6bf01d --- /dev/null +++ b/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py @@ -0,0 +1,122 @@ +from unittest import mock + +import pytest + +from apps.github.management.commands.github_update_external_repositories import ( + GITHUB_ITEMS_PER_PAGE, + Command, + Organization, + Project +) + +@pytest.fixture +def command(): + return Command() + +@pytest.fixture +def mock_gh_repository(): + repo = mock.Mock() + repo.name = "test-repo" + repo.html_url = "https://github.com/TestOrg/test-repo" + return repo + +@pytest.mark.parametrize( + "num_orgs, num_repos_per_org, expected_sync_calls, expected_bulk_save_calls", + [ + (1, [2], 2, 1), # 1 org with 2 repos + (2, [1, 3], 4, 2), # 2 orgs with 1 and 3 repos + (2, [0, 2], 2, 1), # 1 org with repos, 1 without + ] +) +@mock.patch.dict("os.environ", {"GITHUB_TOKEN": "valid-token"}) +@mock.patch("apps.github.management.commands.github_update_external_repositories.github.Github") +@mock.patch("apps.github.management.commands.github_update_external_repositories.sync_repository") +@mock.patch("apps.github.management.commands.github_update_external_repositories.Project.bulk_save") +@mock.patch("apps.github.management.commands.github_update_external_repositories.Organization.objects.filter") +def test_handle_success( + mock_org_filter, + mock_bulk_save, + mock_sync_repository, + mock_github, + command, + mock_gh_repository, + num_orgs, + num_repos_per_org, + expected_sync_calls, + expected_bulk_save_calls +): + """Test command execution with varying organizations and repositories.""" + mock_gh = mock_github.return_value + + orgs = [] + gh_orgs = [] + for i in range(num_orgs): + org = mock.Mock(spec=Organization) + org.login = f"TestOrg{i+1}" + org.is_owasp_related_organization = True + orgs.append(org) + + gh_org = mock.Mock() + gh_repos = mock.MagicMock() + gh_repos.totalCount = num_repos_per_org[i] + gh_repos.__iter__.return_value = [mock_gh_repository] * num_repos_per_org[i] + gh_org.get_repos.return_value = gh_repos + gh_orgs.append(gh_org) + + qs_mock = mock.MagicMock() + qs_mock.count.return_value = num_orgs + qs_mock.__iter__.return_value = orgs + mock_org_filter.return_value.exclude.return_value = qs_mock + + mock_gh.get_organization.side_effect = gh_orgs + + mock_project = mock.Mock(spec=Project) + mock_sync_repository.side_effect = lambda gh_repo: (orgs[0], mock.Mock(project=mock_project)) + + with mock.patch("builtins.print"): + command.handle() + + mock_github.assert_called_once_with("valid-token", per_page=GITHUB_ITEMS_PER_PAGE) + + assert mock_gh.get_organization.call_count == num_orgs + for i, org in enumerate(orgs): + mock_gh.get_organization.assert_any_call(org.login) + + assert mock_sync_repository.call_count == expected_sync_calls + + assert mock_bulk_save.call_count == expected_bulk_save_calls + +@pytest.mark.parametrize( + "num_repos, expected_project_count", + [ + (1, 1), # 1 repo with 1 project + (3, 3), # 3 repos with 3 projects + (2, 0), # 2 repos without 0 projects + ] +) +@mock.patch("apps.github.management.commands.github_update_external_repositories.sync_repository") +def test_sync_organization_repositories( + mock_sync_repository, + command, + mock_gh_repository, + num_repos, + expected_project_count +): + """Test repository synchronization with varying repository counts.""" + mock_organization = mock.Mock(spec=Organization) + mock_organization.login = "TestOrg" + + mock_repositories = mock.MagicMock() + mock_repositories.totalCount = num_repos + mock_repositories.__iter__.return_value = [mock_gh_repository] * num_repos + + mock_project = mock.Mock(spec=Project) + if expected_project_count > 0: + mock_sync_repository.return_value = (mock_organization, mock.Mock(project=mock_project)) + else: + mock_sync_repository.return_value = (mock_organization, mock.Mock(project=None)) + + projects = command.sync_organization_repositories(mock_organization, mock_repositories) + + assert len(projects) == expected_project_count + assert mock_sync_repository.call_count == num_repos \ No newline at end of file From 7e7190395d8111151be0224ccfb80db303995ee3 Mon Sep 17 00:00:00 2001 From: Rudransh Shrivastava Date: Wed, 25 Jun 2025 21:19:42 +0530 Subject: [PATCH 4/5] refactor: lint, use tuple for parameters, remove unused lambda --- ...ithub_update_external_repositories_test.py | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py b/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py index 06f6bf01d..e2a750d71 100644 --- a/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py +++ b/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py @@ -6,13 +6,15 @@ GITHUB_ITEMS_PER_PAGE, Command, Organization, - Project + Project, ) + @pytest.fixture def command(): return Command() + @pytest.fixture def mock_gh_repository(): repo = mock.Mock() @@ -20,20 +22,25 @@ def mock_gh_repository(): repo.html_url = "https://github.com/TestOrg/test-repo" return repo + @pytest.mark.parametrize( - "num_orgs, num_repos_per_org, expected_sync_calls, expected_bulk_save_calls", + ("num_orgs", "num_repos_per_org", "expected_sync_calls", "expected_bulk_save_calls"), [ - (1, [2], 2, 1), # 1 org with 2 repos + (1, [2], 2, 1), # 1 org with 2 repos (2, [1, 3], 4, 2), # 2 orgs with 1 and 3 repos (2, [0, 2], 2, 1), # 1 org with repos, 1 without - ] + ], ) @mock.patch.dict("os.environ", {"GITHUB_TOKEN": "valid-token"}) @mock.patch("apps.github.management.commands.github_update_external_repositories.github.Github") @mock.patch("apps.github.management.commands.github_update_external_repositories.sync_repository") -@mock.patch("apps.github.management.commands.github_update_external_repositories.Project.bulk_save") -@mock.patch("apps.github.management.commands.github_update_external_repositories.Organization.objects.filter") -def test_handle_success( +@mock.patch( + "apps.github.management.commands.github_update_external_repositories.Project.bulk_save" +) +@mock.patch( + "apps.github.management.commands.github_update_external_repositories.Organization.objects.filter" +) +def test_handle( mock_org_filter, mock_bulk_save, mock_sync_repository, @@ -43,7 +50,7 @@ def test_handle_success( num_orgs, num_repos_per_org, expected_sync_calls, - expected_bulk_save_calls + expected_bulk_save_calls, ): """Test command execution with varying organizations and repositories.""" mock_gh = mock_github.return_value @@ -52,7 +59,7 @@ def test_handle_success( gh_orgs = [] for i in range(num_orgs): org = mock.Mock(spec=Organization) - org.login = f"TestOrg{i+1}" + org.login = f"TestOrg{i + 1}" org.is_owasp_related_organization = True orgs.append(org) @@ -71,7 +78,7 @@ def test_handle_success( mock_gh.get_organization.side_effect = gh_orgs mock_project = mock.Mock(spec=Project) - mock_sync_repository.side_effect = lambda gh_repo: (orgs[0], mock.Mock(project=mock_project)) + mock_sync_repository.side_effect = lambda _: (orgs[0], mock.Mock(project=mock_project)) with mock.patch("builtins.print"): command.handle() @@ -79,28 +86,25 @@ def test_handle_success( mock_github.assert_called_once_with("valid-token", per_page=GITHUB_ITEMS_PER_PAGE) assert mock_gh.get_organization.call_count == num_orgs - for i, org in enumerate(orgs): + for org in orgs: mock_gh.get_organization.assert_any_call(org.login) assert mock_sync_repository.call_count == expected_sync_calls assert mock_bulk_save.call_count == expected_bulk_save_calls + @pytest.mark.parametrize( - "num_repos, expected_project_count", + ("num_repos", "expected_project_count"), [ (1, 1), # 1 repo with 1 project (3, 3), # 3 repos with 3 projects (2, 0), # 2 repos without 0 projects - ] + ], ) @mock.patch("apps.github.management.commands.github_update_external_repositories.sync_repository") def test_sync_organization_repositories( - mock_sync_repository, - command, - mock_gh_repository, - num_repos, - expected_project_count + mock_sync_repository, command, mock_gh_repository, num_repos, expected_project_count ): """Test repository synchronization with varying repository counts.""" mock_organization = mock.Mock(spec=Organization) @@ -119,4 +123,4 @@ def test_sync_organization_repositories( projects = command.sync_organization_repositories(mock_organization, mock_repositories) assert len(projects) == expected_project_count - assert mock_sync_repository.call_count == num_repos \ No newline at end of file + assert mock_sync_repository.call_count == num_repos From 6723f393fb2fd3c2d8b3493d07bca5093e54f840 Mon Sep 17 00:00:00 2001 From: Rudransh Shrivastava Date: Wed, 25 Jun 2025 22:02:27 +0530 Subject: [PATCH 5/5] refactor: make code more maintainable, fix: mock behavior to represent sync logic --- ...ithub_update_external_repositories_test.py | 83 ++++++++++++------- 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py b/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py index e2a750d71..f791709af 100644 --- a/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py +++ b/backend/tests/apps/github/management/commands/github_update_external_repositories_test.py @@ -1,3 +1,4 @@ +from typing import NamedTuple from unittest import mock import pytest @@ -23,12 +24,44 @@ def mock_gh_repository(): return repo +def setup_organizations(num_orgs, num_repos_per_org): + orgs = [] + gh_orgs = [] + for i in range(num_orgs): + org = mock.Mock(spec=Organization) + org.login = f"TestOrg{i + 1}" + org.is_owasp_related_organization = True + orgs.append(org) + + gh_org = mock.Mock() + gh_repos = mock.MagicMock() + gh_repos.totalCount = num_repos_per_org[i] + gh_repos.__iter__.return_value = [mock_gh_repository] * num_repos_per_org[i] + gh_org.get_repos.return_value = gh_repos + gh_orgs.append(gh_org) + + return (orgs, gh_orgs) + + +class Scenario(NamedTuple): + num_orgs: int + num_repos_per_org: list[int] + expected_sync_calls: int + expected_bulk_save_calls: int + + @pytest.mark.parametrize( - ("num_orgs", "num_repos_per_org", "expected_sync_calls", "expected_bulk_save_calls"), + "scenario", [ - (1, [2], 2, 1), # 1 org with 2 repos - (2, [1, 3], 4, 2), # 2 orgs with 1 and 3 repos - (2, [0, 2], 2, 1), # 1 org with repos, 1 without + Scenario( + num_orgs=1, num_repos_per_org=[2], expected_sync_calls=2, expected_bulk_save_calls=1 + ), # 1 org with 2 repos + Scenario( + num_orgs=2, num_repos_per_org=[1, 3], expected_sync_calls=4, expected_bulk_save_calls=2 + ), # 2 orgs with 1 and 3 repos + Scenario( + num_orgs=2, num_repos_per_org=[0, 2], expected_sync_calls=2, expected_bulk_save_calls=1 + ), # 1 org with 2 repos, 1 without any repos ], ) @mock.patch.dict("os.environ", {"GITHUB_TOKEN": "valid-token"}) @@ -46,60 +79,48 @@ def test_handle( mock_sync_repository, mock_github, command, - mock_gh_repository, - num_orgs, - num_repos_per_org, - expected_sync_calls, - expected_bulk_save_calls, + scenario, ): """Test command execution with varying organizations and repositories.""" mock_gh = mock_github.return_value - orgs = [] - gh_orgs = [] - for i in range(num_orgs): - org = mock.Mock(spec=Organization) - org.login = f"TestOrg{i + 1}" - org.is_owasp_related_organization = True - orgs.append(org) - - gh_org = mock.Mock() - gh_repos = mock.MagicMock() - gh_repos.totalCount = num_repos_per_org[i] - gh_repos.__iter__.return_value = [mock_gh_repository] * num_repos_per_org[i] - gh_org.get_repos.return_value = gh_repos - gh_orgs.append(gh_org) + orgs, gh_orgs = setup_organizations(scenario.num_orgs, scenario.num_repos_per_org) qs_mock = mock.MagicMock() - qs_mock.count.return_value = num_orgs + qs_mock.count.return_value = scenario.num_orgs qs_mock.__iter__.return_value = orgs mock_org_filter.return_value.exclude.return_value = qs_mock mock_gh.get_organization.side_effect = gh_orgs mock_project = mock.Mock(spec=Project) - mock_sync_repository.side_effect = lambda _: (orgs[0], mock.Mock(project=mock_project)) + + def sync_side_effect(repo): + org_index = hash(repo.name) % len(orgs) if orgs else 0 + return (orgs[org_index], mock.Mock(project=mock_project)) + + mock_sync_repository.side_effect = sync_side_effect with mock.patch("builtins.print"): command.handle() mock_github.assert_called_once_with("valid-token", per_page=GITHUB_ITEMS_PER_PAGE) - assert mock_gh.get_organization.call_count == num_orgs + assert mock_gh.get_organization.call_count == scenario.num_orgs for org in orgs: mock_gh.get_organization.assert_any_call(org.login) - assert mock_sync_repository.call_count == expected_sync_calls + assert mock_sync_repository.call_count == scenario.expected_sync_calls - assert mock_bulk_save.call_count == expected_bulk_save_calls + assert mock_bulk_save.call_count == scenario.expected_bulk_save_calls @pytest.mark.parametrize( ("num_repos", "expected_project_count"), [ - (1, 1), # 1 repo with 1 project - (3, 3), # 3 repos with 3 projects - (2, 0), # 2 repos without 0 projects + (1, 1), # 1 repo that successfully syncs to 1 project + (3, 3), # 3 repos that successfully sync to 3 projects + (2, 0), # 2 repos that fail to sync (return None projects) ], ) @mock.patch("apps.github.management.commands.github_update_external_repositories.sync_repository")