Skip to content

Add a Command to Sync External OWASP Repositories #1656

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions backend/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ github-update-project-related-repositories:
@echo "Updating OWASP project related GitHub repositories"
@CMD="python manage.py github_update_project_related_repositories" $(MAKE) exec-backend-command

# Sync repositories of external (non-OWASP-org) GitHub organizations by running the
# github_update_external_repositories management command via exec-backend-command.
github-update-external-repositories:
@echo "Updating external OWASP GitHub repositories"
@CMD="python manage.py github_update_external_repositories" $(MAKE) exec-backend-command

# Run the github_update_users management command via exec-backend-command.
github-update-users:
@echo "Updating GitHub users"
@CMD="python manage.py github_update_users" $(MAKE) exec-backend-command
Expand Down Expand Up @@ -196,6 +200,7 @@ update-data: \
owasp-scrape-committees \
owasp-scrape-projects \
github-update-project-related-repositories \
github-update-external-repositories \
github-update-users \
owasp-aggregate-projects \
owasp-update-events \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""A command to update external OWASP repositories from GitHub data."""

import logging
import os

import github
from django.core.management.base import BaseCommand
from github.GithubException import BadCredentialsException

from apps.github.common import sync_repository
from apps.github.constants import GITHUB_ITEMS_PER_PAGE
from apps.github.models.organization import Organization
from apps.owasp.constants import OWASP_ORGANIZATION_NAME
from apps.owasp.models.project import Project

logger: logging.Logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Fetch external OWASP GitHub repositories and update relevant entities."""

    help = "Fetch external OWASP GitHub repositories and update relevant entities."

    def handle(self, *_args, **options) -> None:
        """Handle the command execution.

        Walk every organization flagged as OWASP-related (excluding the main
        OWASP organization), sync its public repositories and bulk-save the
        projects those repositories are linked to.

        Args:
            *_args: Variable length argument list.
            **options: Arbitrary keyword arguments containing command options.

        """
        github_token = os.getenv("GITHUB_TOKEN")
        if not github_token:
            logger.warning(
                "Github token not found, please update .env file with a valid GITHUB_TOKEN"
            )
            return

        gh = github.Github(github_token, per_page=GITHUB_ITEMS_PER_PAGE)

        external_organizations = Organization.objects.filter(
            is_owasp_related_organization=True
        ).exclude(login=OWASP_ORGANIZATION_NAME)
        org_count = external_organizations.count()

        for org_idx, ext_org in enumerate(external_organizations, start=1):
            print(f"Processing organization {org_idx}/{org_count}: {ext_org.login}")
            try:
                gh_organization = gh.get_organization(ext_org.login)
                gh_repositories = gh_organization.get_repos(
                    type="public",
                    sort="created",
                    direction="desc",
                )
            except BadCredentialsException:
                logger.warning("Invalid GitHub token. Please update .env file with a valid token.")
                return
            except Exception:
                logger.exception("Failed fetching GitHub repository for org %s", ext_org.login)
                continue

            projects = self.sync_organization_repositories(ext_org, gh_repositories)
            if projects is None:
                # The token was rejected while syncing (already logged there);
                # every further request would fail the same way, so stop here
                # instead of moving on to the next organization.
                return
            if projects:
                Project.bulk_save(projects)

    def sync_organization_repositories(self, external_org, gh_repositories):
        """Sync GitHub repositories for a given external organization.

        Args:
            external_org: The organization whose repositories are being synced;
                only its ``login`` is read here.
            gh_repositories: Iterable of GitHub repositories exposing
                ``totalCount`` (as returned by ``get_repos``).

        Returns:
            The list of projects linked to the successfully synced repositories
            (one entry per repository, so a project may appear more than once),
            an empty list when the repository listing cannot be fetched, or
            ``None`` when GitHub rejects the credentials.

        """
        org_key = external_org.login.lower()

        # PyGithub paginated lists are lazy: reading ``totalCount`` performs an
        # API request, so it needs the same protection as the calls in handle().
        try:
            gh_repositories_count = gh_repositories.totalCount
        except BadCredentialsException:
            logger.warning("Invalid GitHub token. Please update .env file with a valid token.")
            return None
        except Exception:
            logger.exception("Failed fetching GitHub repository for org %s", external_org.login)
            return []

        projects = []
        for repo_idx, gh_repository in enumerate(gh_repositories, start=1):
            entity_key = gh_repository.name.lower()
            repository_url = f"https://github.com/{org_key}/{entity_key}"
            print(f"{repo_idx}/{gh_repositories_count}: {repository_url}")

            try:
                # The organization returned by sync_repository is not needed here.
                _, repository = sync_repository(gh_repository)
            except BadCredentialsException:
                logger.warning("Invalid GitHub token. Please update .env file with a valid token.")
                return None
            except Exception:
                # One broken repository must not abort the rest of the organization.
                logger.exception("Error syncing repository %s", repository_url)
                continue

            project = repository.project
            if project:
                project.repositories.add(repository)
                projects.append(project)

        return projects
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
from typing import NamedTuple
from unittest import mock

import pytest

from apps.github.management.commands.github_update_external_repositories import (
GITHUB_ITEMS_PER_PAGE,
Command,
Organization,
Project,
)


@pytest.fixture
def command():
    """Provide a fresh ``Command`` instance to the test requesting it."""
    instance = Command()
    return instance


@pytest.fixture
def mock_gh_repository():
    """Provide a mock GitHub repository named ``test-repo``."""
    gh_repo = mock.Mock()
    # ``name`` is special in the Mock constructor, so both attributes are set
    # after creation through configure_mock.
    gh_repo.configure_mock(
        name="test-repo",
        html_url="https://github.com/TestOrg/test-repo",
    )
    return gh_repo


def setup_organizations(num_orgs, num_repos_per_org):
    """Build mock organizations together with their mock GitHub counterparts.

    Args:
        num_orgs: Number of organizations to create.
        num_repos_per_org: Repository count for each organization, indexed by
            organization position.

    Returns:
        A ``(orgs, gh_orgs)`` tuple: the mock ``Organization`` records and, in the
        same order, the mock GitHub organizations whose ``get_repos()`` returns
        an iterable of mock repositories exposing ``totalCount``.

    """
    orgs = []
    gh_orgs = []
    for i in range(num_orgs):
        org = mock.Mock(spec=Organization)
        org.login = f"TestOrg{i + 1}"
        org.is_owasp_related_organization = True
        orgs.append(org)

        # Build a real mock repository here: this is a plain helper, not a test,
        # so pytest fixtures are not injected and referencing a fixture by name
        # would put the fixture function object into the list instead.
        gh_repo = mock.Mock()
        gh_repo.name = "test-repo"
        gh_repo.html_url = f"https://github.com/{org.login}/test-repo"

        repo_count = num_repos_per_org[i]
        gh_repos = mock.MagicMock()
        gh_repos.totalCount = repo_count
        gh_repos.__iter__.return_value = [gh_repo] * repo_count

        gh_org = mock.Mock()
        gh_org.get_repos.return_value = gh_repos
        gh_orgs.append(gh_org)

    return (orgs, gh_orgs)


class Scenario(NamedTuple):
    """One parametrized case for ``test_handle``."""

    # Number of external organizations returned by the queryset.
    num_orgs: int
    # Repository count per organization, in organization order.
    num_repos_per_org: list[int]
    # Expected total number of ``sync_repository`` calls.
    expected_sync_calls: int
    # Expected number of ``Project.bulk_save`` calls.
    expected_bulk_save_calls: int


@pytest.mark.parametrize(
    "scenario",
    [
        Scenario(
            num_orgs=1, num_repos_per_org=[2], expected_sync_calls=2, expected_bulk_save_calls=1
        ),  # 1 org with 2 repos
        Scenario(
            num_orgs=2, num_repos_per_org=[1, 3], expected_sync_calls=4, expected_bulk_save_calls=2
        ),  # 2 orgs with 1 and 3 repos
        Scenario(
            num_orgs=2, num_repos_per_org=[0, 2], expected_sync_calls=2, expected_bulk_save_calls=1
        ),  # 1 org with 2 repos, 1 without any repos
    ],
)
@mock.patch.dict("os.environ", {"GITHUB_TOKEN": "valid-token"})
@mock.patch("apps.github.management.commands.github_update_external_repositories.github.Github")
@mock.patch("apps.github.management.commands.github_update_external_repositories.sync_repository")
@mock.patch(
    "apps.github.management.commands.github_update_external_repositories.Project.bulk_save"
)
@mock.patch(
    "apps.github.management.commands.github_update_external_repositories.Organization.objects.filter"
)
def test_handle(
    mock_org_filter,
    mock_bulk_save,
    mock_sync_repository,
    mock_github,
    command,
    scenario,
):
    """Test command execution with varying organizations and repositories."""
    mock_gh = mock_github.return_value

    orgs, gh_orgs = setup_organizations(scenario.num_orgs, scenario.num_repos_per_org)

    # Stand-in for ``Organization.objects.filter(...).exclude(...)``.
    qs_mock = mock.MagicMock()
    qs_mock.count.return_value = scenario.num_orgs
    qs_mock.__iter__.return_value = orgs
    mock_org_filter.return_value.exclude.return_value = qs_mock

    # One GitHub organization is handed out per ``get_organization`` call, in order.
    mock_gh.get_organization.side_effect = gh_orgs

    mock_project = mock.Mock(spec=Project)

    def sync_side_effect(_repo):
        # The command only uses the repository element of the returned tuple,
        # so a deterministic placeholder organization is sufficient (the
        # previous hash()-based pick varied between interpreter runs).
        return (mock.Mock(spec=Organization), mock.Mock(project=mock_project))

    mock_sync_repository.side_effect = sync_side_effect

    with mock.patch("builtins.print"):
        command.handle()

    mock_github.assert_called_once_with("valid-token", per_page=GITHUB_ITEMS_PER_PAGE)

    assert mock_gh.get_organization.call_count == scenario.num_orgs
    for org in orgs:
        mock_gh.get_organization.assert_any_call(org.login)

    assert mock_sync_repository.call_count == scenario.expected_sync_calls

    # bulk_save is skipped for organizations that produced no projects.
    assert mock_bulk_save.call_count == scenario.expected_bulk_save_calls


@pytest.mark.parametrize(
    ("num_repos", "expected_project_count"),
    [
        (1, 1),  # 1 repo that successfully syncs to 1 project
        (3, 3),  # 3 repos that successfully sync to 3 projects
        (2, 0),  # 2 repos that fail to sync (return None projects)
    ],
)
@mock.patch("apps.github.management.commands.github_update_external_repositories.sync_repository")
def test_sync_organization_repositories(
    mock_sync_repository, command, mock_gh_repository, num_repos, expected_project_count
):
    """Check the projects returned for different repository counts."""
    organization = mock.Mock(spec=Organization)
    organization.login = "TestOrg"

    # Iterable repository listing that also reports its size via ``totalCount``.
    repositories = mock.MagicMock()
    repositories.totalCount = num_repos
    repositories.__iter__.return_value = [mock_gh_repository for _ in range(num_repos)]

    # Synced repositories are linked to a project only when projects are expected.
    linked_project = mock.Mock(spec=Project) if expected_project_count > 0 else None
    mock_sync_repository.return_value = (organization, mock.Mock(project=linked_project))

    result = command.sync_organization_repositories(organization, repositories)

    assert len(result) == expected_project_count
    assert mock_sync_repository.call_count == num_repos