Skip to content

feat: implement two-person reviewed check #492

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/macaron/config/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -460,3 +460,5 @@ hostname = search.maven.org
# The search REST API. See https://central.sonatype.org/search/rest-api-guide/
search_endpoint = solrsearch/select
request_timeout = 20
[check.two_person]
required_reviewers = 1
224 changes: 224 additions & 0 deletions src/macaron/slsa_analyzer/checks/two_person_reviewed_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the TwoPersonReviewedCheck class."""

import logging
import os

from sqlalchemy import ForeignKey
from sqlalchemy.orm import Mapped, mapped_column

from macaron.config.defaults import defaults
from macaron.database.database_manager import ORMBase
from macaron.database.table_definitions import CheckFacts
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
from macaron.slsa_analyzer.checks.base_check import BaseCheck
from macaron.slsa_analyzer.checks.check_result import CheckResult, CheckResultType
from macaron.slsa_analyzer.git_service.api_client import GhAPIClient
from macaron.slsa_analyzer.registry import registry
from macaron.slsa_analyzer.slsa_req import ReqName

logger: logging.Logger = logging.getLogger(__name__)


class TwoPersonReviewedTable(CheckFacts, ORMBase):
"""Check result table for two-person_reviewed."""

__tablename__ = "_two_person_reviewed_check"
# The primary key.
id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003
__mapper_args__ = {
"polymorphic_identity": "_two_person_reviewed_check",
}


class TwoPersonReviewedCheck(BaseCheck):
"""This Check checks whether the target submitted code has been reviewed by two people."""

def __init__(self) -> None:
"""Initiate the BuildScriptCheck instance."""
check_id = "mcn_two_person_reviewed_1"
description = "Check whether the merged pull requests has been reviewd and approved by at least one reviewer."
depends_on: list[tuple[str, CheckResultType]] = []
eval_reqs = [ReqName.TWO_PERSON_REVIEWED]
super().__init__(
check_id=check_id,
description=description,
depends_on=depends_on,
eval_reqs=eval_reqs,
# result_on_skip=CheckResultType.FAILED,
)

def _get_graphql_query(self, with_commit_sha: bool) -> str:
"""Get the graphql query based on whether the commit sha is provided.

Parameters
----------
with_commit_sha : bool
Whether providing commit sha.
commit_sha : str | None
The commit sha provided by user.

Returns
-------
str
The graphql query
"""
if with_commit_sha:
return """
query ($owner: String!, $name: String!, $commit_sha: String!) {
repository(owner: $owner, name: $name) {
object(expression: $commit_sha) {
... on Commit {
associatedPullRequests(first: 10) {
edges {
node {
reviewDecision
state
baseRefName
author {
__typename
}
mergedBy {
__typename
}
}
}
}
}
}
}
}
"""
return """
query ($owner: String!, $name: String!, $cursor: String, $branch_name: String) {
repository(owner: $owner, name: $name) {
pullRequests(first: 100, states: MERGED, after: $cursor, baseRefName: $branch_name) {
totalCount
pageInfo {
hasNextPage
endCursor
}
edges {
node {
reviewDecision
author {
__typename
}
mergedBy {
__typename
}
}
}
}
}
}
"""

def _extract_data(self, ctx: AnalyzeContext, client: GhAPIClient, commit_sha: str | None) -> dict:
"""Implement the check in this method.

Parameters
----------
ctx : AnalyzeContext
The object containing processed data for the target repo.
check_result : CheckResult
The object containing result data of a check.

Returns
-------
CheckResultType
The result type of the check (e.g. PASSED).
"""
approved_pr_num = 0
merged_pr_num = 0
has_next_page = True
end_cursor = None
variables = {
"owner": ctx.component.repository.owner,
"name": ctx.component.repository.name,
"cursor": end_cursor,
"branch_name": ctx.component.repository.branch_name,
}

# "commitSha": ctx.component.repository.commit_sha,
if commit_sha:
variables["commit_sha"] = commit_sha
result = client.graphql_fetch_associated_prs(variables=variables)
merged_pr_num = result["merged_pr_num"]
approved_pr_num = result["approved_pr_num"]
else:
while has_next_page:
variables["end_cursor"] = end_cursor
pull_requests = client.graphql_fetch_pull_requests(variables=variables)
merged_pr_num = pull_requests["merged_pr_num"]
has_next_page = pull_requests["has_next_page"]
end_cursor = pull_requests["end_cursor"]
approved_pr_num += pull_requests["approved_pr_num"]
# logger.info(f"[merge_pr]: {merged_pr_num} / [approved_pr]: {approved_pr_num}")

return {"approved_pr_num": approved_pr_num, "merged_pr_num": merged_pr_num}

def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResultType:
"""Implement the check in this method.

Parameters
----------
ctx : AnalyzeContext
The object containing processed data for the target repo.
check_result : CheckResult
The object containing result data of a check.

Returns
-------
CheckResultType
The result type of the check (e.g. PASSED).
"""
check_result["result_tables"] = [TwoPersonReviewedTable()]
required_reviewers = defaults.get_list("check.two_person", "required_reviewers", fallback=[])
logger.info("Reviewers number required: %s", {required_reviewers[0]})
commit_sha: str | None = ctx.component.repository.commit_sha
with_commit_sha: bool = bool(commit_sha)
query: str = self._get_graphql_query(with_commit_sha=with_commit_sha)
token = os.getenv("GITHUB_TOKEN")
headers: dict = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
client = GhAPIClient(
{
"headers": headers,
"query": query,
}
)
# TODO filter mannequin from with merge-by
# GitHub GraphQL API endpoint

data = self._extract_data(
ctx=ctx,
client=client,
commit_sha=commit_sha,
)

approved_pr_num: int = data["approved_pr_num"]
merged_pr_num: int = data["merged_pr_num"]

logger.info(
"%d pull requests have been reviewed by at least two person, and the pass rate is %d / %d",
approved_pr_num,
approved_pr_num,
merged_pr_num,
)
check_result["justification"].extend(
[
f"{str(approved_pr_num)} pull requests have been reviewed by at least two person.",
f"The pass rate is {str(approved_pr_num)} / {str(merged_pr_num)}",
]
)
if approved_pr_num == merged_pr_num:
return CheckResultType.PASSED
return CheckResultType.FAILED


registry.register(TwoPersonReviewedCheck())
108 changes: 107 additions & 1 deletion src/macaron/slsa_analyzer/git_service/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from macaron.config.defaults import defaults
from macaron.slsa_analyzer.asset import AssetLocator
from macaron.util import construct_query, download_github_build_log, send_get_http, send_get_http_raw
from macaron.util import construct_query, download_github_build_log, send_get_http, send_get_http_raw, send_post_graphql

logger: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -620,6 +620,112 @@ def download_asset(self, url: str, download_path: str) -> bool:

return True

def graphql_fetch_associated_prs(self, variables: dict) -> dict:
"""Fetch the associated pull requests given the user provided digest.

Parameters
----------
variables : dict
The variables that are passed to the graphql query.

Returns
-------
dict
The results for one page of the pull requests' data.
"""
url = "https://api.github.com/graphql"

response = send_post_graphql(
url=url, query=self.query_list, timeout=None, headers=self.headers, variables=variables
) # nosec B113:request_without_timeout

if response is None:
return {}

response_json = response.json()

approved_pr_num = 0
merged_pr_num = 0
ignore_analyse_list = ["Bot"]
branch_name = variables["branch_name"]
edges = response_json.get("data").get("repository").get("object").get("associatedPullRequests").get("edges")
for edge in edges:
node = edge.get("node")
review_decision = node.get("reviewDecision")
state = node.get("state")
base_ref_name = node.get("baseRefName") # branch name
author = node.get("author").get("__typename")
merge_by = node.get("mergedBy").get("__typename")
if author in ignore_analyse_list or merge_by in ignore_analyse_list:
continue
if base_ref_name == branch_name and state == "MERGED":
merged_pr_num += 1
if review_decision == "APPROVED":
approved_pr_num += 1
return {"merged_pr_num": merged_pr_num, "approved_pr_num": approved_pr_num}

def graphql_fetch_pull_requests(self, variables: dict) -> dict:
"""Fetch the pull requests from the specified branch (if specified).

Parameters
----------
url : str
The graphql URL.
variables : dict
The variables that are passed to the graphql query.

Returns
-------
dict
The results for one page of the pull requests' data.
"""

def filter_response(approved_pr_num: int) -> int:
"""Filter the merges that are trigger by the dependent bot, and only remain the approved pull requests.

Parameters
----------
approved_pr_num : int
The number of the pull requests are merged and approved by the reviewers.

Returns
-------
tuple
The dependabot_num and approved_pr_num cumulative results.
"""
ignore_analyse_list = ["Bot"]
for edge in response_json.get("data").get("repository").get("pullRequests").get("edges"):
node = edge.get("node")
review_decision = node.get("reviewDecision")
author = node.get("author").get("__typename")
merge_by = node.get("mergedBy").get("__typename")
if author in ignore_analyse_list or merge_by in ignore_analyse_list:
continue
if review_decision == "APPROVED":
approved_pr_num += 1
return approved_pr_num

url = "https://api.github.com/graphql"
response = send_post_graphql(
url=url, query=self.query_list, timeout=None, headers=self.headers, variables=variables
) # nosec B113:request_without_timeout

if response is None:
return {}

response_json = response.json()
approved_pr_num = 0
filtered_response = filter_response(approved_pr_num)

pull_requests = response_json.get("data").get("repository").get("pullRequests")

return {
"merged_pr_num": pull_requests.get("totalCount"), # nosec B113:request_without_timeout
"has_next_page": pull_requests.get("pageInfo").get("hasNextPage"), # nosec B113:request_without_timeout
"end_cursor": pull_requests.get("pageInfo").get("endCursor"), # nosec B113:request_without_timeout,
"approved_pr_num": filtered_response, # nosec B113:request_without_timeout,
}


def get_default_gh_client(access_token: str) -> GhAPIClient:
"""Return a GhAPIClient instance with default values.
Expand Down
Loading