Skip to content

Commit

Permalink
Additional filter for commented issues and PRs
Browse files Browse the repository at this point in the history
The GH search API is unable to identify whether the issues and PRs were
updated by the user's comment. To make the result reliable, one needs to
fetch the comments of each entity after the `since` date, sorted by the
`created_at` field, and check whether any comment in the given interval
was created by the user.
  • Loading branch information
Felixoid committed Mar 26, 2024
1 parent cf0c25f commit b7983a0
Showing 1 changed file with 63 additions and 34 deletions.
97 changes: 63 additions & 34 deletions did/plugins/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

import requests

from did.base import Config, ReportError, delta, get_token
from did.base import Config, Date, ReportError, delta, get_token
from did.stats import Stats, StatsGroup
from did.utils import listed, log, pretty

Expand Down Expand Up @@ -57,45 +57,70 @@ def __init__(self, url, token):

@staticmethod
def until(until):
    """
    Issue #362: until for GH should have - delta(day=1)

    Return the given ``until`` value shifted back by one, i.e. the
    result of ``until - 1``. The argument only has to support
    subtraction of an integer.
    """
    return until - 1

@staticmethod
def request_data(url, headers):
    """
    Fetch the URL from GitHub API and deserialize it to JSON

    Returns a ``(data, response)`` tuple where ``data`` is the decoded
    JSON payload and ``response`` is the object returned by
    ``requests.get`` (callers read ``response.links`` for pagination).

    Raises ReportError when the request itself fails, when the server
    answers with status 401, when the body is not valid JSON or when
    the payload reports that the API rate limit has been exceeded.
    """
    log.debug(f"GitHub URL: {url}")
    try:
        response = requests.get(url, headers=headers)
        log.debug(f"Response headers:\n{response.headers}")
    except requests.exceptions.RequestException as error:
        log.debug(error)
        raise ReportError(
            f"GitHub failed to request URL {url}.") from error

    # Check if credentials are valid
    log.debug(f"GitHub status code: {response.status_code}")
    if response.status_code == 401:
        raise ReportError(
            "Defined token is not valid. "
            "Either update it or remove it.")

    # Parse fetched json data. json.loads() raises json.JSONDecodeError,
    # a ValueError subclass; catching ValueError covers it as well as
    # the requests-specific decode error.
    try:
        data = json.loads(response.text)
    except ValueError as error:
        log.debug(error)
        raise ReportError(
            f"GitHub JSON failed: {response.text}.") from error

    # Only error payloads carry a "message" field. Successful responses
    # are either a dict without that key or a plain list, so the lookup
    # must not assume the key (or even a dict) is present.
    message = data.get("message") if isinstance(data, dict) else None
    if isinstance(message, str) and message.startswith(
            "API rate limit exceeded"):
        raise ReportError(
            "GitHub API rate limit exceeded. "
            "Consider creating an access token.")
    return data, response

def has_comments(self, issue_data, user, since, until):
    """
    Check whether the user commented on the issue or PR in the interval

    Walks the paginated comments found under
    ``issue_data["comments_url"]``, requested with the ``since``
    filter, and returns True as soon as a comment authored by ``user``
    is found. Returns False when there is no comments URL, when a
    comment created after ``until`` is reached, or when all pages are
    exhausted without a match.
    """
    url = issue_data["comments_url"]
    if not url:
        return False

    url = f"{url}?per_page={PER_PAGE}&sort=created&since={since}"

    while True:
        comments, response = self.request_data(url, self.headers)
        for comment in comments:
            # Only the date part of the ISO timestamp is compared
            date = Date(comment["created_at"].split("T", 1)[0])
            # NOTE(review): stopping at the first comment past `until`
            # assumes comments arrive in ascending creation order --
            # confirm against the GitHub API behaviour.
            if date.date > until:
                return False
            # The API may return a null user, so guard the lookup
            # instead of failing on it.
            author = comment["user"]
            if author is not None and user == author["login"]:
                return True
        # Update url to the next page, break if no next page
        # provided
        if 'next' in response.links:
            url = response.links['next']['url']
        else:
            break
    return False

def search(self, query):
""" Perform GitHub query """
result = []
url = self.url + "/" + query + f"&per_page={PER_PAGE}"

while True:
# Fetch the query
log.debug(f"GitHub query: {url}")
try:
response = requests.get(url, headers=self.headers)
log.debug(f"Response headers:\n{response.headers}")
except requests.exceptions.RequestException as error:
log.debug(error)
raise ReportError(f"GitHub search on {self.url} failed.")

# Check if credentials are valid
log.debug(f"GitHub status code: {response.status_code}")
if response.status_code == 401:
raise ReportError(
"Defined token is not valid. "
"Either update it or remove it.")

# Parse fetched json data
try:
data = json.loads(response.text)["items"]
result.extend(data)
except KeyError:
if json.loads(response.text)["message"].startswith(
"API rate limit exceeded"):
raise ReportError(
"GitHub API rate limit exceeded. "
"Consider creating an access token.")
except requests.exceptions.JSONDecodeError as error:
log.debug(error)
raise ReportError(f"GitHub JSON failed: {response.text}.")

data, response = self.request_data(url, self.headers)
result.extend(data["items"])
# Update url to the next page, break if no next page
# provided
if 'next' in response.links:
Expand Down Expand Up @@ -190,7 +215,9 @@ def fetch(self):
user, since, until)
query += "+type:issue"
self.stats = [
Issue(issue, self.parent) for issue in self.parent.github.search(query)]
Issue(issue, self.parent) for issue in self.parent.github.search(query)
# Additional filter for the comments by user in the interval
if self.parent.github.has_comments(issue, user, since, until)]


class PullRequestsCreated(Stats):
Expand Down Expand Up @@ -219,7 +246,9 @@ def fetch(self):
self.user.login, self.options.since, until)
query += "+type:pr"
self.stats = [
Issue(issue, self.parent) for issue in self.parent.github.search(query)]
Issue(issue, self.parent) for issue in self.parent.github.search(query)
# Additional filter for the comments by user in the interval
if self.parent.github.has_comments(issue, user, since, until)]


class PullRequestsClosed(Stats):
Expand Down

0 comments on commit b7983a0

Please sign in to comment.