From b7983a09ff0ad7722952ce889bb103a7857599ae Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Tue, 26 Mar 2024 23:02:30 +0100
Subject: [PATCH] Additional filter for commented issues and PRs

The GH search API is unable to identify whether the issues and PRs
were updated by the user's comment. To make it reliable, one needs to
search the comments for the entity after the `since` date, sort them
by the created_at field, and check if the comments in the given
interval are created by the user.
---
 did/plugins/github.py | 111 ++++++++++++++++++++++++++++---------------
 1 file changed, 77 insertions(+), 34 deletions(-)

diff --git a/did/plugins/github.py b/did/plugins/github.py
index d28e1ae6..77196a20 100644
--- a/did/plugins/github.py
+++ b/did/plugins/github.py
@@ -27,7 +27,7 @@
 
 import requests
 
-from did.base import Config, ReportError, delta, get_token
+from did.base import Config, Date, ReportError, delta, get_token
 from did.stats import Stats, StatsGroup
 from did.utils import listed, log, pretty
 
@@ -57,45 +57,83 @@ def __init__(self, url, token):
 
     @staticmethod
     def until(until):
-        """Issue #362: until for GH should have - delta(day=1)"""
+        """ Issue #362: until for GH should have - delta(day=1) """
         return until - 1
 
+    @staticmethod
+    def request_data(url, headers):
+        """
+        Fetch the URL from GitHub API and deserialize it to JSON
+
+        Return (data, response) so that callers can follow pagination.
+        Raise ReportError on request, token, JSON or rate limit errors.
+        """
+        log.debug(f"GitHub URL: {url}")
+        try:
+            response = requests.get(url, headers=headers)
+            log.debug(f"Response headers:\n{response.headers}")
+        except requests.exceptions.RequestException as error:
+            log.debug(error)
+            raise ReportError(f"GitHub failed to request URL {url}.")
+
+        # Check if credentials are valid
+        log.debug(f"GitHub status code: {response.status_code}")
+        if response.status_code == 401:
+            raise ReportError(
+                "Defined token is not valid. "
+                "Either update it or remove it.")
+
+        # Parse fetched json data
+        try:
+            data = json.loads(response.text)
+        except json.JSONDecodeError as error:
+            log.debug(error)
+            raise ReportError(f"GitHub JSON failed: {response.text}.")
+        # Only error payloads are dicts carrying the "message" key
+        message = data.get("message", "") if isinstance(data, dict) else ""
+        if message.startswith("API rate limit exceeded"):
+            raise ReportError(
+                "GitHub API rate limit exceeded. "
+                "Consider creating an access token.")
+        return data, response
+
+    def has_comments(self, issue_data, user, since, until):
+        """
+        Check if the user commented on the issue in the given interval
+
+        Walk the paginated comments requested with `since` and stop at
+        the first comment by `user` or the first one after `until`.
+        """
+        url = issue_data["comments_url"]
+        if not url:
+            return False
+
+        url = f"{url}?per_page={PER_PAGE}&sort=created&since={since}"
+
+        while True:
+            comments, response = self.request_data(url, self.headers)
+            for comment in comments:
+                date = Date(comment["created_at"].split("T", 1)[0])
+                if date.date > until:
+                    return False
+                if user == comment["user"]["login"]:
+                    return True
+            # Update url to the next page, break if no next page
+            # provided
+            if 'next' in response.links:
+                url = response.links['next']['url']
+            else:
+                break
+        return False
+
     def search(self, query):
         """ Perform GitHub query """
         result = []
         url = self.url + "/" + query + f"&per_page={PER_PAGE}"
 
         while True:
-            # Fetch the query
-            log.debug(f"GitHub query: {url}")
-            try:
-                response = requests.get(url, headers=self.headers)
-                log.debug(f"Response headers:\n{response.headers}")
-            except requests.exceptions.RequestException as error:
-                log.debug(error)
-                raise ReportError(f"GitHub search on {self.url} failed.")
-
-            # Check if credentials are valid
-            log.debug(f"GitHub status code: {response.status_code}")
-            if response.status_code == 401:
-                raise ReportError(
-                    "Defined token is not valid. "
-                    "Either update it or remove it.")
-
-            # Parse fetched json data
-            try:
-                data = json.loads(response.text)["items"]
-                result.extend(data)
-            except KeyError:
-                if json.loads(response.text)["message"].startswith(
-                        "API rate limit exceeded"):
-                    raise ReportError(
-                        "GitHub API rate limit exceeded. "
-                        "Consider creating an access token.")
-            except requests.exceptions.JSONDecodeError as error:
-                log.debug(error)
-                raise ReportError(f"GitHub JSON failed: {response.text}.")
-
+            data, response = self.request_data(url, self.headers)
+            result.extend(data["items"])
             # Update url to the next page, break if no next page
             # provided
             if 'next' in response.links:
@@ -190,7 +228,9 @@ def fetch(self):
             user, since, until)
         query += "+type:issue"
         self.stats = [
-            Issue(issue, self.parent) for issue in self.parent.github.search(query)]
+            Issue(issue, self.parent) for issue in self.parent.github.search(query)
+            # Additional filter for the comments by user in the interval
+            if self.parent.github.has_comments(issue, user, since, until)]
 
 
 class PullRequestsCreated(Stats):
@@ -219,7 +259,10 @@ def fetch(self):
             self.user.login, self.options.since, until)
         query += "+type:pr"
         self.stats = [
-            Issue(issue, self.parent) for issue in self.parent.github.search(query)]
+            Issue(issue, self.parent) for issue in self.parent.github.search(query)
+            # Additional filter for the comments by user in the interval
+            if self.parent.github.has_comments(
+                issue, self.user.login, self.options.since, until)]
 
 
 class PullRequestsClosed(Stats):