diff --git a/.gitignore b/.gitignore index c4c5b0e..5c031cf 100644 --- a/.gitignore +++ b/.gitignore @@ -160,5 +160,13 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + # MAC .DS_Store + +# Vs Code +.vscode/ +.history/ + +# devstats +devstats-data/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61458df..2272fec 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,3 @@ -# Install pre-commit hooks via -# pre-commit install - repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: cef0300fd0fc4d2a87a85fa2093c6b283ea36f4b # frozen: v5.0.0 diff --git a/devstats/__main__.py b/devstats/__main__.py index 6154528..8a6963b 100644 --- a/devstats/__main__.py +++ b/devstats/__main__.py @@ -45,7 +45,7 @@ def query(repo_owner, repo_name, outdir): sys.exit() headers = {"Authorization": f"bearer {token}"} - query_files = glob(os.path.join(os.path.dirname(__file__), "queries/*.gql")) + query_files = sorted(glob(os.path.join(os.path.dirname(__file__), "queries/*.gql"))) for n, query in enumerate(query_files): if n != 0: @@ -55,7 +55,7 @@ def query(repo_owner, repo_name, outdir): # Parse query type from gql gql = open(query).read() qtype_match = re.match( - r"query\s*{\s*repository\(.*?\)\s*{\s*(pullRequests|issues)", + r"query\s*{\s*repository\(.*?\)\s*{\s*(pullRequests|issues|stargazerCount)", gql, flags=re.MULTILINE, ) @@ -65,7 +65,6 @@ def query(repo_owner, repo_name, outdir): else: qtype = qtype_match.group(1) - qname, qext = os.path.splitext(query) data = GithubGrabber( query, qtype, @@ -74,7 +73,7 @@ def query(repo_owner, repo_name, outdir): repo_name=repo_name, ) data.get() - ftype = {"issues": "issues", "pullRequests": "PRs"} + ftype = {"issues": "issues", "pullRequests": "PRs", "stargazerCount": "stars"} data.dump(f"{outdir}/{repo_name}_{ftype.get(qtype, 
qtype)}.json") diff --git a/devstats/queries/issue_activity_since_date.gql b/devstats/queries/issue_activity_since_date.gql index 2301975..b30aa85 100644 --- a/devstats/queries/issue_activity_since_date.gql +++ b/devstats/queries/issue_activity_since_date.gql @@ -8,10 +8,16 @@ query { number title createdAt - state - closedAt updatedAt + closedAt + state url + reactionGroups { + content + users { + totalCount + } + } labels(first: 100) { edges { node { diff --git a/devstats/queries/pr_data_query.gql b/devstats/queries/pr_data_query.gql index 55274db..3d33001 100644 --- a/devstats/queries/pr_data_query.gql +++ b/devstats/queries/pr_data_query.gql @@ -9,8 +9,17 @@ query { state title createdAt + updatedAt + closedAt baseRefName mergeable + labels(first: 25) { + edges { + node { + name + } + } + } author{ login } diff --git a/devstats/queries/repo_stars.gql b/devstats/queries/repo_stars.gql new file mode 100644 index 0000000..18e5ed5 --- /dev/null +++ b/devstats/queries/repo_stars.gql @@ -0,0 +1,5 @@ +query { + repository(owner: "_REPO_OWNER_", name: "_REPO_NAME_") { + stargazerCount + } +} \ No newline at end of file diff --git a/devstats/query.py b/devstats/query.py index a16fd00..b388f42 100644 --- a/devstats/query.py +++ b/devstats/query.py @@ -3,7 +3,7 @@ import requests -endpoint = r"https://api.github.com/graphql" +ENDPOINT = r"https://api.github.com/graphql" def load_query_from_file(fname, repo_owner="numpy", repo_name="numpy"): @@ -72,10 +72,10 @@ def send_query(query, query_type, headers, cursor=None): This is intended mostly for internal use within `get_all_responses`. 
""" # TODO: Expand this, either by parsing the query type from the query - # directly or manually adding more query_types to the set - if query_type not in {"issues", "pullRequests"}: + # directly or manually adding more query_types to the set + if query_type not in {"issues", "pullRequests", "stargazerCount"}: raise ValueError( - "Only 'issues' and 'pullRequests' queries are currently supported" + "Only 'issues', 'pullRequests' and 'stargazerCount' queries are currently supported" ) # TODO: Generalize this # WARNING: The cursor injection depends on the specific structure of the @@ -91,7 +91,7 @@ def send_query(query, query_type, headers, cursor=None): retries = max_retries while retries > 0: try: - response = requests.post(endpoint, json=payload, headers=headers) + response = requests.post(ENDPOINT, json=payload, headers=headers) except ( requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, @@ -138,7 +138,15 @@ def get_all_responses(query, query_type, headers): print("Fetching...", end="", flush=True) rdata = send_query(query, query_type, headers, cursor=last_cursor) try: - pdata, last_cursor, total_count = parse_single_query(rdata, query_type) + # TODO: Generalize this + if query_type == "stargazerCount": + # Special case for stargazerCount + pdata = [{"stargazerCount":rdata["data"]["repository"]["stargazerCount"]}] + last_cursor = None + total_count = 1 + else: + # Normal case for issues/PRs + pdata, last_cursor, total_count = parse_single_query(rdata, query_type) except (KeyError, TypeError): print("Malformed response; repeating request after 1 minute") time.sleep(1 * 60)