Skip to content

Commit a1d3b8d

Browse files
Embed standard queries in library (#7)
* Embed standard queries in library
* Fix linting

---------

Co-authored-by: Jarrod Millman <[email protected]>
1 parent bfaeffb commit a1d3b8d

File tree

5 files changed

+139
-21
lines changed

5 files changed

+139
-21
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
*~
2+
13
# Byte-compiled / optimized / DLL files
24
__pycache__/
35
*.py[cod]

devstats/__init__.py

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import sys
44
import json
55
import click
6+
from glob import glob
7+
import re
68

79
try:
810
token = os.environ["GRAPH_API_KEY"]
@@ -102,16 +104,21 @@ def get_all_responses(query, query_type):
102104
Helper function to bypass GitHub GraphQL API node limit.
103105
"""
104106
# Get data from a single response
107+
print(f"Retrieving first page...", end="", flush=True)
105108
initial_data = send_query(query, query_type)
106109
data, last_cursor, total_count = parse_single_query(initial_data, query_type)
107-
print(f"Retrieving {len(data)} out of {total_count} values...")
110+
108111
# Continue requesting data (with pagination) until all are acquired
109112
while len(data) < total_count:
110113
rdata = send_query(query, query_type, cursor=last_cursor)
111114
pdata, last_cursor, _ = parse_single_query(rdata, query_type)
112115
data.extend(pdata)
113-
print(f"Retrieving {len(data)} out of {total_count} values...")
114-
print("Done.")
116+
print(
117+
f"OK\nRetrieving {len(data)} out of {total_count} values...",
118+
end="",
119+
flush=True,
120+
)
121+
print("OK")
115122
return data
116123

117124

@@ -187,6 +194,7 @@ def dump(self, outfile):
187194
raise ValueError("raw_data is currently empty, nothing to dump")
188195

189196
with open(outfile, "w") as outf:
197+
print(f"Writing [{outfile}]")
190198
json.dump(self.raw_data, outf)
191199

192200

@@ -195,24 +203,37 @@ def dump(self, outfile):
195203
@click.argument("repo_name")
196204
def main(repo_owner, repo_name):
197205
"""Download and save issue and pr data for `repo_owner`/`repo_name`."""
198-
# Download issue data
199-
issues = GithubGrabber(
200-
"query_examples/issue_activity_since_date.gql",
201-
"issues",
202-
repo_owner=repo_owner,
203-
repo_name=repo_name,
204-
)
205-
issues.get()
206-
issues.dump(f"{repo_name}_issues.json")
207-
# Download PR data
208-
prs = GithubGrabber(
209-
"query_examples/pr_data_query.gql",
210-
"pullRequests",
211-
repo_owner=repo_owner,
212-
repo_name=repo_name,
213-
)
214-
prs.get()
215-
prs.dump(f"{repo_name}_prs.json")
206+
207+
query_files = glob(os.path.join(os.path.dirname(__file__), "queries/*.gql"))
208+
209+
for n, query in enumerate(query_files):
210+
if n != 0:
211+
print()
212+
213+
print(f"Query: [{os.path.basename(query)}] on [{repo_owner}/{repo_name}]")
214+
# Parse query type from gql
215+
gql = open(query).read()
216+
qtype_match = re.match(
217+
r"query\s*{\s*repository\(.*?\)\s*{\s*(pullRequests|issues)",
218+
gql,
219+
flags=re.MULTILINE,
220+
)
221+
if qtype_match is None:
222+
print(f"Could not determine gql query type for {query}")
223+
sys.exit(-1)
224+
else:
225+
qtype = qtype_match.group(1)
226+
227+
qname, qext = os.path.splitext(query)
228+
data = GithubGrabber(
229+
query,
230+
qtype,
231+
repo_owner=repo_owner,
232+
repo_name=repo_name,
233+
)
234+
data.get()
235+
ftype = {"issues": "issues", "pullRequests": "PRs"}
236+
data.dump(f"{repo_name}_{ftype.get(qtype, qtype)}.json")
216237

217238

218239
if __name__ == "__main__":
devstats/queries/issue_activity_since_date.gql

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
query {
2+
repository(owner: "_REPO_OWNER_", name: "_REPO_NAME_") {
3+
issues(first: 100, filterBy: {since: "2020-01-01T00:00:00Z"}) {
4+
totalCount
5+
edges {
6+
cursor
7+
node {
8+
number
9+
title
10+
createdAt
11+
state
12+
closedAt
13+
updatedAt
14+
url
15+
labels(first: 100) {
16+
edges {
17+
node {
18+
name
19+
}
20+
}
21+
}
22+
timelineItems(first: 100, itemTypes: [CROSS_REFERENCED_EVENT, ISSUE_COMMENT, CLOSED_EVENT]) {
23+
totalCount
24+
edges {
25+
node {
26+
__typename
27+
... on CrossReferencedEvent {
28+
source {
29+
... on Issue {
30+
__typename
31+
number
32+
closed
33+
}
34+
... on PullRequest {
35+
__typename
36+
number
37+
closed
38+
}
39+
}
40+
isCrossRepository
41+
}
42+
... on IssueComment {
43+
author {
44+
login
45+
}
46+
createdAt
47+
}
48+
... on ClosedEvent {
49+
actor {
50+
login
51+
}
52+
}
53+
}
54+
}
55+
}
56+
}
57+
}
58+
}
59+
}
60+
}

devstats/queries/pr_data_query.gql

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
query {
2+
repository(owner:"_REPO_OWNER_", name:"_REPO_NAME_") {
3+
pullRequests(first:100) {
4+
totalCount
5+
edges {
6+
cursor
7+
node{
8+
number
9+
state
10+
title
11+
createdAt
12+
baseRefName
13+
mergeable
14+
author{
15+
login
16+
}
17+
authorAssociation
18+
mergedBy{
19+
login
20+
}
21+
mergedAt
22+
reviews(states:APPROVED){
23+
totalCount
24+
}
25+
participants(first:100){
26+
totalCount
27+
}
28+
}
29+
}
30+
}
31+
}
32+
}

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,6 @@ homepage = "https://github.com/scientific-python/devstats"
3535

3636
[tool.setuptools.packages.find]
3737
include = ["devstats*"]
38+
39+
[tool.setuptools.package-data]
40+
"*" = ["*.gql"]

0 commit comments

Comments
 (0)