Embed standard queries in library (#7)

stefanv · jarrodmillman · web-flow · commit a1d3b8d3ff5b · 2023-05-23T19:21:48.000-05:00
* Embed standard queries in library

* Fix linting

---------

Co-authored-by: Jarrod Millman <jarrod.millman@gmail.com>
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+*~
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/devstats/__init__.py b/devstats/__init__.py
@@ -3,6 +3,8 @@
 import sys
 import json
 import click
+from glob import glob
+import re
 
 try:
     token = os.environ["GRAPH_API_KEY"]
@@ -102,16 +104,21 @@ def get_all_responses(query, query_type):
     Helper function to bypass GitHub GraphQL API node limit.
     """
     # Get data from a single response
+    print(f"Retrieving first page...", end="", flush=True)
     initial_data = send_query(query, query_type)
     data, last_cursor, total_count = parse_single_query(initial_data, query_type)
-    print(f"Retrieving {len(data)} out of {total_count} values...")
+
     # Continue requesting data (with pagination) until all are acquired
     while len(data) < total_count:
         rdata = send_query(query, query_type, cursor=last_cursor)
         pdata, last_cursor, _ = parse_single_query(rdata, query_type)
         data.extend(pdata)
-        print(f"Retrieving {len(data)} out of {total_count} values...")
-    print("Done.")
+        print(
+            f"OK\nRetrieving {len(data)} out of {total_count} values...",
+            end="",
+            flush=True,
+        )
+    print("OK")
     return data
 
 
@@ -187,6 +194,7 @@ def dump(self, outfile):
             raise ValueError("raw_data is currently empty, nothing to dump")
 
         with open(outfile, "w") as outf:
+            print(f"Writing [{outfile}]")
             json.dump(self.raw_data, outf)
 
 
@@ -195,24 +203,37 @@ def dump(self, outfile):
 @click.argument("repo_name")
 def main(repo_owner, repo_name):
     """Download and save issue and pr data for `repo_owner`/`repo_name`."""
-    # Download issue data
-    issues = GithubGrabber(
-        "query_examples/issue_activity_since_date.gql",
-        "issues",
-        repo_owner=repo_owner,
-        repo_name=repo_name,
-    )
-    issues.get()
-    issues.dump(f"{repo_name}_issues.json")
-    # Download PR data
-    prs = GithubGrabber(
-        "query_examples/pr_data_query.gql",
-        "pullRequests",
-        repo_owner=repo_owner,
-        repo_name=repo_name,
-    )
-    prs.get()
-    prs.dump(f"{repo_name}_prs.json")
+
+    query_files = glob(os.path.join(os.path.dirname(__file__), "queries/*.gql"))
+
+    for n, query in enumerate(query_files):
+        if n != 0:
+            print()
+
+        print(f"Query: [{os.path.basename(query)}] on [{repo_owner}/{repo_name}]")
+        # Parse query type from gql
+        gql = open(query).read()
+        qtype_match = re.match(
+            r"query\s*{\s*repository\(.*?\)\s*{\s*(pullRequests|issues)",
+            gql,
+            flags=re.MULTILINE,
+        )
+        if qtype_match is None:
+            print(f"Could not determine gql query type for {query}")
+            sys.exit(-1)
+        else:
+            qtype = qtype_match.group(1)
+
+        qname, qext = os.path.splitext(query)
+        data = GithubGrabber(
+            query,
+            qtype,
+            repo_owner=repo_owner,
+            repo_name=repo_name,
+        )
+        data.get()
+        ftype = {"issues": "issues", "pullRequests": "PRs"}
+        data.dump(f"{repo_name}_{ftype.get(qtype, qtype)}.json")
 
 
 if __name__ == "__main__":
diff --git a/devstats/queries/issue_activity_since_date.gql b/devstats/queries/issue_activity_since_date.gql
@@ -0,0 +1,60 @@
+query {
+  repository(owner: "_REPO_OWNER_", name: "_REPO_NAME_") {
+    issues(first: 100, filterBy: {since: "2020-01-01T00:00:00Z"}) {
+      totalCount
+      edges {
+        cursor
+        node {
+          number
+          title
+          createdAt
+          state
+          closedAt
+          updatedAt
+          url
+          labels(first: 100) {
+            edges {
+              node {
+                name
+              }
+            }
+          }
+          timelineItems(first: 100, itemTypes: [CROSS_REFERENCED_EVENT, ISSUE_COMMENT, CLOSED_EVENT]) {
+            totalCount
+            edges {
+              node {
+                __typename
+                ... on CrossReferencedEvent {
+                  source {
+                    ... on Issue {
+                      __typename
+                      number
+                      closed
+                    }
+                    ... on PullRequest {
+                      __typename
+                      number
+                      closed
+                    }
+                  }
+                  isCrossRepository
+                }
+                ... on IssueComment {
+                  author {
+                    login
+                  }
+                  createdAt
+                }
+                ... on ClosedEvent {
+                  actor {
+                    login
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/devstats/queries/pr_data_query.gql b/devstats/queries/pr_data_query.gql
@@ -0,0 +1,32 @@
+query {
+  repository(owner:"_REPO_OWNER_", name:"_REPO_NAME_") {
+    pullRequests(first:100) {
+      totalCount
+      edges {
+        cursor
+        node{
+          number
+          state
+          title
+          createdAt
+          baseRefName
+          mergeable
+          author{
+            login
+          }
+          authorAssociation
+          mergedBy{
+            login
+          }
+          mergedAt
+          reviews(states:APPROVED){
+            totalCount
+          }
+          participants(first:100){
+            totalCount
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/pyproject.toml b/pyproject.toml
@@ -35,3 +35,6 @@ homepage = "https://github.com/scientific-python/devstats"
 
 [tool.setuptools.packages.find]
 include = ["devstats*"]
+
+[tool.setuptools.package-data]
+"*" = ["*.gql"]

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+*~`
	`2`	`+`
`1`	`3`	`# Byte-compiled / optimized / DLL files`
`2`	`4`	`__pycache__/`
`3`	`5`	`*.py[cod]`