scientific-python · nabobalis · May 11, 2025 · May 11, 2025 · May 11, 2025
diff --git a/.gitignore b/.gitignore
@@ -160,5 +160,13 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
 # MAC
 .DS_Store
+
+# VS Code
+.vscode/
+.history/
+
+# devstats
+devstats-data/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,3 @@
-# Install pre-commit hooks via
-# pre-commit install
-
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: cef0300fd0fc4d2a87a85fa2093c6b283ea36f4b # frozen: v5.0.0

diff --git a/devstats/__main__.py b/devstats/__main__.py
@@ -45,7 +45,7 @@ def query(repo_owner, repo_name, outdir):
         sys.exit()
 
     headers = {"Authorization": f"bearer {token}"}
-    query_files = glob(os.path.join(os.path.dirname(__file__), "queries/*.gql"))
+    query_files = sorted(glob(os.path.join(os.path.dirname(__file__), "queries/*.gql")))
 
     for n, query in enumerate(query_files):
         if n != 0:
@@ -55,7 +55,7 @@ def query(repo_owner, repo_name, outdir):
         # Parse query type from gql
         gql = open(query).read()
         qtype_match = re.match(
-            r"query\s*{\s*repository\(.*?\)\s*{\s*(pullRequests|issues)",
+            r"query\s*{\s*repository\(.*?\)\s*{\s*(pullRequests|issues|stargazerCount)",
             gql,
             flags=re.MULTILINE,
         )
@@ -65,7 +65,6 @@ def query(repo_owner, repo_name, outdir):
         else:
             qtype = qtype_match.group(1)
 
-        qname, qext = os.path.splitext(query)
         data = GithubGrabber(
             query,
             qtype,
@@ -74,7 +73,7 @@ def query(repo_owner, repo_name, outdir):
             repo_name=repo_name,
         )
         data.get()
-        ftype = {"issues": "issues", "pullRequests": "PRs"}
+        ftype = {"issues": "issues", "pullRequests": "PRs", "stargazerCount": "stars"}
         data.dump(f"{outdir}/{repo_name}_{ftype.get(qtype, qtype)}.json")
 
 

diff --git a/devstats/queries/issue_activity_since_date.gql b/devstats/queries/issue_activity_since_date.gql
@@ -8,10 +8,16 @@ query {
           number
           title
           createdAt
-          state
-          closedAt
           updatedAt
+          closedAt
+          state
           url
+          reactionGroups {
+            content
+              users {
+            totalCount
+                  }
+          }
           labels(first: 100) {
             edges {
               node {

diff --git a/devstats/queries/pr_data_query.gql b/devstats/queries/pr_data_query.gql
@@ -9,8 +9,17 @@ query {
           state
           title
           createdAt
+          updatedAt
+          closedAt
           baseRefName
           mergeable
+          labels(first: 25) {
+            edges {
+              node {
+                name
+              }
+            }
+          }
           author{
             login
           }

diff --git a/devstats/queries/repo_stars.gql b/devstats/queries/repo_stars.gql
@@ -0,0 +1,5 @@
+query {
+  repository(owner: "_REPO_OWNER_", name: "_REPO_NAME_") {
+    stargazerCount
+  }
+}
diff --git a/devstats/query.py b/devstats/query.py
@@ -3,7 +3,7 @@
 
 import requests
 
-endpoint = r"https://api.github.com/graphql"
+ENDPOINT = r"https://api.github.com/graphql"
 
 
 def load_query_from_file(fname, repo_owner="numpy", repo_name="numpy"):
@@ -72,10 +72,10 @@ def send_query(query, query_type, headers, cursor=None):
     This is intended mostly for internal use within `get_all_responses`.
     """
     # TODO: Expand this, either by parsing the query type from the query
-    # directly or manually adding more query_types to the set
-    if query_type not in {"issues", "pullRequests"}:
+    # directly or manually adding more query_types to the set
+    if query_type not in {"issues", "pullRequests", "stargazerCount"}:
         raise ValueError(
-            "Only 'issues' and 'pullRequests' queries are currently supported"
+            "Only 'issues', 'pullRequests' and 'stargazerCount' queries are currently supported"
         )
     # TODO: Generalize this
     # WARNING: The cursor injection depends on the specific structure of the
@@ -91,7 +91,7 @@ def send_query(query, query_type, headers, cursor=None):
     retries = max_retries
     while retries > 0:
         try:
-            response = requests.post(endpoint, json=payload, headers=headers)
+            response = requests.post(ENDPOINT, json=payload, headers=headers)
         except (
             requests.exceptions.ChunkedEncodingError,
             requests.exceptions.ConnectionError,
@@ -138,7 +138,15 @@ def get_all_responses(query, query_type, headers):
         print("Fetching...", end="", flush=True)
         rdata = send_query(query, query_type, headers, cursor=last_cursor)
         try:
-            pdata, last_cursor, total_count = parse_single_query(rdata, query_type)
+            # TODO: Generalize this
+            if query_type == "stargazerCount":
+                # Special case for stargazerCount
+                pdata = [{"stargazerCount":rdata["data"]["repository"]["stargazerCount"]}]
+                last_cursor = None
+                total_count = 1
+            else:
+                # Normal case for issues/PRs
+                pdata, last_cursor, total_count = parse_single_query(rdata, query_type)
         except (KeyError, TypeError):
             print("Malformed response; repeating request after 1 minute")
             time.sleep(1 * 60)