Skip to content

Commit

Permalink
✨ Feature: add graphql pagination support (#121)
Browse files Browse the repository at this point in the history
  • Loading branch information
yanyongyu authored Jul 28, 2024
1 parent a90820a commit 855eb17
Show file tree
Hide file tree
Showing 6 changed files with 329 additions and 44 deletions.
75 changes: 74 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ Current supported versions are: (you can find it in the section `[[tool.codegen.
- 2022-11-28 (latest)
- ghec-2022-11-28

### Pagination
### Rest API Pagination

Pagination type checking is also supported:

Expand Down Expand Up @@ -509,6 +509,79 @@ Simple async call:
data: Dict[str, Any] = await github.async_graphql(query, variables={"foo": "bar"})
```

### GraphQL Pagination

githubkit also provides a helper function to paginate the GraphQL API.

First, your query must accept a `cursor` variable and return a `pageInfo` object. For example:

```graphql
query ($owner: String!, $repo: String!, $cursor: String) {
repository(owner: $owner, name: $repo) {
issues(first: 10, after: $cursor) {
nodes {
number
}
pageInfo {
hasNextPage
endCursor
}
}
}
}
```

The `pageInfo` object in your query must be one of the following types depending on the direction of the pagination:

For forward pagination, use:

```graphql
pageInfo {
hasNextPage
endCursor
}
```

For backward pagination, use:

```graphql
pageInfo {
hasPreviousPage
startCursor
}
```

If you provide all 4 properties in a `pageInfo`, githubkit will default to forward pagination.

Then, you can iterate over the paginated results by using the graphql `paginate` method:

```python
for result in github.graphql.paginate(
query, variables={"owner": "owner", "repo": "repo"}
):
print(result)
```

Note that the `result` is a dict containing the list of nodes/edges for each page and the `pageInfo` object. You should iterate over the `nodes` or `edges` list to get the actual data. For example:

```python
for result in github.graphql.paginate(query, {"owner": "owner", "repo": "repo"}):
for issue in result["repository"]["issues"]["nodes"]:
print(issue)
```

You can also provide an initial cursor value to start pagination from a specific point:

```python
for result in github.graphql.paginate(
query, variables={"owner": "owner", "repo": "repo", "cursor": "initial_cursor"}
):
print(result)
```

> [!NOTE]
> Nested pagination is not supported.

### Auto Retry

By default, githubkit will retry the request when specific exceptions are encountered. When the rate limit is exceeded, githubkit will retry once after the waiting time suggested by GitHub. When a server error is encountered (http status >= 500), githubkit will retry at most three times.
Expand Down
24 changes: 23 additions & 1 deletion githubkit/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ class SecondaryRateLimitExceeded(RateLimitExceeded):
"""API request failed with secondary rate limit exceeded"""


class GraphQLFailed(GitHubException):
class GraphQLError(GitHubException):
    """Simple GraphQL request error.

    Common base class of the GraphQL exceptions in this module:
    ``GraphQLFailed`` and ``GraphQLPaginationError`` both derive from it.
    """


class GraphQLFailed(GraphQLError):
"""GraphQL request with errors in response"""

def __init__(self, response: "GraphQLResponse"):
Expand All @@ -83,6 +87,24 @@ def __repr__(self) -> str:
return f"{self.__class__.__name__}({self.response.errors!r})"


class GraphQLPaginationError(GraphQLError):
    """GraphQL paginate response error.

    Attributes:
        response: the GraphQL response associated with the error.
    """

    def __init__(self, response: "GraphQLResponse"):
        self.response = response

    def __repr__(self) -> str:
        # Embed the repr of the response (GraphQLFailed does the same for its
        # errors) so the output reads like a constructor call instead of
        # mixing a class name with the response's str() form.
        return f"{self.__class__.__name__}({self.response!r})"


class GraphQLMissingPageInfo(GraphQLPaginationError):
    """GraphQL paginate response is missing the ``pageInfo`` object."""


class GraphQLMissingCursorChange(GraphQLPaginationError):
    """GraphQL paginate response is missing a cursor change.

    NOTE(review): presumably raised when the cursor for the next page is the
    same as the current one, which would make pagination loop forever --
    confirm against the paginator implementation.
    """


class WebhookTypeNotFound(GitHubException):
"""Webhook event type not found"""

Expand Down
24 changes: 6 additions & 18 deletions githubkit/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
from .response import Response
from .paginator import Paginator
from .auth import BaseAuthStrategy
from .graphql import GraphQLNamespace
from .typing import RetryDecisionFunc
from .versions import RestVersionSwitcher, WebhooksVersionSwitcher
from .graphql import GraphQLResponse, build_graphql_request, parse_graphql_response

if TYPE_CHECKING:
import httpx
Expand Down Expand Up @@ -133,27 +133,15 @@ def rest(self) -> RestVersionSwitcher:
webhooks = WebhooksVersionSwitcher()

# graphql
def graphql(
self, query: str, variables: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
json = build_graphql_request(query, variables)

return parse_graphql_response(
self,
self.request("POST", "/graphql", json=json, response_model=GraphQLResponse),
)
@cached_property
def graphql(self) -> GraphQLNamespace:
return GraphQLNamespace(self)

# alias for graphql.arequest
async def async_graphql(
self, query: str, variables: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
json = build_graphql_request(query, variables)

return parse_graphql_response(
self,
await self.arequest(
"POST", "/graphql", json=json, response_model=GraphQLResponse
),
)
return await self.graphql.arequest(query, variables)

# rest pagination
paginate = Paginator
102 changes: 78 additions & 24 deletions githubkit/graphql/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from weakref import ref
from typing import TYPE_CHECKING, Any, Dict, Optional, cast

from githubkit.exception import GraphQLFailed, PrimaryRateLimitExceeded

from .paginator import Paginator as Paginator
from .models import GraphQLError as GraphQLError
from .models import SourceLocation as SourceLocation
from .models import GraphQLResponse as GraphQLResponse
Expand All @@ -11,27 +13,79 @@
from githubkit.response import Response


def build_graphql_request(
    query: str, variables: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """Build the JSON body of a GraphQL request.

    Args:
        query: the GraphQL query document.
        variables: optional query variables; omitted from the body when
            ``None`` or empty.

    Returns:
        A dict with a ``"query"`` key and, when variables were given, a
        ``"variables"`` key.
    """
    if not variables:
        return {"query": query}
    return {"query": query, "variables": variables}


def parse_graphql_response(
    github: "GitHubCore", response: "Response[GraphQLResponse]"
) -> Dict[str, Any]:
    """Return the ``data`` of a GraphQL response, raising on reported errors.

    Args:
        github: client used to extract the retry-after time on rate limiting.
        response: the response whose ``parsed_data`` is inspected.

    Returns:
        The ``data`` member of the parsed response.

    Raises:
        PrimaryRateLimitExceeded: if any error has type ``"RATE_LIMITED"``.
        GraphQLFailed: if the response carries any other errors.
    """
    parsed = response.parsed_data
    errors = parsed.errors
    if not errors:
        return cast(Dict[str, Any], parsed.data)

    # check rate limit exceeded
    # https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api#exceeding-the-rate-limit
    # x-ratelimit-remaining may not be 0, ignore it
    # https://github.com/octokit/plugin-throttling.js/pull/636
    for error in errors:
        if error.type == "RATE_LIMITED":
            raise PrimaryRateLimitExceeded(
                response, github._extract_retry_after(response)
            )
    raise GraphQLFailed(parsed)
class GraphQLNamespace:
    """GraphQL helpers bound to a GitHub client.

    Only a weak reference to the client is stored, so the namespace itself
    does not keep the client alive.
    """

    def __init__(self, github: "GitHubCore") -> None:
        self._github_ref = ref(github)

    @property
    def _github(self) -> "GitHubCore":
        """Resolve the weak reference, raising ``RuntimeError`` if it is dead."""
        client = self._github_ref()
        if not client:
            raise RuntimeError(
                "GitHub client has already been collected. "
                "Do not use the namespace after the client has been collected."
            )
        return client

    @staticmethod
    def build_graphql_request(
        query: str, variables: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Build the JSON request body; ``variables`` is omitted when empty."""
        if not variables:
            return {"query": query}
        return {"query": query, "variables": variables}

    def parse_graphql_response(
        self, response: "Response[GraphQLResponse]"
    ) -> Dict[str, Any]:
        """Return the response ``data``, raising if the response has errors.

        Raises:
            PrimaryRateLimitExceeded: if any error has type ``"RATE_LIMITED"``.
            GraphQLFailed: if the response carries any other errors.
        """
        parsed = response.parsed_data
        errors = parsed.errors
        if not errors:
            return cast(Dict[str, Any], parsed.data)

        # check rate limit exceeded
        # https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api#exceeding-the-rate-limit
        # x-ratelimit-remaining may not be 0, ignore it
        # https://github.com/octokit/plugin-throttling.js/pull/636
        for error in errors:
            if error.type == "RATE_LIMITED":
                raise PrimaryRateLimitExceeded(
                    response, self._github._extract_retry_after(response)
                )
        raise GraphQLFailed(parsed)

    def _request(
        self, query: str, variables: Optional[Dict[str, Any]] = None
    ) -> "Response[GraphQLResponse]":
        """POST the query to ``/graphql`` and return the raw response."""
        payload = self.build_graphql_request(query, variables)
        github = self._github
        return github.request(
            "POST", "/graphql", json=payload, response_model=GraphQLResponse
        )

    def request(
        self, query: str, variables: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Run a GraphQL query synchronously and return its ``data``."""
        response = self._request(query, variables)
        return self.parse_graphql_response(response)

    async def _arequest(
        self, query: str, variables: Optional[Dict[str, Any]] = None
    ) -> "Response[GraphQLResponse]":
        """Asynchronously POST the query to ``/graphql``; return the raw response."""
        payload = self.build_graphql_request(query, variables)
        github = self._github
        return await github.arequest(
            "POST", "/graphql", json=payload, response_model=GraphQLResponse
        )

    async def arequest(
        self, query: str, variables: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Run a GraphQL query asynchronously and return its ``data``."""
        response = await self._arequest(query, variables)
        return self.parse_graphql_response(response)

    # kept so that the namespace can still be called directly, like a function
    def __call__(
        self, query: str, variables: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Equivalent to :meth:`request`."""
        return self.request(query, variables)

    def paginate(
        self, query: str, variables: Optional[Dict[str, Any]] = None
    ) -> Paginator:
        """Create a ``Paginator`` for the query, bound to this namespace."""
        return Paginator(self, query, variables)
Loading

0 comments on commit 855eb17

Please sign in to comment.