
Merge branch 'replay-json-pages' into dev-test
ikreymer committed Feb 9, 2025
2 parents 5f321a5 + af93ad9 · commit 25ef155
Showing 3 changed files with 61 additions and 4 deletions.
16 changes: 13 additions & 3 deletions backend/btrixcloud/colls.py
@@ -351,8 +351,10 @@ async def get_collection_out(
             coll_id, is_seed=True, page_size=25
         )
         result["pages"] = pages
+        public = "public/" if public_or_unlisted_only else ""
         result["pagesQuery"] = (
-            get_origin(headers) + f"/api/orgs/{org.id}/collections/{coll_id}/pages"
+            get_origin(headers)
+            + f"/api/orgs/{org.id}/collections/{coll_id}/{public}pages"
         )
 
         thumbnail = result.get("thumbnail")
@@ -539,9 +541,17 @@ async def get_collection_search_values(self, org: Organization):
         names = [name for name in names if name]
         return {"names": names}
 
-    async def get_collection_crawl_ids(self, coll_id: UUID) -> List[str]:
-        """Return list of crawl ids in collection"""
+    async def get_collection_crawl_ids(
+        self, coll_id: UUID, public_or_unlisted_only=False
+    ) -> List[str]:
+        """Return list of crawl ids in collection, optionally public/unlisted only"""
         crawl_ids = []
+        if public_or_unlisted_only:
+            try:
+                await self.get_collection_raw(coll_id, public_or_unlisted_only)
+            except HTTPException:
+                return []
+
         async for crawl_raw in self.crawls.find(
             {"collectionIds": coll_id}, projection=["_id"]
         ):
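The net effect in colls.py: when only public or unlisted content is requested, the collection's pagesQuery URL now points at the new public pages endpoint. A minimal sketch of the URL logic (the helper name and placeholder values below are illustrative, not part of this commit):

    def pages_query(origin: str, org_id: str, coll_id: str, public_or_unlisted_only: bool) -> str:
        # Mirrors the updated expression in get_collection_out
        public = "public/" if public_or_unlisted_only else ""
        return origin + f"/api/orgs/{org_id}/collections/{coll_id}/{public}pages"

    # pages_query("https://app.example.com", "org-uuid", "coll-uuid", True)
    #   -> ".../api/orgs/org-uuid/collections/coll-uuid/public/pages"
    # pages_query("https://app.example.com", "org-uuid", "coll-uuid", False)
    #   -> ".../api/orgs/org-uuid/collections/coll-uuid/pages"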
41 changes: 40 additions & 1 deletion backend/btrixcloud/pages.py
@@ -670,14 +670,17 @@ async def list_collection_pages(
         page: int = 1,
         sort_by: Optional[str] = None,
         sort_direction: Optional[int] = -1,
+        public_or_unlisted_only=False,
     ) -> Tuple[Union[List[PageOut], List[PageOutWithSingleQA]], int]:
         """List all pages in collection, with optional filtering"""
         # pylint: disable=duplicate-code, too-many-locals, too-many-branches, too-many-statements
         # Zero-index page for query
         page = page - 1
         skip = page_size * page
 
-        crawl_ids = await self.coll_ops.get_collection_crawl_ids(coll_id)
+        crawl_ids = await self.coll_ops.get_collection_crawl_ids(
+            coll_id, public_or_unlisted_only
+        )
 
         query: dict[str, object] = {
             "crawl_id": {"$in": crawl_ids},
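Threading public_or_unlisted_only through here means a non-public collection yields an empty crawl_ids list, so the $in query above matches no pages. A toy stand-in for that behavior (hypothetical data, not the real Mongo layer):

    pages = [
        {"url": "https://example.com/", "crawl_id": "crawl-1"},
        {"url": "https://example.com/about", "crawl_id": "crawl-2"},
    ]

    def find_pages(crawl_ids: list[str]) -> list[dict]:
        # Stands in for: self.pages.find({"crawl_id": {"$in": crawl_ids}, ...})
        return [p for p in pages if p["crawl_id"] in crawl_ids]

    assert find_pages([]) == []              # non-public collection: nothing is exposed
    assert len(find_pages(["crawl-1"])) == 1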

@@ -886,6 +889,7 @@ def init_pages_api(
 
     org_viewer_dep = org_ops.org_viewer_dep
     org_crawl_dep = org_ops.org_crawl_dep
+    org_public = org_ops.org_public
 
     @app.post(
         "/orgs/{oid}/crawls/all/pages/reAdd",
@@ -1092,6 +1096,41 @@ async def get_crawl_pages_list(
         )
         return paginated_format(pages, total, page, pageSize)
 
+    @app.get(
+        "/orgs/{oid}/collections/{coll_id}/public/pages",
+        tags=["pages", "collections"],
+        response_model=PaginatedPageOutResponse,
+    )
+    async def get_public_collection_pages_list(
+        coll_id: UUID,
+        org: Organization = Depends(org_public),
+        url: Optional[str] = None,
+        urlPrefix: Optional[str] = None,
+        ts: Optional[datetime] = None,
+        isSeed: Optional[bool] = None,
+        depth: Optional[int] = None,
+        pageSize: int = DEFAULT_PAGE_SIZE,
+        page: int = 1,
+        sortBy: Optional[str] = None,
+        sortDirection: Optional[int] = -1,
+    ):
+        """Retrieve paginated list of pages in public collection"""
+        pages, total = await ops.list_collection_pages(
+            coll_id=coll_id,
+            org=org,
+            url=url,
+            url_prefix=urlPrefix,
+            ts=ts,
+            is_seed=isSeed,
+            depth=depth,
+            page_size=pageSize,
+            page=page,
+            sort_by=sortBy,
+            sort_direction=sortDirection,
+            public_or_unlisted_only=True,
+        )
+        return paginated_format(pages, total, page, pageSize)
+
     @app.get(
         "/orgs/{oid}/collections/{coll_id}/pages",
         tags=["pages", "collections"],
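With the endpoint above registered behind the org_public dependency, a client should be able to list pages of a public collection without authentication. A sketch of such a call (placeholder host and IDs; assumes the items/total envelope that paginated_format produces for the other pages endpoints):

    import requests

    # Placeholder deployment and IDs, for illustration only
    base = "https://app.example.com/api"
    oid = "<org-id>"
    coll_id = "<collection-id>"

    r = requests.get(
        f"{base}/orgs/{oid}/collections/{coll_id}/public/pages",
        params={"pageSize": 25, "page": 1},
    )
    r.raise_for_status()
    data = r.json()
    print(data["total"], [p["url"] for p in data["items"]])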
8 changes: 8 additions & 0 deletions backend/test/test_collections.py
@@ -84,6 +84,10 @@ def test_create_collection(
     assert data["pageCount"] > 0
     assert data["uniquePageCount"] > 0
     assert data["totalSize"] > 0
+    assert (
+        data["pagesQuery"]
+        == f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/pages"
+    )
     modified = data["modified"]
     assert modified
     assert modified.endswith("Z")
@@ -413,6 +417,10 @@ def test_collection_public(crawler_auth_headers, default_org_id):
         f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/public/replay.json",
         headers=crawler_auth_headers,
     )
+    assert (
+        r.json()["pagesQuery"]
+        == f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/public/pages"
+    )
     assert r.status_code == 200
     assert r.headers["Access-Control-Allow-Origin"] == "*"
     assert r.headers["Access-Control-Allow-Headers"] == "*"
