@@ -682,7 +682,11 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
682
682
683
683
# Test GET page endpoint
684
684
global page_id
685
- page_id = pages [0 ]["id" ]
685
+ test_page = pages [0 ]
686
+ page_id = test_page ["id" ]
687
+ test_page_url = test_page ["url" ]
688
+ test_page_ts = test_page ["ts" ]
689
+
686
690
r = requests .get (
687
691
f"{ API_PREFIX } /orgs/{ default_org_id } /crawls/{ crawler_crawl_id } /pages/{ page_id } " ,
688
692
headers = crawler_auth_headers ,
@@ -710,6 +714,51 @@ def test_crawl_pages(crawler_auth_headers, default_org_id, crawler_crawl_id):
710
714
assert page .get ("modified" ) is None
711
715
assert page .get ("approved" ) is None
712
716
717
+ # Test exact url filter
718
+ r = requests .get (
719
+ f"{ API_PREFIX } /orgs/{ default_org_id } /crawls/{ crawler_crawl_id } /pages?url={ test_page_url } " ,
720
+ headers = crawler_auth_headers ,
721
+ )
722
+ assert r .status_code == 200
723
+ data = r .json ()
724
+
725
+ assert data ["total" ] >= 1
726
+ for matching_page in data ["items" ]:
727
+ assert matching_page ["url" ] == test_page_url
728
+
729
+ # Test exact url and ts filters together
730
+ r = requests .get (
731
+ f"{ API_PREFIX } /orgs/{ default_org_id } /crawls/{ crawler_crawl_id } /pages?url={ test_page_url } &ts={ test_page_ts } " ,
732
+ headers = crawler_auth_headers ,
733
+ )
734
+ assert r .status_code == 200
735
+ data = r .json ()
736
+
737
+ assert data ["total" ] >= 1
738
+ for matching_page in data ["items" ]:
739
+ assert matching_page ["url" ] == test_page_url
740
+ assert matching_page ["ts" ] == test_page_ts
741
+
742
+ # Test urlPrefix filter
743
+ url_prefix = test_page_url [:8 ]
744
+ r = requests .get (
745
+ f"{ API_PREFIX } /orgs/{ default_org_id } /crawls/{ crawler_crawl_id } /pages?urlPrefix={ url_prefix } " ,
746
+ headers = crawler_auth_headers ,
747
+ )
748
+ assert r .status_code == 200
749
+ data = r .json ()
750
+
751
+ assert data ["total" ] >= 1
752
+
753
+ found_matching_page = False
754
+ for page in data ["items" ]:
755
+ if page ["id" ] == page_id and page ["url" ] == test_page_url :
756
+ found_matching_page = True
757
+
758
+ assert found_matching_page
759
+
760
+
761
+ def test_crawl_pages_qa_filters (crawler_auth_headers , default_org_id , crawler_crawl_id ):
713
762
# Test reviewed filter (page has no notes and no approval, so it should show up when reviewed=False)
714
763
r = requests .get (
715
764
f"{ API_PREFIX } /orgs/{ default_org_id } /crawls/{ crawler_crawl_id } /pages?reviewed=False" ,
0 commit comments