Skip to content

Commit

Permalink
urlstore: add method get_all_counts()
Browse files Browse the repository at this point in the history
  • Loading branch information
adbar committed Apr 24, 2023
1 parent 9f6944f commit 3d969fa
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 0 deletions.
4 changes: 4 additions & 0 deletions courlan/urlstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,10 @@ def get_crawl_delay(self, website: str, default: float = 5) -> float:

# GENERAL INFO

def get_all_counts(self) -> List[int]:
    """Return all download counts for the hosts in store.

    Returns:
        One ``count`` value per entry of ``self.urldict``, in the
        mapping's iteration order. An empty store yields ``[]``.
    """
    # Iterate the values directly: the host keys are not needed, and this
    # avoids a second lookup into the mapping for every entry.
    return [entry.count for entry in self.urldict.values()]

def total_url_number(self) -> int:
    """Find number of all URLs in store.

    Returns:
        The sum of ``len(entry.tuples)`` over every entry of
        ``self.urldict``; ``0`` when the store is empty.
    """
    # NOTE(review): this measures ``len()`` of whatever ``tuples`` holds;
    # presumably that is a plain sequence of URL records -- confirm this
    # still holds when the store is used in compressed mode.
    return sum(len(entry.tuples) for entry in self.urldict.values())
Expand Down
2 changes: 2 additions & 0 deletions tests/urlstore_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def test_urlstore():
urls
)
assert my_urls.total_url_number() == len(urls)
assert my_urls.get_all_counts() == [0, 0]

if my_urls.compressed is False:
assert sum(len(v.tuples) for _, v in my_urls.urldict.items()) == len(urls)
Expand Down Expand Up @@ -170,6 +171,7 @@ def test_urlstore():
assert my_urls.urldict[example_domain].count == 2
assert timestamp != my_urls.urldict[example_domain].timestamp
assert url2 not in set(my_urls.find_unvisited_urls(example_domain))
assert my_urls.get_all_counts() == [2, 0, 0]

# as_visited=False
timestamp = my_urls.urldict[example_domain].timestamp
Expand Down

0 comments on commit 3d969fa

Please sign in to comment.