We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 83677bd, commit c26951a. Copy full SHA for c26951a
archive_query_log/downloaders/warc.py
@@ -116,7 +116,14 @@ def download_serps_warc(config: Config) -> None:
116
echo("No new/changed captures.")
117
return
118
119
- changed_serps: Iterable[Serp] = changed_serps_search.scan()
+ changed_serps: Iterable[Serp] = (
120
+ changed_serps_search
121
# Downloading WARCs is very slow, so we keep the Elasticsearch
122
# scroll context alive for a full day, assuming that
123
# 1000 WARCs can be downloaded in 24h.
124
+ .params(scroll="24h")
125
+ .scan()
126
+ )
127
changed_serps = safe_iter_scan(changed_serps)
128
# noinspection PyTypeChecker
129
changed_serps = tqdm(changed_serps, total=num_changed_serps,
0 commit comments