2 files changed: +10 -1 lines changed
Original file line number Diff line number Diff line change
@@ -406,6 +406,7 @@ class InnerSerp(InnerDocument):
406
406
class Result (BaseDocument ):
407
407
archive : InnerArchive = Object (InnerArchive )
408
408
provider : InnerProvider = Object (InnerProvider )
409
+ capture : InnerCapture = Object (InnerCapture )
409
410
serp : InnerSerp = Object (InnerSerp )
410
411
snippet : Snippet = Object (Snippet )
411
412
snippet_parser : InnerParser | None = Object (InnerParser )
Original file line number Diff line number Diff line change @@ -71,6 +71,7 @@ def add_warc_snippets_parser(
71
71
72
72
def _parse_warc_snippets (
73
73
parser : WarcSnippetsParser ,
74
+ serp_id : str ,
74
75
capture_url : str ,
75
76
warc_store : WarcS3Store ,
76
77
warc_location : WarcLocation ,
@@ -121,6 +122,7 @@ def _parse_warc_snippets(
121
122
with_tail = True ,
122
123
)
123
124
snippet_id_components = (
125
+ serp_id ,
124
126
parser .id ,
125
127
str (hash (content )),
126
128
str (i ),
@@ -175,7 +177,12 @@ def _parse_serp_warc_snippets_action(
175
177
for parser in _warc_snippets_parsers (config , serp .provider .id ):
176
178
# Try to parse the snippets.
177
179
warc_snippets = _parse_warc_snippets (
178
- parser , serp .capture .url , config .s3 .warc_store , serp .warc_location )
180
+ parser = parser ,
181
+ serp_id = serp .id ,
182
+ capture_url = serp .capture .url ,
183
+ warc_store = config .s3 .warc_store ,
184
+ warc_location = serp .warc_location ,
185
+ )
179
186
if warc_snippets is None :
180
187
# Parsing was not successful, e.g., URL pattern did not match.
181
188
continue
@@ -188,6 +195,7 @@ def _parse_serp_warc_snippets_action(
188
195
meta = {"id" : snippet .id },
189
196
archive = serp .archive ,
190
197
provider = serp .provider ,
198
+ capture = serp .capture ,
191
199
serp = InnerSerp (
192
200
id = serp .id ,
193
201
),
You can’t perform that action at this time.
0 commit comments