Skip to content

Commit

Permalink
fix: check document id after pre-processing (#263)
Browse files (browse the repository at this point in the history)
Because the id might be computed at pre-processing time.
  • Loading branch information
alexgarel authored Dec 2, 2024
1 parent e747cdf commit d363b41
Showing 1 changed file with 6 additions and 7 deletions.
13 changes: 6 additions & 7 deletions app/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,13 +311,6 @@ def from_result(self, result: FetcherResult) -> FetcherResult:
if data is None:
# unexpected !
return FetcherResult(status=FetcherStatus.OTHER, document=None)
id_field_name = self.config.index.id_field_name

_id = data.get(id_field_name)
if _id is None or _id in self.config.document_denylist:
# We don't process the document if it has no ID or if it's in the
# denylist
return FetcherResult(status=FetcherStatus.SKIP, document=None)

processed_result = (
self.preprocessor.preprocess(data)
Expand All @@ -326,6 +319,8 @@ def from_result(self, result: FetcherResult) -> FetcherResult:
else result
)

id_field_name = self.config.index.id_field_name
_id = data.get(id_field_name)
if processed_result.status == FetcherStatus.REMOVED:
return FetcherResult(
status=FetcherStatus.REMOVED,
Expand All @@ -336,6 +331,10 @@ def from_result(self, result: FetcherResult) -> FetcherResult:
or processed_result.document is None
):
return processed_result
elif _id is None or _id in self.config.document_denylist:
# We don't add the document if it has no ID or if it's in the
# denylist
return FetcherResult(status=FetcherStatus.SKIP, document={"_id": _id})

processed_data = processed_result.document

Expand Down

0 comments on commit d363b41

Please sign in to comment.