Skip to content

Commit

Permalink
fix: when backfilling an index, don't delete other indexes
Browse files: browse the repository at this point in the history
  • Loading branch information
aaxelb committed Feb 21, 2025
1 parent 91ff316 commit 9ae8ba9
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
2 changes: 1 addition & 1 deletion share/search/index_strategy/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def parse_full_index_name(self, index_name: str) -> SpecificIndex:
_strategy = self.with_strategy_check(_strategy_check)
return _strategy.get_index(_etc[0] if _etc else '')

def with_strategy_check(self, strategy_check: str) -> IndexStrategy:
def with_strategy_check(self, strategy_check: str) -> typing.Self:
return dataclasses.replace(self, strategy_check=strategy_check)

def pls_setup(self, *, skip_backfill=False) -> None:
Expand Down
17 changes: 10 additions & 7 deletions share/search/index_strategy/trovesearch_denorm.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,14 @@ def _paths_and_values_mappings(cls):

# override method from Elastic8IndexStrategy
def after_chunk(self, messages_chunk: messages.MessagesChunk, affected_indexnames: Iterable[str]):
_strategy_checks = {
self.parse_full_index_name(_indexname).index_strategy.strategy_check
for _indexname in affected_indexnames
}
task__delete_iri_value_scraps.apply_async(
kwargs={
'index_strategy_name': self.strategy_name,
'indexnames': list(affected_indexnames),
'index_strategy_checks': list(_strategy_checks),
'card_pks': messages_chunk.target_ids_chunk,
'timestamp': messages_chunk.timestamp,
},
Expand Down Expand Up @@ -980,8 +984,8 @@ def _any_query(queries: abc.Collection[dict]):
def task__delete_iri_value_scraps(
task: celery.Task,
index_strategy_name: str,
index_strategy_checks: list[str],
card_pks: list[int],
indexnames: list[str],
timestamp: int,
):
'''followup task to delete value-docs no longer present
Expand All @@ -996,11 +1000,10 @@ def task__delete_iri_value_scraps(
from share.search.index_strategy import get_strategy
_index_strategy = get_strategy(index_strategy_name)
assert isinstance(_index_strategy, TrovesearchDenormIndexStrategy)
_irivalue_indexnames = {
_index.full_index_name
for _index in _index_strategy.each_live_index(any_strategy_check=True)
if _index.subname == 'iri_values'
}
_irivalue_indexnames = [
_index_strategy.with_strategy_check(_check).irivaluesearch_index().full_index_name
for _check in index_strategy_checks
]
# delete any docs that belong to cards in this chunk but weren't touched by indexing
_delete_resp = _index_strategy.es8_client.delete_by_query(
index=list(_irivalue_indexnames),
Expand Down

0 comments on commit 9ae8ba9

Please sign in to comment.