Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 21 additions & 16 deletions geonode/base/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ def get_entry(self, lang, data_key):
.first()
)
if cached_entry and cached_entry.date != thesaurus_date:
logger.info(f"Cache for {lang}:{data_key} needs to be recreated")
logger.info(f"Cache for {lang}:{data_key} dirty, clearing all caches")
self.lang_cache.clear()
return thesaurus_date, None
Comment on lines 117 to 120
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_entry() now clears self.lang_cache under _lock, but set() and clear() mutate self.lang_cache without acquiring the same lock. This creates a real race where one thread can be in setdefault()/write while another thread clears the dict, leading to lost updates and inconsistent cache state. Consider guarding all lang_cache mutations (set(), clear(), force_check()) with _lock (or switching to a thread-safe strategy) so invalidation is atomic across readers/writers.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot apply changes based on this feedback

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Applied in commit a2745f9. All lang_cache mutations (set(), clear(), force_check()) are now guarded with _lock. The DB query in set() is intentionally kept outside the lock to avoid holding it during I/O, with only the actual cache mutations inside the lock context.

Comment on lines 117 to 120
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new behavior invalidates the entire i18n cache when the Thesaurus date changes. Given this is a regression fix (and i18nCache is also used for other cached data like metadata schema), it would be good to add a unit test that reproduces the stale-cache scenario and asserts that previously cached values (e.g., schema and/or labels for multiple languages) are rebuilt after a Thesaurus update.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot apply changes based on this feedback

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in commit d6add25. The new test test_stale_cache_invalidated_on_thesaurus_update in geonode/metadata/tests/test_i18n.py:

  1. Warms up the cache with labels for both en and it
  2. Updates the label values in the DB and bumps the Thesaurus date (simulating a real thesaurus update)
  3. Forces a freshness check (bypassing CHECK_INTERVAL)
  4. Asserts that both language caches are rebuilt with the new values, not the stale ones

if not cached_entry:
logger.info(f"Cache for {lang}:{data_key} needs to be created")
Expand All @@ -126,31 +127,35 @@ def get_entry(self, lang, data_key):

def set(self, lang: str, data_key: str, data, request_date: str):
    """Store `data` under (lang, data_key), but only if the thesaurus is unchanged.

    `request_date` is the thesaurus date observed when the caller started
    building `data`. If the Thesaurus date in the DB has advanced since then,
    the freshly built data is already stale and is discarded.

    :return: True when the entry was cached, False when it was rejected as stale.
    """
    # TODO: check if lang is allowed

    # Perform DB query outside the lock to avoid holding it during I/O
    latest_date = (
        Thesaurus.objects.filter(identifier=I18N_THESAURUS_IDENTIFIER).values_list("date", flat=True).first()
    )

    with self._lock:
        # All lang_cache mutations happen under _lock so that invalidation
        # (clear/get_entry) is atomic with respect to concurrent writers.
        cached_entry: I18nCacheEntry = self.lang_cache.setdefault(lang, I18nCacheEntry())

        if request_date == latest_date:
            # no changes after processing, set the info right away
            logger.debug(f"Caching lang:{lang} key:{data_key} date:{request_date}")
            cached_entry.date = latest_date
            cached_entry.caches[data_key] = data
            return True
        else:
            logger.warning(
                f"Cache will not be updated for lang:{lang} key:{data_key} reqdate:{request_date} latest:{latest_date}"
            )
            return False

def clear(self):
    """Drop every cached entry for all languages, atomically w.r.t. other cache mutations."""
    logger.info("Clearing i18n cache")
    with self._lock:
        self.lang_cache.clear()

def force_check(self):
    """For testing: forces a check against the DB on the next get_entry call."""
    with self._lock:
        # Zeroing the last-check timestamp makes the next get_entry bypass
        # the CHECK_INTERVAL throttle and re-validate against the DB.
        self._last_check = 0


class LabelResolver:
Expand Down
38 changes: 37 additions & 1 deletion geonode/metadata/tests/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from geonode.metadata.handlers.sparse import SparseHandler, SparseFieldRegistry
from geonode.metadata.manager import MetadataManager

from geonode.base.i18n import I18N_THESAURUS_IDENTIFIER, i18nCache
from geonode.base.i18n import I18N_THESAURUS_IDENTIFIER, i18nCache, labelResolver
from geonode.base.models import (
ThesaurusKeyword,
ThesaurusKeywordLabel,
Expand Down Expand Up @@ -147,3 +147,39 @@ def test_schema_i18n_title_defined(self):
self._add_label("field1__ovr", "en", "f1_ovr_en")
schema = self.mm.build_schema(lang="en")
self.assertEqual("f1_ovr_en", schema["properties"]["field1"]["title"])

def test_stale_cache_invalidated_on_thesaurus_update(self):
    """
    Ensure that all language caches (en and it) are invalidated when the Thesaurus date
    changes, so that stale values are never served after a thesaurus update.
    """
    # Seed one keyword with a label per language, then warm both caches
    self._add_label("key1", "en", "key1_en_v1")
    self._add_label("key1", "it", "key1_it_v1")

    self.assertEqual("key1_en_v1", labelResolver.get_labels("en").get("key1"))
    self.assertEqual("key1_it_v1", labelResolver.get_labels("it").get("key1"))

    # Simulate a thesaurus edit by rewriting the label values directly in the DB
    for language, fresh_label in (("en", "key1_en_v2"), ("it", "key1_it_v2")):
        ThesaurusKeywordLabel.objects.filter(
            keyword__thesaurus_id=self.tid, keyword__about="key1", lang=language
        ).update(label=fresh_label)

    # Bump the Thesaurus date, as happens when the thesaurus is updated
    Thesaurus.objects.filter(id=self.tid).update(date="2024-01-01")

    # Force cache freshness check to bypass CHECK_INTERVAL
    i18nCache.force_check()

    # Both language caches must be rebuilt with the new values
    self.assertEqual("key1_en_v2", labelResolver.get_labels("en").get("key1"))
    self.assertEqual("key1_it_v2", labelResolver.get_labels("it").get("key1"))
Loading