Skip to content

Commit

Permalink
Core: Improves performance of orm_cached with an in-memory cache
Browse files Browse the repository at this point in the history
This avoid deserialization overhead for potentially very large nested
structures, such as the pages tree. While still properly invalidating
the cache between multiple processes.

TYPE: Performance
LINK: OGC-1827
  • Loading branch information
Daverball authored Sep 17, 2024
1 parent 91aeb84 commit 0a9647d
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Please fill in the commit message below and work through the checklist. You can

<Optional Description>

TYPE: <Feature|Bugfix>
TYPE: <Feature|Performance|Bugfix>
LINK: <Ticket-Number>
HINT: <Optional upgrade hint>

Expand Down
60 changes: 30 additions & 30 deletions do/changes
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ if TYPE_CHECKING:
from git.objects.tag import TagObject
from git.repo import Repo
from git.types import PathLike
from typing_extensions import Self
from typing import Self


COMMIT_HEADER = re.compile(r'^[A-Z]{1}[A-Za-z6_\- ]{2,18}: ')
Expand Down Expand Up @@ -160,22 +160,22 @@ class ChangelogCommit:
)

if link.startswith('STAKA-'):
return f"https://ogc-ar.atlassian.net/projects/STAKA/issues/{link}"
return f'https://ogc-ar.atlassian.net/projects/STAKA/issues/{link}'

if link.startswith('STAKABS-'):
return f"https://kt-bs.atlassian.net/browse/{link}"
return f'https://kt-bs.atlassian.net/browse/{link}'

if link.startswith('SEA-'):
return f"https://linear.app/seantis/issue/{link}"
return f'https://linear.app/seantis/issue/{link}'

if link.startswith('OGC-'):
return f"https://linear.app/onegovcloud/issue/{link}"
return f'https://linear.app/onegovcloud/issue/{link}'

if link.startswith('SWI-'):
return f"https://linear.app/swissvotes/issue/{link}"
return f'https://linear.app/swissvotes/issue/{link}'

if link.startswith('PRO-'):
return f"https://linear.app/projuventute/issue/{link}"
return f'https://linear.app/projuventute/issue/{link}'

# default to a non-functioning link (they can be added later)
return f'#{self.link}'
Expand Down Expand Up @@ -217,7 +217,7 @@ def release_date(tag: 'TagObject') -> 'datetime':


def format_datetime(dt: 'date | datetime') -> str:
return dt.strftime("%Y-%m-%d")
return dt.strftime('%Y-%m-%d')


def changelog(folder: 'PathLike', new: str) -> None:
Expand All @@ -230,22 +230,22 @@ def changelog(folder: 'PathLike', new: str) -> None:
)

# header
print("# Changes")
print("")
print('# Changes')
print('')

# not yet released
if new == 'Unreleased':
print("## Unreleased")
print('## Unreleased')
else:
print(f"## {new.rsplit('-', 1)[-1]}")
print("")
print('')
changelog_commits(new, repository, repository.head.commit, tags[0].commit,
date.today())

# last 50 releases (the last version is not included by design)
for newer, older in pairwise(tags[:50]):
print(f"## {release_number(newer)}")
print("")
print(f'## {release_number(newer)}')
print('')

changelog_commits(newer.name, repository, newer.commit, older.commit,
release_date(newer))
Expand All @@ -263,16 +263,16 @@ def changelog_commits(

# quit if there are no commits
if not commit_objs:
print("No changes since last release")
print("")
print('No changes since last release')
print('')
return

# print the release overview
lo = commit_objs[0].hexsha[:10]
hi = commit_objs[-1].hexsha[:10]
url = f'https://github.com/OneGov/onegov-cloud/compare/{lo}^...{hi}'
print(f"`{format_datetime(release_date)}` | [{lo}...{hi}]({url})")
print("")
print(f'`{format_datetime(release_date)}` | [{lo}...{hi}]({url})')
print('')

# parse the commit messages
commits = [
Expand All @@ -282,38 +282,38 @@ def changelog_commits(

hints = [commit.hint for commit in commits if commit.hint]
if hints:
print("**Upgrade hints**")
print('**Upgrade hints**')
for hint in hints:
print(f"- {hint}")
print(f'- {hint}')

# sort the result by name and type
commits.sort(key=lambda k: (k.module, k.type_order))

# print the resulting information
for module, commit in groupby(commits, key=attrgetter('module')):
print(f"### {module}")
print("")
print(f'### {module}')
print('')

for c in commit:
print(f"##### {c.title}")
print("")
print(f'##### {c.title}')
print('')

if c.note:
print(c.note)
print("")
print('')

print(f"`{c.type}`", end='')
print(f'`{c.type}`', end='')

if c.link:
print(f" | [{c.link}]({c.link_url})", end='')
print(f' | [{c.link}]({c.link_url})', end='')

print(f" | [{c.short}]({c.url})")
print("")
print(f' | [{c.short}]({c.url})')
print('')


if __name__ == '__main__':
if len(sys.argv) == 1:
new = "Unreleased"
new = 'Unreleased'
else:
new = sys.argv[1]

Expand Down
14 changes: 14 additions & 0 deletions src/onegov/core/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,16 @@ class Framework(
#: the request cache is initialised/emptied before each request
request_cache: dict[str, Any]

#: the schema cache stays around for the entire runtime of the
#: application, but is switched, each time the schema changes
# NOTE: This cache should never be used to store ORM objects
# In addition this should generally be backed by a Redis
# cache to make sure the cache is synchronized between
# all processes. Although there may be some cases where
# it makes sense to use this cache on its own
schema_cache: dict[str, Any]
_all_schema_caches: dict[str, Any]

@property
def version(self) -> str:
from onegov.core import __version__
Expand Down Expand Up @@ -666,6 +676,10 @@ def set_application_id(self, application_id: str) -> None:
# then, replace the '/' with a '-' so the only dash left will be
# the dash between namespace and id
self.schema = application_id.replace('-', '_').replace('/', '-')
if not hasattr(self, '_all_schema_caches'):
self._all_schema_caches = {}

self.schema_cache = self._all_schema_caches.setdefault(self.schema, {})

if self.has_database_connection:
ScopedPropertyObserver.enter_scope(self)
Expand Down
42 changes: 40 additions & 2 deletions src/onegov/core/orm/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def users(self):
from libres.db.models import ORMBase
from onegov.core.orm.utils import maybe_merge
from sqlalchemy.orm.query import Query
from time import time


from typing import cast, overload, Any, Generic, TypeVar, TYPE_CHECKING
Expand Down Expand Up @@ -83,6 +84,7 @@ def app(self) -> 'OrmCacheApp': ...

_T = TypeVar('_T')
_QT = TypeVar('_QT')
unset = object()


class OrmCacheApp:
Expand All @@ -103,6 +105,7 @@ class OrmCacheApp:
def cache(self) -> RedisCacheRegion: ...
request_cache: dict[str, Any]
request_class: type[Request]
schema_cache: dict[str, Any]

def configure_orm_cache(self, **cfg: Any) -> None:
self.is_orm_cache_setup = getattr(self, 'is_orm_cache_setup', False)
Expand Down Expand Up @@ -163,6 +166,13 @@ def handle_orm_change(schema: str, obj: 'Base') -> None:
assert self.schema == schema
for cache_key in descriptor.used_cache_keys:
self.cache.delete(cache_key)
# NOTE: We also need to delete the timestamp, so we don't
# get stuck on an old timestamp forever, we use
# get_or_create for the timestamp below in order to
# avoid data races in cache invalidation
self.cache.delete(f'{cache_key}_ts')
if cache_key in self.schema_cache:
del self.schema_cache[cache_key]
if cache_key in self.request_cache:
del self.request_cache[cache_key]

Expand Down Expand Up @@ -297,17 +307,45 @@ def load(self, instance: 'OrmCacheApp | _HasApp') -> _T:
cache_key = self.cache_key(instance)
self.used_cache_keys.add(cache_key)

# we use a secondary request cache for even more lookup speed and to
# we use a tertiary request cache for even more lookup speed and to
# make sure that inside a request we always get the exact same instance
# (otherwise we don't see changes reflected)
if cache_key in app.request_cache:
return app.request_cache[cache_key]

else:
# we separately store when the redis cache was last populated
# so we can detect when we need to invalidate the memory cache
# dogpile has its own time metadata, but we can't retrieve it
# without paying the deserialization overhead, defeating the
# entire purpose of this secondary cache
ts_key = f'{cache_key}_ts'

# we use a secondary in-memory cache for more lookup speed
ts, obj = app.schema_cache.get(cache_key, (float('-Inf'), unset))
if obj is unset or ts != app.cache.get(key=ts_key):
# NOTE: Ideally we would create these values as a pair
# but then we would have to start circumventing
# most of dogpile's API, at which point we may
# as well just use raw Redis, which would give us
# even better possibilities.
# A data race isn't really harmful here, but it is
# kind of inefficient that we're sending two separate
# Redis commands, when one would suffice.
obj = app.cache.get_or_create(
key=cache_key,
creator=lambda: self.create(instance)
)
ts = app.cache.get_or_create(
key=ts_key,
# NOTE: There are some corner-cases where time can lead
# to incorrect cache-invalidation, but we can't use
# monotonic, since that will not lead to a meaningful
# comparison between different processes, dogpile
# also uses time for its own cache invalidation, so
# we should be fine
creator=time
)
app.schema_cache[cache_key] = (ts, obj)

app.request_cache[cache_key] = obj

Expand Down
44 changes: 38 additions & 6 deletions tests/onegov/core/test_orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1523,32 +1523,64 @@ class Document(Base):
'test_orm_cache.<locals>.App.untitled_documents': []
}

ts1 = app.cache.get('test_orm_cache.<locals>.App.documents_ts')
ts2 = app.cache.get('test_orm_cache.<locals>.App.first_document_ts')
ts3 = app.cache.get('test_orm_cache.<locals>.App.secret_document_ts')
ts4 = app.cache.get('test_orm_cache.<locals>.App.untitled_documents_ts')

assert app.schema_cache == {
'test_orm_cache.<locals>.App.documents': (ts1, tuple()),
'test_orm_cache.<locals>.App.first_document': (ts2, None),
'test_orm_cache.<locals>.App.secret_document': (ts3, None),
'test_orm_cache.<locals>.App.untitled_documents': (ts4, [])
}

assert app.cache.get('test_orm_cache.<locals>.App.documents') == tuple()
assert app.cache.get('test_orm_cache.<locals>.App.first_document') is None
assert app.cache.get('test_orm_cache.<locals>.App.secret_document') is None
assert app.cache.get('test_orm_cache.<locals>.App.untitled_documents')\
== []
assert app.cache.get(
'test_orm_cache.<locals>.App.untitled_documents') == []

# if we add a non-secret document all caches update except for the last one
app.session().add(Document(id=1, title='Public', body='Lorem Ipsum'))
transaction.commit()

assert app.cache.get('test_orm_cache.<locals>.App.documents') is NO_VALUE
assert app.cache.get('test_orm_cache.<locals>.App.first_document')\
is NO_VALUE
assert app.cache.get('test_orm_cache.<locals>.App.untitled_documents')\
is NO_VALUE
assert app.cache.get(
'test_orm_cache.<locals>.App.first_document') is NO_VALUE
assert app.cache.get(
'test_orm_cache.<locals>.App.untitled_documents') is NO_VALUE
assert app.cache.get('test_orm_cache.<locals>.App.secret_document') is None
assert app.cache.get(
'test_orm_cache.<locals>.App.documents_ts') is NO_VALUE
assert app.cache.get(
'test_orm_cache.<locals>.App.first_document_ts') is NO_VALUE
assert app.cache.get(
'test_orm_cache.<locals>.App.untitled_documents_ts') is NO_VALUE
assert app.cache.get(
'test_orm_cache.<locals>.App.secret_document_ts') == ts3

assert app.request_cache == {
'test_orm_cache.<locals>.App.secret_document': None,
}
assert app.schema_cache == {
'test_orm_cache.<locals>.App.secret_document': (ts3, None),
}

assert app.secret_document is None
assert app.first_document.title == 'Public'
assert app.untitled_documents == []
assert app.documents[0].title == 'Public'

# the timestamps for the changed caches should update, but the one
# that's still cached should stay the same
assert app.cache.get('test_orm_cache.<locals>.App.documents_ts') > ts1
assert app.cache.get('test_orm_cache.<locals>.App.first_document_ts') > ts2
assert app.cache.get(
'test_orm_cache.<locals>.App.secret_document_ts') == ts3
assert app.cache.get(
'test_orm_cache.<locals>.App.untitled_documents_ts') > ts4

# if we add a secret document all caches change
app.session().add(Document(id=2, title='Secret', body='Geheim'))
transaction.commit()
Expand Down

0 comments on commit 0a9647d

Please sign in to comment.