Skip to content

Commit cea4d05

Browse files
authored
perf(exports): cache results of attachment xpaths in protected property of Asset model DEV-706 (#5925)
### 📣 Summary Speed up and improve the performance of both sync and async exports ### 📖 Description This PR introduces a performance optimization for exports by caching the results of attachment xpaths computation in a protected property on the Asset model instance. Previously, the same logic was computed repeatedly, under certain circumstances, during export processes (both sync and async) , leading to redundant work and slower execution.
1 parent 1b3318e commit cea4d05

File tree

3 files changed

+88
-9
lines changed

3 files changed

+88
-9
lines changed

kobo/apps/subsequences/utils/deprecation.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def get_sanitized_advanced_features(asset: 'Asset') -> dict | None:
2323
qual_survey_iter = deepcopy(qual_survey_orig)
2424
for idx, qual_q in enumerate(qual_survey_iter):
2525
qpath = qual_survey_orig[idx]['qpath']
26-
xpath = qpath_to_xpath(qpath, asset)
26+
xpath = asset.get_xpath_from_qpath(qpath)
2727
del qual_survey_orig[idx]['qpath']
2828
qual_survey_orig[idx]['xpath'] = xpath
2929

@@ -39,7 +39,7 @@ def get_sanitized_dict_keys(dict_to_update: dict, asset: 'Asset') -> dict | None
3939
changed = False
4040
for old_xpath, values in dict_to_update.items():
4141
if '-' in old_xpath and '/' not in old_xpath:
42-
xpath = qpath_to_xpath(old_xpath, asset)
42+
xpath = asset.get_xpath_from_qpath(old_xpath)
4343
if xpath == old_xpath:
4444
continue
4545

@@ -62,7 +62,7 @@ def get_sanitized_known_columns(asset: 'Asset') -> list:
6262
# it will enter this condition - which is not a problem except extra
6363
# CPU usage for nothing.
6464
if '-' in xpath and '/' not in xpath:
65-
xpath = qpath_to_xpath(xpath, asset)
65+
xpath = asset.get_xpath_from_qpath(xpath)
6666
rest.insert(0, xpath)
6767
known_cols[idx] = ':'.join(rest)
6868

kpi/models/asset.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,7 @@ def analysis_form_json(self, omit_question_types=None):
562562
try:
563563
xpath = qual_question['xpath']
564564
except KeyError:
565-
xpath = qpath_to_xpath(qual_question['qpath'], self)
565+
xpath = self.get_xpath_from_qpath(qual_question['qpath'])
566566

567567
field = dict(
568568
label=qual_question['labels']['_default'],
@@ -662,15 +662,25 @@ def get_advanced_submission_schema(self, url=None, content=False):
662662
content, self.advanced_features, url=url
663663
)
664664

665-
def get_all_attachment_xpaths(self):
665+
def get_all_attachment_xpaths(self) -> list:
666+
667+
# We previously used `cache_for_request`, but it provides no benefit in Celery
668+
# tasks. A "protected" property on the Asset instance now serves the same
669+
# purpose during its lifecycle.
670+
if (
671+
_all_attachment_xpaths := getattr(self, '_all_attachment_xpaths', None)
672+
) is not None:
673+
return _all_attachment_xpaths
674+
666675
# return deployed versions first
667676
versions = self.asset_versions.filter(deployed=True).order_by('-date_modified')
668677
xpaths = set()
669678
for version in versions:
670679
if xpaths_from_version := self.get_attachment_xpaths_from_version(version):
671680
xpaths.update(xpaths_from_version)
672681

673-
return list(xpaths)
682+
setattr(self, '_all_attachment_xpaths', list(xpaths))
683+
return self._all_attachment_xpaths
674684

675685
def get_attachment_xpaths_from_version(self, version=None) -> Optional[list]:
676686

@@ -831,6 +841,23 @@ def get_partial_perms(
831841

832842
return None
833843

844+
def get_xpath_from_qpath(self, qpath: str) -> str:
845+
846+
# We could have used `cache_for_request` in the `qpath_to_xpath` utility,
847+
# but it provides no benefit in Celery tasks.
848+
# Instead, we use a "protected" property on the Asset model to cache the result
849+
# during the lifetime of the asset instance.
850+
qpaths_xpaths_mapping = getattr(self, '_qpaths_xpaths_mapping', {})
851+
852+
try:
853+
xpath = qpaths_xpaths_mapping[qpath]
854+
except KeyError:
855+
qpaths_xpaths_mapping[qpath] = qpath_to_xpath(qpath, self)
856+
xpath = qpaths_xpaths_mapping[qpath]
857+
858+
setattr(self, '_qpaths_xpaths_mapping', qpaths_xpaths_mapping)
859+
return xpath
860+
834861
@property
835862
def has_advanced_features(self):
836863
if self.advanced_features is None:
@@ -910,6 +937,9 @@ def refresh_from_db(self, using=None, fields=None):
910937
super().refresh_from_db(using=using, fields=fields)
911938
# Refresh hidden fields too
912939
self.__copy_hidden_fields(fields)
940+
# reset caching fields
941+
self._qpaths_xpaths_mapping = {}
942+
self._all_attachment_xpaths = None
913943

914944
def rename_translation(self, _from, _to):
915945
if not self._has_translations(self.content, 2):

kpi/tests/test_asset_content.py

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from collections import OrderedDict
55
from copy import deepcopy
66
from functools import reduce
7+
from unittest.mock import patch
78

89
from django.conf import settings
910
from django.test import TestCase
@@ -870,10 +871,10 @@ def test_populates_xpath_correctly():
870871

871872
class TestAssetContent(TestCase):
872873

873-
def test_get_attachment_xpaths_from_all_versions(self):
874+
def setUp(self):
874875
user = baker.make(settings.AUTH_USER_MODEL, username='johndoe')
875876
# survey with 1 attachment question
876-
asset = Asset.objects.create(
877+
self.asset = Asset.objects.create(
877878
owner=user,
878879
content={
879880
'survey': [
@@ -886,7 +887,11 @@ def test_get_attachment_xpaths_from_all_versions(self):
886887
]
887888
},
888889
)
889-
asset.deploy(backend='mock')
890+
self.asset.deploy(backend='mock')
891+
892+
def test_get_attachment_xpaths_from_all_versions(self):
893+
894+
asset = self.asset
890895
# move the attachment question to a group
891896
asset.content = {
892897
'survey': [
@@ -931,3 +936,47 @@ def test_get_attachment_xpaths_from_all_versions(self):
931936
# Validate XPaths can still be retrieved even on old versions
932937
xpaths = asset.get_all_attachment_xpaths()
933938
assert sorted(xpaths) == ['Image', 'group_kq1rd43/Image']
939+
940+
@patch('kpi.models.asset.Asset.get_attachment_xpaths_from_version')
941+
def test_xpath_method_called_once_for_single_version(
942+
self, mock_get_xpaths_from_version
943+
):
944+
"""
945+
Test `get_attachment_xpaths_from_version()` is called only once
946+
for a single deployed version and results are cached
947+
"""
948+
# Simulate return value for a single version
949+
mock_get_xpaths_from_version.return_value = ['Image']
950+
951+
# Call the method twice, should hit cache after first call
952+
self.asset.get_all_attachment_xpaths()
953+
self.asset.get_all_attachment_xpaths()
954+
955+
# Confirm that the mock was called only once
956+
mock_get_xpaths_from_version.assert_called_once()
957+
958+
@patch('kpi.models.asset.Asset.get_attachment_xpaths_from_version')
959+
def test_xpath_method_called_once_per_version(self, mock_get_xpaths_from_version):
960+
"""
961+
Test `get_attachment_xpaths_from_version()` is called only once per deployed
962+
version and results are cached
963+
"""
964+
self.asset.content['survey'].append(
965+
{'type': 'image', 'name': 'image2', 'label': ['image2']}
966+
)
967+
self.asset.save()
968+
self.asset.deploy(backend='mock', active=True)
969+
970+
# Use side_effect to simulate distinct outputs for each version
971+
mock_get_xpaths_from_version.side_effect = [
972+
['image'], # first version
973+
['image', 'image2'], # second version
974+
]
975+
976+
# Call multiple times, should hit cache for each version
977+
self.asset.get_all_attachment_xpaths()
978+
self.asset.get_all_attachment_xpaths()
979+
self.asset.get_all_attachment_xpaths()
980+
981+
# Confirm it was called twice, once for each version
982+
self.assertEqual(mock_get_xpaths_from_version.call_count, 2)

0 commit comments

Comments
 (0)