Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce memory overhead of searches #6471

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
type: perf
issue: 6469
title: "Searching for a large number of resources can use a lot of
memory, due to the nature of deduplication of results in memory.
We will instead push this responsibility to the db to save
reduce this overhead.
"
Original file line number Diff line number Diff line change
Expand Up @@ -719,9 +719,12 @@ private void createChunkedQueryNormalSearch(
}

/*
* If offset is present, we want deduplicate the results by using GROUP BY
* If offset is present, we want to deduplicate the results by using GROUP BY;
* OR
* if the MaxResultsToFetch is null, we are requesting "everything",
* so we'll let the db do the deduplication (instead of in-memory)
*/
if (theOffset != null) {
if (theOffset != null || myMaxResultsToFetch == null) {
queryStack3.addGrouping();
queryStack3.setUseAggregate(true);
}
Expand Down Expand Up @@ -2403,7 +2406,14 @@ private void fetchNext() {

if (nextLong != null) {
JpaPid next = JpaPid.fromId(nextLong);
if (myPidSet.add(next) && doNotSkipNextPidForEverything()) {
if (doNotSkipNextPidForEverything() && !myPidSet.contains(next)) {
if (myMaxResultsToFetch != null) {
// we only add to the map if we aren't fetching "everything";
// otherwise, we let the de-duplication happen in the database
// (see createChunkedQueryNormalSearch above), because it saves
// memory that way
myPidSet.add(next);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: add some javadoc to the myPidSet field declaration explaining these two usages: skipping prior results, and de-dupe of res_id from the new sql.

}
myNext = next;
myNonSkipCount++;
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
import ca.uhn.fhir.jpa.test.BaseJpaR4Test;
import ca.uhn.fhir.jpa.util.QueryParameterUtils;
import ca.uhn.fhir.jpa.util.SqlQuery;
import ca.uhn.fhir.rest.api.SearchTotalModeEnum;
import ca.uhn.fhir.rest.api.SortSpec;
import ca.uhn.fhir.rest.api.SummaryEnum;
Expand All @@ -33,6 +34,7 @@
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.hl7.fhir.instance.model.api.IAnyResource;
import org.hl7.fhir.instance.model.api.IIdType;
import org.hl7.fhir.r4.model.BaseResource;
import org.hl7.fhir.r4.model.BodyStructure;
import org.hl7.fhir.r4.model.CodeableConcept;
import org.hl7.fhir.r4.model.Coding;
Expand Down Expand Up @@ -65,14 +67,18 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.StringUtils.leftPad;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.fail;
import static org.awaitility.Awaitility.await;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;


Expand Down Expand Up @@ -556,6 +562,67 @@ public void testFetchOnlySmallBatches() {

}

/**
 * We want to use the db to deduplicate in the "fetch everything"
 * case because it's more memory efficient.
 * <p>
 * Verifies that once paging moves past the configured pre-fetch
 * threshold, the generated SQL contains a GROUP BY on RES_ID
 * (deduplication pushed to the database), while earlier pages
 * within the threshold do not.
 */
@Test
public void search_whenPastPreFetchLimit_usesDBToDeduplicate() {
	// setup
	IBundleProvider results;
	List<SqlQuery> queries;
	List<String> ids;

	create200Patients();

	myCaptureQueriesListener.clear();
	// set the prefetch thresholds low so we don't need to
	// search for tons of resources
	myStorageSettings.setSearchPreFetchThresholds(List.of(5, 10, -1));

	// basic search map; sort forces a deterministic ordering through the query builder
	SearchParameterMap map = new SearchParameterMap();
	map.setSort(new SortSpec(BaseResource.SP_RES_LAST_UPDATED));

	// test
	results = myPatientDao.search(map, null);
	// check for null BEFORE dereferencing the provider, otherwise the
	// assertion can never fire (getUuid() would NPE first)
	assertNotNull(results);
	String uuid = results.getUuid();
	ourLog.debug("** Search returned UUID: {}", uuid);
	ids = toUnqualifiedVersionlessIdValues(results, 0, 9, true);
	assertEquals(9, ids.size());

	// first search was < 10 (our max pre-fetch value); so we should
	// expect no "group by" queries (we deduplicate in memory)
	queries = findGroupByQueries();
	assertTrue(queries.isEmpty());
	myCaptureQueriesListener.clear();

	ids = toUnqualifiedVersionlessIdValues(results, 10, 100, true);
	assertEquals(90, ids.size());

	// we are now requesting > 10 results, meaning we should be using the
	// database to deduplicate any values not fetched yet;
	// so we *do* expect to see exactly one "group by" query
	queries = findGroupByQueries();
	assertEquals(1, queries.size());
	SqlQuery query = queries.get(0);
	String sql = query.getSql(true, false);
	// we expect a "GROUP BY t0.RES_ID" (but we'll be ambiguous about the table
	// name, just in case)
	Pattern p = Pattern.compile("GROUP BY .+\\.RES_ID");
	Matcher m = p.matcher(sql);
	assertTrue(m.find());
}

/**
 * Returns every captured SELECT statement whose rendered SQL
 * contains a "group by" clause (matched case-insensitively).
 */
private List<SqlQuery> findGroupByQueries() {
	return myCaptureQueriesListener.getSelectQueries().stream()
		.filter(selectQuery -> selectQuery
			.getSql(true, false)
			.toLowerCase()
			.contains("group by"))
		.collect(Collectors.toList());
}

@Test
public void testFetchMoreThanFirstPageSizeInFirstPage() {
create200Patients();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,9 @@ public class JpaStorageSettings extends StorageSettings {

// start with a tiny number so our first page always loads quickly.
// If they fetch the second page, fetch more.
// Use prime sizes to avoid empty next links.
private List<Integer> mySearchPreFetchThresholds = Arrays.asList(13, 503, 2003, -1);
// we'll only fetch (by default) up to 1 million records, because after that, deduplication in local memory is
// prohibitive
private List<Integer> mySearchPreFetchThresholds = Arrays.asList(13, 503, 2003, 1000003, -1);
private List<WarmCacheEntry> myWarmCacheEntries = new ArrayList<>();
private boolean myEnforceReferenceTargetTypes = true;
private ClientIdStrategyEnum myResourceClientIdStrategy = ClientIdStrategyEnum.ALPHANUMERIC;
Expand Down
Loading