From df29074d1584e6e20619f50dd9287abb9cf5033e Mon Sep 17 00:00:00 2001 From: Mikael Weaver Date: Thu, 10 Oct 2024 08:25:34 -0700 Subject: [PATCH] Allow reindex with unlimited empty pages (#4663) * Removed limitation Gen2 reindex history groups * fix fix infinite loop * fix edge case --- .../Features/Search/SqlServerSearchService.cs | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/Microsoft.Health.Fhir.SqlServer/Features/Search/SqlServerSearchService.cs b/src/Microsoft.Health.Fhir.SqlServer/Features/Search/SqlServerSearchService.cs index d49842ccaa..edaf6a440c 100644 --- a/src/Microsoft.Health.Fhir.SqlServer/Features/Search/SqlServerSearchService.cs +++ b/src/Microsoft.Health.Fhir.SqlServer/Features/Search/SqlServerSearchService.cs @@ -926,16 +926,21 @@ protected async override Task SearchForReindexInternalAsync(Search } var queryHints = searchOptions.QueryHints; - long startId = long.Parse(queryHints.First(_ => _.Param == KnownQueryParameterNames.StartSurrogateId).Value); - long endId = long.Parse(queryHints.First(_ => _.Param == KnownQueryParameterNames.EndSurrogateId).Value); + long globalStartId = long.Parse(queryHints.First(h => h.Param == KnownQueryParameterNames.StartSurrogateId).Value); + long globalEndId = long.Parse(queryHints.First(h => h.Param == KnownQueryParameterNames.EndSurrogateId).Value); + long queryStartId = globalStartId; SearchResult results = null; - IReadOnlyList<(long StartId, long EndId)> ranges = await GetSurrogateIdRanges(resourceType, startId, endId, searchOptions.MaxItemCount, 50, true, cancellationToken); + IReadOnlyList<(long StartId, long EndId)> ranges; - if (ranges?.Count > 0) + do { + // Get surrogate ID ranges + ranges = await GetSurrogateIdRanges(resourceType, queryStartId, globalEndId, searchOptions.MaxItemCount, 50, true, cancellationToken); + foreach (var range in ranges) { + // Search within the surrogate ID range results = await SearchBySurrogateIdRange( resourceType, 
range.StartId, @@ -948,20 +953,24 @@ protected async override Task SearchForReindexInternalAsync(Search if (results.Results.Any()) { results.MaxResourceSurrogateId = results.Results.Max(e => e.Resource.ResourceSurrogateId); - break; + _logger.LogInformation("For Reindex, Resource Type={ResourceType} Count={Count} MaxResourceSurrogateId={MaxResourceSurrogateId}", resourceType, results.TotalCount, results.MaxResourceSurrogateId); + return results; } _logger.LogInformation("For Reindex, empty data page encountered. Resource Type={ResourceType} StartId={StartId} EndId={EndId}", resourceType, range.StartId, range.EndId); } + + // If no resources are found in the group of surrogate id ranges, move forward the starting point. + if (ranges.Any()) + { + queryStartId = ranges.Max(x => x.EndId) + 1; + } } - else - { - _logger.LogInformation("For Reindex, no data pages found. Resource Type={ResourceType} StartId={StartId} EndId={EndId}", resourceType, startId, endId); - results = new SearchResult(0, []); - } + while (ranges.Any()); // Repeat until there are no more ranges to scan. Needed to advance through large contiguous history. - _logger.LogInformation("For Reindex, Resource Type={ResourceType} Count={Count} MaxResourceSurrogateId={MaxResourceSurrogateId}", resourceType, results.TotalCount, results.MaxResourceSurrogateId); - return results; + // Return empty result when no resources are found in the given range provided by queryHints. + _logger.LogInformation("No surrogate ID ranges found containing data. Resource Type={ResourceType} StartId={StartId} EndId={EndId}", resourceType, globalStartId, globalEndId); + return new SearchResult(0, []); } ///