Skip to content

Commit 1791bb4

Browse files
author
Vladimir Kotal
committed
proof of concept: split history cache generation into chunks
fixes #3243
1 parent 1374d7e commit 1791bb4

File tree

6 files changed

+164
-37
lines changed

6 files changed

+164
-37
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ private void doFileHistory(String filename, List<HistoryEntry> historyEntries,
150150

151151
storeFile(hist, file, repository, !renamed);
152152

153+
// TODO: readjust for per partes history indexing
153154
statRepoHist.report(LOGGER, Level.FINER,
154155
String.format("Done storing history cache for '%s'", filename),
155156
"filehistorycache.history.store");
@@ -437,14 +438,6 @@ public void store(History history, Repository repository)
437438
return;
438439
}
439440

440-
LOGGER.log(Level.FINE,
441-
"Storing history for repository {0}",
442-
new Object[] {repository.getDirectoryName()});
443-
444-
// Firstly store the history for the top-level directory.
445-
doFileHistory(repository.getDirectoryName(), history.getHistoryEntries(),
446-
repository, env.getSourceRootFile(), null, false);
447-
448441
HashMap<String, List<HistoryEntry>> map = new HashMap<>();
449442
HashMap<String, Boolean> acceptanceCache = new HashMap<>();
450443

@@ -513,7 +506,8 @@ public void store(History history, Repository repository)
513506
fileHistoryCount++;
514507
}
515508

516-
LOGGER.log(Level.FINE, "Stored history for {0} files", fileHistoryCount);
509+
LOGGER.log(Level.FINE, "Stored history for {0} files in repository ''{1}''",
510+
new Object[]{fileHistoryCount, repository.getDirectoryName()});
517511

518512
if (!handleRenamedFiles) {
519513
finishStore(repository, latestRev);
@@ -735,7 +729,8 @@ private String getRepositoryCachedRevPath(Repository repository) {
735729
* @param repository repository
736730
* @param rev latest revision which has been just indexed
737731
*/
738-
private void storeLatestCachedRevision(Repository repository, String rev) {
732+
@Override
733+
public void storeLatestCachedRevision(Repository repository, String rev) {
739734
Writer writer = null;
740735

741736
try {

opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import java.io.Reader;
3434
import java.nio.charset.StandardCharsets;
3535
import java.nio.file.Paths;
36+
import java.util.Collections;
3637
import java.util.Date;
3738
import java.util.List;
3839
import java.util.ArrayList;
@@ -86,6 +87,7 @@
8687
import org.opengrok.indexer.util.Executor;
8788
import org.opengrok.indexer.util.ForbiddenSymlinkException;
8889
import org.opengrok.indexer.util.LazilyInstantiate;
90+
import org.opengrok.indexer.util.Statistics;
8991
import org.opengrok.indexer.util.Version;
9092

9193
import static org.opengrok.indexer.history.HistoryEntry.TAGS_SEPARATOR;
@@ -94,7 +96,7 @@
9496
* Access to a Git repository.
9597
*
9698
*/
97-
public class GitRepository extends Repository {
99+
public class GitRepository extends RepositoryPerPartesHistory {
98100

99101
private static final Logger LOGGER = LoggerFactory.getLogger(GitRepository.class);
100102

@@ -524,6 +526,51 @@ History getHistory(File file) throws HistoryException {
524526

525527
/**
 * Get the history of the given file for changesets newer than {@code sinceRevision}.
 * Delegates to {@link #getHistory(File, String, String)} with {@code null} as the
 * upper bound, which makes that method start the walk at {@code HEAD}.
 *
 * @param file file or directory to get the history for
 * @param sinceRevision revision to be excluded together with its ancestors,
 *                      or {@code null} to get the complete history
 * @return history of the file
 * @throws HistoryException on error accessing the history
 */
@Override
526528
History getHistory(File file, String sinceRevision) throws HistoryException {
529+
return getHistory(file, sinceRevision, null);
530+
}
531+
532+
// TODO: add test
533+
public List<String> getBoundaryChangesetIDs(String sinceRevision) throws HistoryException {
534+
List<String> result = new ArrayList<>();
535+
final int maxCount = 1024; // TODO
536+
537+
Statistics stat = new Statistics();
538+
try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName());
539+
RevWalk walk = new RevWalk(repository)) {
540+
541+
if (sinceRevision != null) {
542+
walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision)));
543+
}
544+
walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD)));
545+
546+
int cnt = 0;
547+
String lastId = null;
548+
for (RevCommit commit : walk) {
549+
if (cnt != 0 && cnt % maxCount == 0) {
550+
// Do not abbreviate the Id as this could cause AmbiguousObjectException in getHistory().
551+
lastId = commit.getId().name();
552+
result.add(lastId);
553+
}
554+
cnt++;
555+
}
556+
} catch (IOException e) {
557+
throw new HistoryException(e);
558+
}
559+
560+
// The changesets need to go from oldest to newest.
561+
Collections.reverse(result);
562+
563+
// Add null to finish the last step in Repository#createCache().
564+
result.add(null);
565+
566+
stat.report(LOGGER, Level.FINE,
567+
String.format("done getting boundary changesets for ''%s'' (%d entries)",
568+
getDirectoryName(), result.size()));
569+
570+
return result;
571+
}
572+
573+
public History getHistory(File file, String sinceRevision, String tillRevision) throws HistoryException {
527574
final List<HistoryEntry> entries = new ArrayList<>();
528575
final List<String> renamedFiles = new ArrayList<>();
529576

@@ -533,7 +580,12 @@ History getHistory(File file, String sinceRevision) throws HistoryException {
533580
if (sinceRevision != null) {
534581
walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision)));
535582
}
536-
walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD)));
583+
584+
if (tillRevision != null) {
585+
walk.markStart(walk.lookupCommit(repository.resolve(tillRevision)));
586+
} else {
587+
walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD)));
588+
}
537589

538590
String relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file);
539591
if (!getDirectoryNameRelative().equals(relativePath)) {
@@ -761,6 +813,14 @@ String determineBranch(CommandTimeoutType cmdType) throws IOException {
761813
}
762814
}
763815

816+
// TODO: add test for this
817+
public String determineCurrentVersionId() throws IOException {
818+
try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName())) {
819+
Ref head = repository.exactRef(Constants.HEAD);
820+
return getCommit(repository, head).getId().abbreviate(GIT_ABBREV_LEN).name();
821+
}
822+
}
823+
764824
@Override
765825
public String determineCurrentVersion(CommandTimeoutType cmdType) throws IOException {
766826
try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName())) {

opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@ boolean hasCacheForDirectory(File directory, Repository repository)
115115
String getLatestCachedRevision(Repository repository)
116116
throws HistoryException;
117117

118+
// TODO
119+
/**
 * Store the given revision as the latest cached revision of the repository.
 * This is the write counterpart of {@code getLatestCachedRevision(Repository)}.
 *
 * @param repository repository the revision belongs to
 * @param version identifier of the latest revision that has been cached
 */
void storeLatestCachedRevision(Repository repository, String version);
120+
118121
/**
119122
* Get the last modified times for all files and subdirectories in the
120123
* specified directory.

opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import org.opengrok.indexer.util.Executor;
5454

5555
import org.jetbrains.annotations.NotNull;
56+
import org.opengrok.indexer.util.Statistics;
5657

5758
/**
5859
* An interface for an external repository.
@@ -141,8 +142,7 @@ public String getRepoCommand() {
141142
* @return partial history for file
142143
* @throws HistoryException on error accessing the history
143144
*/
144-
History getHistory(File file, String sinceRevision)
145-
throws HistoryException {
145+
History getHistory(File file, String sinceRevision) throws HistoryException {
146146

147147
// If we want an incremental history update and get here, warn that
148148
// it may be slow.
@@ -361,17 +361,16 @@ protected String getRevisionForAnnotate(String history_revision) {
361361
*
362362
* @throws HistoryException on error
363363
*/
364-
final void createCache(HistoryCache cache, String sinceRevision)
365-
throws HistoryException {
364+
final void createCache(HistoryCache cache, String sinceRevision) throws HistoryException {
365+
366366
if (!isWorking()) {
367367
return;
368368
}
369369

370370
// If we don't have a directory parser, we can't create the cache
371371
// this way. Just give up and return.
372372
if (!hasHistoryForDirectories()) {
373-
LOGGER.log(
374-
Level.INFO,
373+
LOGGER.log(Level.INFO,
375374
"Skipping creation of history cache for {0}, since retrieval "
376375
+ "of history for directories is not implemented for this "
377376
+ "repository type.", getDirectoryName());
@@ -381,31 +380,45 @@ final void createCache(HistoryCache cache, String sinceRevision)
381380
File directory = new File(getDirectoryName());
382381

383382
History history;
384-
try {
383+
if (!(this instanceof RepositoryPerPartesHistory)) {
385384
history = getHistory(directory, sinceRevision);
386-
} catch (HistoryException he) {
387-
if (sinceRevision == null) {
388-
// Failed to get full history, so fail.
389-
throw he;
385+
finishCreateCache(cache, history);
386+
return;
387+
}
388+
389+
// To avoid holding the complete History in memory, split the work into multiple chunks.
390+
RepositoryPerPartesHistory repo = (RepositoryPerPartesHistory) this;
391+
List<String> boundaryChangesets = repo.getBoundaryChangesetIDs(sinceRevision);
392+
int cnt = 0;
393+
for (String tillRevision: boundaryChangesets) {
394+
Statistics stat = new Statistics();
395+
history = repo.getHistory(directory, sinceRevision, tillRevision);
396+
if (history.getHistoryEntries().size() == 0) {
397+
// TODO
398+
break;
390399
}
391-
// Failed to get partial history. This may have been caused
392-
// by changes in the revision numbers since the last update
393-
// (bug #14724) so we'll try to regenerate the cache from
394-
// scratch instead.
395-
LOGGER.log(Level.WARNING,
396-
"Failed to get partial history. Attempting to "
397-
+ "recreate the history cache from scratch.", he);
398-
history = null;
400+
401+
finishCreateCache(cache, history);
402+
sinceRevision = tillRevision;
403+
404+
stat.report(LOGGER, Level.FINE, String.format("finished chunk %d/%d of history cache for repository ''%s''",
405+
++cnt, boundaryChangesets.size(), this.getDirectoryName()));
399406
}
400407

401-
if (sinceRevision != null && history == null) {
402-
// Failed to get partial history, now get full history instead.
403-
history = getHistory(directory);
404-
// Got full history successfully. Clear the history cache so that
405-
// we can recreate it from scratch.
406-
cache.clear(this);
408+
/*
409+
* Need to reset the latest cachedRevision as the last finishCreateCache() above
410+
* wrote the changeset ID of the last part.
411+
* TODO: probably not necessary now ?
412+
*/
413+
try {
414+
// TODO: does not work well if finishStore() failed
415+
cache.storeLatestCachedRevision(this, repo.determineCurrentVersionId());
416+
} catch (IOException e) {
417+
throw new HistoryException(e);
407418
}
419+
}
408420

421+
private void finishCreateCache(HistoryCache cache, History history) throws HistoryException {
409422
// We need to refresh list of tags for incremental reindex.
410423
RuntimeEnvironment env = RuntimeEnvironment.getInstance();
411424
if (env.isTagsEnabled() && this.hasFileBasedTags()) {
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opengrok.indexer.history;
24+
25+
import java.io.File;
26+
import java.io.IOException;
27+
import java.util.List;
28+
29+
/**
 * Base class for repositories whose history can be retrieved piece by piece
 * ("per partes"), i.e. for a range of changesets delimited by boundary changeset IDs.
 * {@code Repository#createCache} uses these methods to build the history cache
 * in chunks instead of fetching the complete history at once.
 */
public abstract class RepositoryPerPartesHistory extends Repository {
30+
private static final long serialVersionUID = -3433255821312805064L;
31+
32+
/**
 * Get the history of the directory limited to a range of changesets.
 *
 * @param directory directory to get the history for
 * @param sinceRevision lower bound of the range, may be {@code null}
 *                      (presumably exclusive, as in the Git implementation - confirm for other implementations)
 * @param tillRevision upper bound of the range, may be {@code null};
 *                     the list returned by {@link #getBoundaryChangesetIDs(String)} supplies these values
 * @return history for the given range
 * @throws HistoryException on error accessing the history
 */
abstract History getHistory(File directory, String sinceRevision, String tillRevision) throws HistoryException;
33+
34+
/**
 * Get the changeset IDs that delimit the individual history chunks.
 * {@code Repository#createCache} passes the elements, in list order,
 * as the {@code tillRevision} of successive chunks.
 *
 * @param sinceRevision revision to start from, may be {@code null}
 * @return list of boundary changeset IDs
 * @throws HistoryException on error accessing the history
 */
abstract List<String> getBoundaryChangesetIDs(String sinceRevision) throws HistoryException;
35+
36+
/**
 * Determine the ID of the current version of the repository.
 * {@code Repository#createCache} stores it as the latest cached revision
 * once all chunks have been processed.
 *
 * @return ID of the current version
 * @throws IOException on error accessing the repository
 */
abstract String determineCurrentVersionId() throws IOException;
37+
}

opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import static org.junit.jupiter.api.Assertions.assertNull;
3131
import static org.junit.jupiter.api.Assertions.assertThrows;
3232
import static org.junit.jupiter.api.Assertions.assertTrue;
33+
import static org.opengrok.indexer.condition.RepositoryInstalled.Type.GIT;
3334
import static org.opengrok.indexer.condition.RepositoryInstalled.Type.MERCURIAL;
3435
import static org.opengrok.indexer.condition.RepositoryInstalled.Type.SCCS;
3536
import static org.opengrok.indexer.condition.RepositoryInstalled.Type.SUBVERSION;
@@ -247,6 +248,24 @@ public void testStoreAndGetIncrementalTags() throws Exception {
247248
retrievedUpdatedHistoryMainC.getHistoryEntries(), false);
248249
}
249250

251+
/**
252+
* TODO
253+
* move this to RepositoryTest ?
254+
*/
255+
@Test
256+
@EnabledOnOs({OS.LINUX, OS.MAC, OS.SOLARIS, OS.AIX, OS.OTHER})
257+
@EnabledForRepository(GIT)
258+
public void testIncrementalStore() throws Exception {
259+
// Use the "git" repository located under the test source root.
File reposRoot = new File(repositories.getSourceRoot(), "git");
260+
261+
Repository repo = RepositoryFactory.getRepository(reposRoot);
262+
// NOTE(review): historyToStore is not used yet; presumably it is meant to be compared
// with the cache contents once the assertions below are written.
History historyToStore = repo.getHistory(reposRoot);
263+
264+
// Create the history cache with no starting revision (null sinceRevision).
repo.createCache(cache, null);
265+
266+
// TODO
267+
}
268+
250269
/**
251270
* Basic tests for the {@code store()} and {@code get()} methods.
252271
*/

0 commit comments

Comments
 (0)