diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/SubversionHistoryParser.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/SubversionHistoryParser.java index 150d21c114a..a77fcb1d125 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/SubversionHistoryParser.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/SubversionHistoryParser.java @@ -20,7 +20,7 @@ /* * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017, 2020, Chris Fraire . - * Portions Copyright (c) 2020, Ric Harris . + * Portions Copyright (c) 2020, 2023, Ric Harris . */ package org.opengrok.indexer.history; @@ -32,8 +32,11 @@ import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -64,6 +67,8 @@ class SubversionHistoryParser implements Executor.StreamHandler { private static class Handler extends DefaultHandler2 { + private static final String COPYFROM_PATH = "copyfrom-path"; + /** * Example of the longest date format that we should accept - SimpleDateFormat cannot cope with micro/nano seconds. */ @@ -74,10 +79,12 @@ private static class Handler extends DefaultHandler2 { final int length; final List entries = new ArrayList<>(); final Set renamedFiles = new HashSet<>(); + final Map renamedToDirectoryRevisions = new HashMap<>(); final SubversionRepository repository; HistoryEntry entry; StringBuilder sb; - boolean isRenamed; + boolean isRenamedFile; + boolean isRenamedDir; Handler(String home, String prefix, int length, SubversionRepository repository) { this.home = home; @@ -87,19 +94,31 @@ private static class Handler extends DefaultHandler2 { sb = new StringBuilder(); } - List getRenamedFiles() { - return new ArrayList<>(renamedFiles); + Set getRenamedFiles() { + return renamedFiles; + } + + Map getRenamedDirectories() { + return renamedToDirectoryRevisions; } @Override public void startElement(String uri, String localName, String qname, Attributes attr) { - isRenamed = false; + isRenamedFile = false; + isRenamedDir = false; if ("logentry".equals(qname)) { entry = new HistoryEntry(); entry.setActive(true); entry.setRevision(attr.getValue("revision")); } else if ("path".equals(qname)) { - isRenamed = attr.getIndex("copyfrom-path") != -1; + if (attr.getIndex(COPYFROM_PATH) != -1) { + LOGGER.info("rename for:" + attr.getValue(COPYFROM_PATH) ); + if ("dir".equals(attr.getValue("kind"))) { + isRenamedDir = true; + } else { + isRenamedFile = true; + } + } } sb.setLength(0); } @@ -132,9 +151,12 @@ public void endElement(String uri, String localName, String qname) throws SAXExc // The same file names may be repeated in many commits, // so intern them to reduce the memory footprint. entry.addFile(path.intern()); - if (isRenamed) { + if (isRenamedFile) { renamedFiles.add(path.intern()); } + if (isRenamedDir) { + renamedToDirectoryRevisions.put(path.intern(), entry.getRevision()); + } } else { LOGGER.log(Level.FINER, "Skipping file outside repository: " + s); } @@ -209,10 +231,26 @@ History parse(File file, SubversionRepository repos, String sinceRevision, repos.removeAndVerifyOldestChangeset(entries, sinceRevision); } - return new History(entries, handler.getRenamedFiles()); + Set allRenamedFiles = findRenamedFilesFromDirectories(handler.getRenamedDirectories(), repos, cmdType); + allRenamedFiles.addAll(handler.getRenamedFiles()); + + return new History(entries, new ArrayList<>(allRenamedFiles)); } - /** + /** + * @return a set of files that were present in the directories at the given revisions. + */ + private Set findRenamedFilesFromDirectories(Map renamedDirectories, SubversionRepository repos, + CommandTimeoutType cmdType) { + + Set renamedFiles = new HashSet<>(); + for (Entry entry : renamedDirectories.entrySet()) { + renamedFiles.addAll(repos.getFilesInDirectoryAtRevision(entry.getKey(), entry.getValue(), cmdType)); + } + return renamedFiles; + } + + /** * Process the output from the log command and insert the HistoryEntries * into the history field. * diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/SubversionRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/SubversionRepository.java index 83b5129e409..451ea4159f7 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/SubversionRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/SubversionRepository.java @@ -20,17 +20,25 @@ /* * Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017, 2020, Chris Fraire . + * Portions Copyright (c) 2023, Ric Harris . */ package org.opengrok.indexer.history; +import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.function.Supplier; import java.util.logging.Level; import java.util.logging.Logger; + import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -41,6 +49,7 @@ import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.util.Executor; +import org.opengrok.indexer.util.Executor.StreamHandler; import org.opengrok.indexer.util.LazilyInstantiate; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -249,7 +258,7 @@ boolean getHistoryGet(OutputStream out, String parent, String basename, String r String filepath; try { - filepath = (new File(parent, basename)).getCanonicalPath(); + filepath = new File(parent, basename).getCanonicalPath(); } catch (IOException exp) { LOGGER.log(Level.SEVERE, "Failed to get canonical path: {0}", exp.getClass().toString()); @@ -282,6 +291,73 @@ boolean getHistoryGet(OutputStream out, String parent, String basename, String r return false; } + /** + * Get an executor to be used for retrieving the history log for the named + * file. + * + * @param dir The repository relative directory to retrieve file list for + * @param atRevision the revision number at which we want the directory contents + * @param cmdType command timeout type + * @return An Executor ready to be started + */ + private Executor getDirectoryListExecutor(final String dir, String atRevision, CommandTimeoutType cmdType) { + + List cmd = new ArrayList<>(); + ensureCommand(CMD_PROPERTY_KEY, CMD_FALLBACK); + cmd.add(RepoCommand); + cmd.add("ls"); + cmd.add(escapeFileName(dir)); + cmd.add("-r" + atRevision); + + return new Executor(cmd, new File(getDirectoryName()), + RuntimeEnvironment.getInstance().getCommandTimeout(cmdType)); + } + + + /** + * Provides a list of files that were in a directory at a given revision. + * This is useful for finding files that will need special renamed file history + * handling because they were in a directory when it was renamed. + * + * Note that this doesn't throw an exception even if the command was not completed + * because we will still be able to get the file history up to this point. + * + * @param directory the directory to check + * @param revision the revision to check at + * @param cmdType the timeout setting. + * @return the files that were in the directory at that revision + */ + Set getFilesInDirectoryAtRevision(String directory, String revision, + CommandTimeoutType cmdType) { + + Executor executor = getDirectoryListExecutor( + RuntimeEnvironment.getInstance().getSourceRootPath() + File.separator + directory, + revision, cmdType); + + Set files = new HashSet<>(); + + StreamHandler directoryLogStreamHandler = new StreamHandler() { + + @Override + public void processStream(InputStream in) throws IOException { + new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)) + .lines() + .filter(s -> !s.isBlank()) + .map(s -> directory + File.separator + s) + .forEach(files::add); + } + }; + + int status = executor.exec(true, directoryLogStreamHandler); + if (status != 0) { + LOGGER.warning("Failed to get history for: \"" + directory + "\" Exit code: " + status); + } + LOGGER.info("log from directory / revision [" + directory + "] [" + revision + "]\n" + files); + + return files; + } + + @Override boolean hasHistoryForDirectories() { return true; diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java index 6a27d647936..42b5427c7e1 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java @@ -20,7 +20,7 @@ /* * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2018, 2020, Chris Fraire . - * Portions Copyright (c) 2020, Ric Harris . + * Portions Copyright (c) 2020, 2023, Ric Harris . */ package org.opengrok.indexer.history; @@ -48,6 +48,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.lang3.time.DateUtils; import org.eclipse.jgit.api.Git; @@ -60,6 +61,7 @@ import org.junit.jupiter.params.provider.ValueSource; import org.mockito.Mockito; import org.opengrok.indexer.condition.EnabledForRepository; +import org.opengrok.indexer.configuration.CommandTimeoutType; import org.opengrok.indexer.configuration.Filter; import org.opengrok.indexer.configuration.IgnoredNames; import org.opengrok.indexer.configuration.RuntimeEnvironment; @@ -73,6 +75,8 @@ */ class FileHistoryCacheTest { + private static final String SUBVERSION_REPO_LOC = "subversion"; + private static final String SVN_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; private final RuntimeEnvironment env = RuntimeEnvironment.getInstance(); @@ -633,7 +637,7 @@ void testRenamedFilePlusChangesBranched() throws Exception { void testMultipleRenamedFiles() throws Exception { createSvnRepository(); - File reposRoot = new File(repositories.getSourceRoot(), "subversion"); + File reposRoot = new File(repositories.getSourceRoot(), SUBVERSION_REPO_LOC); History updatedHistory; // The test expects support for renamed files. @@ -645,7 +649,7 @@ void testMultipleRenamedFiles() throws Exception { cache.store(historyToStore, repo); // Check complete list of history entries for the renamed file. - File testFile = new File(reposRoot.toString() + File.separatorChar + "FileZ.txt"); + File testFile = new File(reposRoot.toString(), "FileZ.txt"); updatedHistory = cache.get(testFile, repo, false); assertEquals(3, updatedHistory.getHistoryEntries().size()); @@ -677,6 +681,82 @@ void testMultipleRenamedFiles() throws Exception { assertSameEntries(histConstruct.getHistoryEntries(), updatedHistory.getHistoryEntries(), false); } + + /** + * For an Subversion repository, verify we can get a list of files that were in a directory + * at the point it was renamed. + */ + @EnabledForRepository(SUBVERSION) + @Test + void testSubversionFilesInDirectoryLog() throws Exception { + createSvnRepository(); + + File reposRoot = new File(repositories.getSourceRoot(), SUBVERSION_REPO_LOC); + + // The test expects support for renamed files. + env.setHandleHistoryOfRenamedFiles(true); + + // Generate history index. + SubversionRepository svnRepo = (SubversionRepository) RepositoryFactory.getRepository(reposRoot); + Set files = svnRepo.getFilesInDirectoryAtRevision( + Path.of(SUBVERSION_REPO_LOC, "renamedFolder").toString(), "12", CommandTimeoutType.INDEXER); + assertEquals( + Set.of(Path.of(SUBVERSION_REPO_LOC, "renamedFolder", "FileInRenamedFolder.txt").toString()), files); + } + + + /** + * Make sure produces correct history for a file which was in a directory that was renamed. + */ + @EnabledForRepository(SUBVERSION) + @Test + void testRenamedDirectory() throws Exception { + createSvnRepository(); + + File reposRoot = new File(repositories.getSourceRoot(), SUBVERSION_REPO_LOC); + History updatedHistory; + + // The test expects support for renamed files. + env.setHandleHistoryOfRenamedFiles(true); + + // Generate history index. + Repository repo = RepositoryFactory.getRepository(reposRoot); + History historyToStore = repo.getHistory(reposRoot); + cache.store(historyToStore, repo); + + // Check complete list of history entries for the file in the renamed folder. + File testFile = Path.of(reposRoot.toString(), "renamedFolder", "FileInRenamedFolder.txt").toFile(); + updatedHistory = cache.get(testFile, repo, false); + assertEquals(3, updatedHistory.getHistoryEntries().size()); + + HistoryEntry e0 = new HistoryEntry( + "14", + DateUtils.parseDate("2021-03-02T11:34:28.030Z", SVN_DATE_FORMAT), + "RichardH", + "Update to renamed file.", + true); + HistoryEntry e1 = new HistoryEntry( + "13", + DateUtils.parseDate("2021-03-02T08:54:54.693Z", SVN_DATE_FORMAT), + "RichardH", + "rename folder", + true); + HistoryEntry e2 = new HistoryEntry( + "12", + DateUtils.parseDate("2021-03-02T08:54:30.615Z", SVN_DATE_FORMAT), + "RichardH", + "added file to new folder", + true); + + History histConstruct = new History(); + LinkedList entriesConstruct = new LinkedList<>(); + entriesConstruct.add(e0); + entriesConstruct.add(e1); + entriesConstruct.add(e2); + histConstruct.setHistoryEntries(entriesConstruct); + assertSameEntries(histConstruct.getHistoryEntries(), updatedHistory.getHistoryEntries(), false); + } + private void createSvnRepository() throws Exception { var svnLog = FileHistoryCacheTest.class.getResource("/history/svnlog.dump"); Path tempDir = Files.createTempDirectory("opengrok"); @@ -697,7 +777,7 @@ private void createSvnRepository() throws Exception { assertEquals(0, svnLoadRepoFromDumpProcess.waitFor()); var svnCheckoutProcess = new ProcessBuilder("svn", "checkout", Path.of(repo).toUri().toString(), - Path.of(repositories.getSourceRoot()).resolve("subversion").toString()) + Path.of(repositories.getSourceRoot()).resolve(SUBVERSION_REPO_LOC).toString()) .start(); assertEquals(0, svnCheckoutProcess.waitFor()); } @@ -784,7 +864,7 @@ void testRenamedFileHistoryWithPerPartes(int maxCount) throws Exception { void testRenamedFile() throws Exception { createSvnRepository(); - File reposRoot = new File(repositories.getSourceRoot(), "subversion"); + File reposRoot = new File(repositories.getSourceRoot(), SUBVERSION_REPO_LOC); History updatedHistory; // The test expects support for renamed files. @@ -796,8 +876,7 @@ void testRenamedFile() throws Exception { cache.store(historyToStore, repo); // Check complete list of history entries for the renamed file. - File testFile = new File(reposRoot.toString() + File.separatorChar - + "subfolder" + File.separatorChar + "TestFileRenamedAgain.txt"); + File testFile = Path.of(reposRoot.toString(), "subfolder", "TestFileRenamedAgain.txt").toFile(); updatedHistory = cache.get(testFile, repo, false); assertEquals(4, updatedHistory.getHistoryEntries().size()); diff --git a/opengrok-indexer/src/test/resources/history/svnlog.dump b/opengrok-indexer/src/test/resources/history/svnlog.dump index cf6673d6f1f..55265dfccf9 100644 --- a/opengrok-indexer/src/test/resources/history/svnlog.dump +++ b/opengrok-indexer/src/test/resources/history/svnlog.dump @@ -319,3 +319,117 @@ Node-path: FileB.txt Node-action: delete +Revision-number: 11 +Prop-content-length: 119 +Content-length: 119 + +K 10 +svn:author +V 8 +RichardH +K 8 +svn:date +V 27 +2021-03-02T08:53:07.271547Z +K 7 +svn:log +V 16 +add a new folder +PROPS-END + +Node-path: folderToRename +Node-kind: dir +Node-action: add +Prop-content-length: 10 +Content-length: 10 + +PROPS-END + + +Revision-number: 12 +Prop-content-length: 127 +Content-length: 127 + +K 10 +svn:author +V 8 +RichardH +K 8 +svn:date +V 27 +2021-03-02T08:54:30.615600Z +K 7 +svn:log +V 24 +added file to new folder +PROPS-END + +Node-path: folderToRename/FileInRenamedFolder.txt +Node-kind: file +Node-action: add +Text-content-md5: 7dbc9f235835a899880f3e9a7ae1f393 +Text-content-sha1: b2c9fc8a263b9ffce7315ac747c7d03fc2d22ef3 +Prop-content-length: 10 +Text-content-length: 14 +Content-length: 24 + +PROPS-END +A sample file. + +Revision-number: 13 +Prop-content-length: 116 +Content-length: 116 + +K 10 +svn:author +V 8 +RichardH +K 8 +svn:date +V 27 +2021-03-02T08:54:54.693901Z +K 7 +svn:log +V 13 +rename folder +PROPS-END + +Node-path: renamedFolder +Node-kind: dir +Node-action: add +Node-copyfrom-rev: 12 +Node-copyfrom-path: folderToRename + + +Node-path: folderToRename +Node-action: delete + + +Revision-number: 14 +Prop-content-length: 126 +Content-length: 126 + +K 10 +svn:author +V 8 +RichardH +K 8 +svn:date +V 27 +2021-03-02T11:34:28.030163Z +K 7 +svn:log +V 23 +Update to renamed file. +PROPS-END + +Node-path: renamedFolder/FileInRenamedFolder.txt +Node-kind: file +Node-action: change +Text-content-md5: ccf9163ea7a31346cbeea040c3cbb4a5 +Text-content-sha1: b9dbd526251d3309741de3d1bf037ac386738788 +Text-content-length: 23 +Content-length: 23 + +A sample file. Amended. +