Skip to content

Fix svn history for files which were in a renamed directory #4172

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
/*
* Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, 2020, Chris Fraire <[email protected]>.
* Portions Copyright (c) 2020, Ric Harris <[email protected]>.
* Portions Copyright (c) 2020, 2023, Ric Harris <[email protected]>.
*/
package org.opengrok.indexer.history;

Expand All @@ -32,8 +32,11 @@
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -64,6 +67,8 @@ class SubversionHistoryParser implements Executor.StreamHandler {

private static class Handler extends DefaultHandler2 {

private static final String COPYFROM_PATH = "copyfrom-path";

/**
* Example of the longest date format that we should accept - SimpleDateFormat cannot cope with micro/nano seconds.
*/
Expand All @@ -74,10 +79,12 @@ private static class Handler extends DefaultHandler2 {
final int length;
final List<HistoryEntry> entries = new ArrayList<>();
final Set<String> renamedFiles = new HashSet<>();
final Map<String, String> renamedToDirectoryRevisions = new HashMap<>();
final SubversionRepository repository;
HistoryEntry entry;
StringBuilder sb;
boolean isRenamed;
boolean isRenamedFile;
boolean isRenamedDir;

Handler(String home, String prefix, int length, SubversionRepository repository) {
this.home = home;
Expand All @@ -87,19 +94,31 @@ private static class Handler extends DefaultHandler2 {
sb = new StringBuilder();
}

List<String> getRenamedFiles() {
return new ArrayList<>(renamedFiles);
Set<String> getRenamedFiles() {
return renamedFiles;
}

Map<String, String> getRenamedDirectories() {
return renamedToDirectoryRevisions;
}

@Override
public void startElement(String uri, String localName, String qname, Attributes attr) {
isRenamed = false;
isRenamedFile = false;
isRenamedDir = false;
if ("logentry".equals(qname)) {
entry = new HistoryEntry();
entry.setActive(true);
entry.setRevision(attr.getValue("revision"));
} else if ("path".equals(qname)) {
isRenamed = attr.getIndex("copyfrom-path") != -1;
if (attr.getIndex(COPYFROM_PATH) != -1) {
LOGGER.info("rename for:" + attr.getValue(COPYFROM_PATH) );
if ("dir".equals(attr.getValue("kind"))) {
isRenamedDir = true;
} else {
isRenamedFile = true;
}
}
}
sb.setLength(0);
}
Expand Down Expand Up @@ -132,9 +151,12 @@ public void endElement(String uri, String localName, String qname) throws SAXExc
// The same file names may be repeated in many commits,
// so intern them to reduce the memory footprint.
entry.addFile(path.intern());
if (isRenamed) {
if (isRenamedFile) {
renamedFiles.add(path.intern());
}
if (isRenamedDir) {
renamedToDirectoryRevisions.put(path.intern(), entry.getRevision());
}
} else {
LOGGER.log(Level.FINER, "Skipping file outside repository: " + s);
}
Expand Down Expand Up @@ -209,10 +231,26 @@ History parse(File file, SubversionRepository repos, String sinceRevision,
repos.removeAndVerifyOldestChangeset(entries, sinceRevision);
}

return new History(entries, handler.getRenamedFiles());
Set<String> allRenamedFiles = findRenamedFilesFromDirectories(handler.getRenamedDirectories(), repos, cmdType);
allRenamedFiles.addAll(handler.getRenamedFiles());

return new History(entries, new ArrayList<>(allRenamedFiles));
}

/**
/**
* @return a set of files that were present in the directories at the given revisions.
*/
private Set<String> findRenamedFilesFromDirectories(Map<String, String> renamedDirectories, SubversionRepository repos,
CommandTimeoutType cmdType) {

Set<String> renamedFiles = new HashSet<>();
for (Entry<String, String> entry : renamedDirectories.entrySet()) {
renamedFiles.addAll(repos.getFilesInDirectoryAtRevision(entry.getKey(), entry.getValue(), cmdType));
}
return renamedFiles;
}

/**
* Process the output from the log command and insert the HistoryEntries
* into the history field.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,25 @@
/*
* Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, 2020, Chris Fraire <[email protected]>.
* Portions Copyright (c) 2023, Ric Harris <[email protected]>.
*/
package org.opengrok.indexer.history;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
Expand All @@ -41,6 +49,7 @@
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.util.Executor;
import org.opengrok.indexer.util.Executor.StreamHandler;
import org.opengrok.indexer.util.LazilyInstantiate;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
Expand Down Expand Up @@ -249,7 +258,7 @@ boolean getHistoryGet(OutputStream out, String parent, String basename, String r

String filepath;
try {
filepath = (new File(parent, basename)).getCanonicalPath();
filepath = new File(parent, basename).getCanonicalPath();
} catch (IOException exp) {
LOGGER.log(Level.SEVERE,
"Failed to get canonical path: {0}", exp.getClass().toString());
Expand Down Expand Up @@ -282,6 +291,73 @@ boolean getHistoryGet(OutputStream out, String parent, String basename, String r
return false;
}

/**
* Get an executor to be used for retrieving the history log for the named
* file.
*
* @param dir The repository relative directory to retrieve file list for
* @param atRevision the revision number at which we want the directory contents
* @param cmdType command timeout type
* @return An Executor ready to be started
*/
private Executor getDirectoryListExecutor(final String dir, String atRevision, CommandTimeoutType cmdType) {

List<String> cmd = new ArrayList<>();
ensureCommand(CMD_PROPERTY_KEY, CMD_FALLBACK);
cmd.add(RepoCommand);
cmd.add("ls");
cmd.add(escapeFileName(dir));
cmd.add("-r" + atRevision);

return new Executor(cmd, new File(getDirectoryName()),
RuntimeEnvironment.getInstance().getCommandTimeout(cmdType));
}


/**
* Provides a list of files that were in a directory at a given revision.
* This is useful for finding files that will need special renamed file history
* handling because they were in a directory when it was renamed.
*
* Note that this doesn't throw an exception even if the command was not completed
* because we will still be able to get the file history up to this point.
*
* @param directory the directory to check
* @param revision the revision to check at
* @param cmdType the timeout setting.
* @return the files that were in the directory at that revision
*/
Set<String> getFilesInDirectoryAtRevision(String directory, String revision,
CommandTimeoutType cmdType) {

Executor executor = getDirectoryListExecutor(
RuntimeEnvironment.getInstance().getSourceRootPath() + File.separator + directory,
revision, cmdType);

Set<String> files = new HashSet<>();

StreamHandler directoryLogStreamHandler = new StreamHandler() {

@Override
public void processStream(InputStream in) throws IOException {
new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))
.lines()
.filter(s -> !s.isBlank())
.map(s -> directory + File.separator + s)
.forEach(files::add);
}
};

int status = executor.exec(true, directoryLogStreamHandler);
if (status != 0) {
LOGGER.warning("Failed to get history for: \"" + directory + "\" Exit code: " + status);
}
LOGGER.info("log from directory / revision [" + directory + "] [" + revision + "]\n" + files);

return files;
}


@Override
boolean hasHistoryForDirectories() {
return true;
Expand Down
Loading