Skip to content

Commit

Permalink
Made ignoring files the default instead of adding unrestricted.
Browse files Browse the repository at this point in the history
  • Loading branch information
janvanmansum committed Dec 8, 2024
1 parent 455cf5c commit 28d6484
Show file tree
Hide file tree
Showing 13 changed files with 176 additions and 180 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class FileUploadInclusionPredicate implements Predicate<File> {
@Override
public boolean evaluate(File file) {
return editFiles != null && (restrictedFiles ? isRestricted(file) : notRestricted(file))
&& notReplaced(file) && notIgnored(file);
&& notReplaced(file);
}

private boolean notReplaced(File file) {
Expand All @@ -45,8 +45,4 @@ private boolean isRestricted(File file) {
private boolean notRestricted(File file) {
return editFiles.getAddUnrestrictedFiles().contains(dataDir.relativize(file.toPath()).toString());
}

private boolean notIgnored(File file) {
return !editFiles.getIgnoreFiles().contains(dataDir.relativize(file.toPath()).toString());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public interface DansBagMappingService {

Dataset getDatasetMetadataFromDansDeposit(DansBagDeposit dansDeposit);

EditFiles getEditFilesFromDansDeposit(DansBagDeposit dansDeposit);
EditFiles getEditFilesFromDansDeposit(DansBagDeposit dansDeposit, String updatesDataset);

EditPermissions getEditPermissionsFromDansDeposit(DansBagDeposit dansDeposit);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,15 @@ public Dataset getDatasetMetadataFromDansDeposit(DansBagDeposit dansDeposit) {
}

@Override
public EditFiles getEditFilesFromDansDeposit(DansBagDeposit dansDeposit) {
public EditFiles getEditFilesFromDansDeposit(DansBagDeposit dansDeposit, String updatesDataset) {
var files = getFileInfo(dansDeposit);
var dateAvailable = getDateAvailable(dansDeposit);
return new EditFilesComposer(files, dateAvailable, fileExclusionPattern, embargoExclusions).composeEditFiles();
if (updatesDataset == null) {
return new EditFilesComposer(files, dateAvailable, fileExclusionPattern, embargoExclusions).composeEditFiles();
}
else {
return new EditFilesComposerForUpdate(files, dateAvailable, updatesDataset, fileExclusionPattern, embargoExclusions, dataverseService).composeEditFiles();
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
@AllArgsConstructor
public class DansDepositConverter {
private final DansBagDeposit dansDeposit;
private final String updatesDataset;
private final DansBagMappingService mappingService;
private final YamlService yamlService;

Expand All @@ -37,7 +38,7 @@ public void run() throws IOException {
var dataset = mappingService.getDatasetMetadataFromDansDeposit(dansDeposit);
yamlService.writeYaml(dataset, dansDeposit.getBagDir().resolve("dataset.yml"));

var editFiles = mappingService.getEditFilesFromDansDeposit(dansDeposit);
var editFiles = mappingService.getEditFilesFromDansDeposit(dansDeposit, updatesDataset);
yamlService.writeYaml(new EditFilesRoot(editFiles), dansDeposit.getBagDir().resolve("edit-files.yml"));

var editPermissions = mappingService.getEditPermissionsFromDansDeposit(dansDeposit);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public boolean convertDansDepositIfNeeded() {
ingestDataverseIngestDeposit.updateProperties(Map.of("updatesDataset", updatesDataset));
}
dansDeposit = dansBagMappingService.readDansDeposit(ingestDataverseIngestDeposit.getLocation());
new DansDepositConverter(dansDeposit, dansBagMappingService, yamlService).run();
new DansDepositConverter(dansDeposit, updatesDataset, dansBagMappingService, yamlService).run();
log.info("Conversion successful");
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ public EditFiles composeEditFiles() {
var ignoredFiles = getFilesToIgnore(pathFileInfoMap);

var editFiles = new EditFiles();
editFiles.setIgnoreFiles(ignoredFiles);
pathFileInfoMap = removeIgnoredFiles(pathFileInfoMap, ignoredFiles);

editFiles.setAutoRenameFiles(getAutoRenamedFiles(renamedFiles));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public EditFiles composeEditFiles() {
var filesToReplace = getFilesToReplace(pathFileInfoMap, fileReplacementCandidates);
log.debug("filesToReplace = {}", filesToReplace);
// TODO: check if we can do away with the IDs
editFiles.setReplaceFiles(filesToReplace.values().stream().map(FileInfo::getPath).map(Path::toString).collect(Collectors.toList()));
editFiles.setReplaceFiles(filesToReplace.values().stream().map(FileInfo::getPath).map(Path::toString).toList());

/*
* To find the files to delete we start from the paths in the deposit payload. In principle, these paths are remaining, so should NOT be deleted.
Expand All @@ -124,7 +124,7 @@ public EditFiles composeEditFiles() {
* After the movements have been performed, which paths are occupied? We start from the paths of the latest version (pathToFileMetaInLatestVersion.keySet)
*
* The old paths of the moved files (oldToNewPathMovedFiles.keySet) are no longer occupied, so they must be subtracted. This is important in the case where
* a deposit renames/moves a file (leaving the checksum unchanges) but provides a new file for the vacated path.
* a deposit renames/moves a file (leaving the checksum unchanged) but provides a new file for the vacated path.
*
* The paths of the deleted files (pathsToDelete) are no longer occupied, so must be subtracted. (It is not strictly necessary for the calculation
* of pathsToAdd, but done anyway to keep the logic consistent.)
Expand All @@ -144,21 +144,20 @@ public EditFiles composeEditFiles() {
.toList().stream()
.filter(f -> f.getMetadata().getRestricted())
.toList();
editFiles.setAddRestrictedFiles(restrictedFilesToAdd.stream().map(FileInfo::getPath).map(Path::toString).collect(Collectors.toList()));
editFiles.setAddRestrictedFiles(restrictedFilesToAdd.stream().map(FileInfo::getPath).map(Path::toString).toList());

// todo: set ignoredFiles
var unrestrictedFilesToAdd = pathsToAdd.stream()
.map(pathFileInfoMap::get)
.toList().stream()
.filter(f -> !f.getMetadata().getRestricted())
.toList();
editFiles.setAddUnrestrictedFiles(unrestrictedFilesToAdd.stream().map(FileInfo::getPath).map(Path::toString).toList());

// todo: embargoes

return editFiles;
}

private Set<Integer> getFileDeletions(Set<Path> pathsToDelete, Map<Path, FileMeta> pathToFileInfoInLatestVersion) {
return pathsToDelete.stream()
.map(p -> pathToFileInfoInLatestVersion.get(p).getDataFile().getId())
.collect(Collectors.toSet());
}

private Map<Integer, FileInfo> getFilesToReplace(Map<Path, FileInfo> pathToFileInfo, Map<Path, FileMeta> fileReplacementCandidates) {

var intersection = SetUtils.intersection(pathToFileInfo.keySet(), fileReplacementCandidates.keySet());
Expand All @@ -172,9 +171,9 @@ private Map<Integer, FileInfo> getFilesToReplace(Map<Path, FileInfo> pathToFileI
}

/**
* Creating a mapping for moving files to a new location. To determine this, the file needs to be unique in the old and the new version, because its checksum is used to locate it. Files that
* occur multiple times in either the old or the new version cannot be moved in this way. They will appear to have been deleted in the old version and added in the new. This has the same net
* result, except that the "Changes" overview in Dataverse does not record that the file was effectively moved.
* Creating a mapping for moving files to a new location. To determine this, the file needs to be unique in the old and the new version, because its checksum is used to locate it. Files that occur
* multiple times in either the old or the new version cannot be moved in this way. They will appear to have been deleted in the old version and added in the new. This has the same net result,
* except that the "Changes" overview in Dataverse does not record that the file was effectively moved.
*
* @param pathToFileMetaInLatestVersion map from path to file metadata in the old version
* @param pathToFileInfo map from path to file info in the new version (i.e. the deposit).
Expand Down Expand Up @@ -206,11 +205,11 @@ Map<Path, Path> getOldToNewPathOfFilesToMove(Map<Path, FileMeta> pathToFileMetaI

private Map<String, Path> getChecksumsToPathOfNonDuplicateFiles(Map<Path, String> pathToChecksum) {
// inverse map first
var inversed = pathToChecksum.entrySet().stream()
var inverse = pathToChecksum.entrySet().stream()
.collect(Collectors.groupingBy(Map.Entry::getValue, Collectors.mapping(Map.Entry::getKey, Collectors.toList())));

// filter out items with 0 or more than 1 items
return inversed.entrySet().stream()
// filter out items with 0 or more than 1 item
return inverse.entrySet().stream()
.filter(item -> item.getValue().size() == 1)
.collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().get(0)));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public class EditFiles {
private List<String> addUnrestrictedFiles = List.of();
private List<String> addRestrictedFiles = List.of();
private List<FromTo> moveFiles = List.of();
private List<String> ignoreFiles = List.of();
// private List<String> ignoreFiles = List.of();
private List<FromTo> autoRenameFiles = List.of();
private List<FileMeta> updateFileMetas = List.of();
private List<AddEmbargo> addEmbargoes = List.of();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,32 +33,6 @@ public void setUp() {
editFiles = new EditFiles();
}

@Test
public void ignored_files_are_skipped_for_unrestricted_file_upload() throws Exception {
// Given
editFiles.setIgnoreFiles(List.of("file1"));
var fileUploadInclusionPredicate = new FileUploadInclusionPredicate(editFiles, dataDir.toPath(), false);

// When
var result = fileUploadInclusionPredicate.evaluate(new File("dataDir/file1"));

// Then
assertThat(result).isFalse();
}

@Test
public void ignored_files_are_skipped_for_restricted_file_upload() throws Exception {
// Given
editFiles.setIgnoreFiles(List.of("file1"));
var fileUploadInclusionPredicate = new FileUploadInclusionPredicate(editFiles, dataDir.toPath(), true);

// When
var result = fileUploadInclusionPredicate.evaluate(new File("dataDir/file1"));

// Then
assertThat(result).isFalse();
}

@Test
public void restricted_files_are_included_for_restricted_file_upload() throws Exception {
// Given
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public void run_converts_dans_sword_all_mappings_example_to_dataverse_ingest_dep
Mockito.when(dataverseServiceMock.getUserById(Mockito.anyString())).thenReturn(Optional.of(authenticatedUser));

// When
new DansDepositConverter(deposit, mappingService, yamlService).run();
new DansDepositConverter(deposit, null, mappingService, yamlService).run();

// Then
assertThat(deposit.getBagDir().resolve("dataset.yml")).exists();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

import nl.knaw.dans.dvingest.core.bagprocessor.DataversePath;
import nl.knaw.dans.dvingest.core.dansbag.deposit.FileInfo;
import nl.knaw.dans.dvingest.core.yaml.EditFiles;
import nl.knaw.dans.lib.dataverse.model.file.Checksum;
import nl.knaw.dans.lib.dataverse.model.file.DataFile;
import nl.knaw.dans.lib.dataverse.model.file.FileMeta;

import java.nio.file.Path;
Expand All @@ -25,6 +28,8 @@
import java.util.List;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;

public class EditFilesComposerFixture {
protected static final Instant inThePast = Instant.parse("2010-01-01T00:00:00Z");
protected final Instant inTheFuture = Instant.now().plus(1, ChronoUnit.DAYS);
Expand Down Expand Up @@ -82,4 +87,45 @@ protected FileInfo file(String path, String checksum, boolean restricted, String
protected void add(Map<Path, FileInfo> map, FileInfo fileInfo) {
map.put(fileInfo.getPath(), fileInfo);
}

protected void assertEmptyFieldsExcept(EditFiles editFiles, String... except) {
List<String> exceptList = List.of(except);
if (!exceptList.contains("addRestrictedFiles")) {
assertThat(editFiles.getAddRestrictedFiles()).as("expected addRestrictedFiles to remain empty").isEmpty();
}
if (!exceptList.contains("addUnrestrictedFiles")) {
assertThat(editFiles.getAddUnrestrictedFiles()).as("expected addUnrestrictedFiles to remain empty").isEmpty();
}
if (!exceptList.contains("autoRenameFiles")) {
assertThat(editFiles.getAutoRenameFiles()).as("expected autoRenameFiles to remain empty").isEmpty();
}
if (!exceptList.contains("updateFileMetas")) {
assertThat(editFiles.getUpdateFileMetas()).as("expected updateFileMetas to remain empty").isEmpty();
}
if (!exceptList.contains("addEmbargoes")) {
assertThat(editFiles.getAddEmbargoes()).as("expected addEmbargoes to remain empty").isEmpty();
}
if (!exceptList.contains("deleteFiles")) {
assertThat(editFiles.getDeleteFiles()).as("expected deleteFiles to remain empty").isEmpty();
}
if (!exceptList.contains("moveFiles")) {
assertThat(editFiles.getMoveFiles()).as("expected moveFiles to remain empty").isEmpty();
}
}

protected FileMeta fileMeta(String path, String checksum) {
var fileMeta = new FileMeta();
var dataversePath = new DataversePath(path);
fileMeta.setLabel(dataversePath.getLabel());
fileMeta.setDirectoryLabel(dataversePath.getDirectoryLabel());
var dataFile = new DataFile();
var cs = new Checksum();
cs.setType("SHA-1");
cs.setValue(checksum);
dataFile.setChecksum(cs);
dataFile.setFilename(dataversePath.getLabel());
// No directoryLabel?
fileMeta.setDataFile(dataFile);
return fileMeta;
}
}
Loading

0 comments on commit 28d6484

Please sign in to comment.