Skip to content

Commit

Permalink
Refactored EditFilesComposer for easier unit testing.
Browse files Browse the repository at this point in the history
  • Loading branch information
janvanmansum committed Dec 8, 2024
1 parent 46659aa commit d973557
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 93 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
import nl.knaw.dans.dvingest.core.dansbag.deposit.DansBagDeposit;
import nl.knaw.dans.dvingest.core.dansbag.deposit.DansBagDepositReader;
import nl.knaw.dans.dvingest.core.dansbag.deposit.DansBagDepositReaderImpl;
import nl.knaw.dans.dvingest.core.dansbag.deposit.FileInfo;
import nl.knaw.dans.dvingest.core.dansbag.exception.InvalidDepositException;
import nl.knaw.dans.dvingest.core.dansbag.mapper.DepositToDvDatasetMetadataMapper;
import nl.knaw.dans.dvingest.core.dansbag.mapper.mapping.FileElement;
import nl.knaw.dans.dvingest.core.dansbag.xml.XPathEvaluator;
import nl.knaw.dans.dvingest.core.dansbag.xml.XmlReader;
import nl.knaw.dans.dvingest.core.dansbag.xml.XmlReaderImpl;
import nl.knaw.dans.dvingest.core.service.DataverseService;
Expand All @@ -42,9 +45,17 @@
import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeParseException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

@Slf4j
public class DansBagMappingServiceImpl implements DansBagMappingService {
Expand Down Expand Up @@ -133,7 +144,9 @@ public Dataset getDatasetMetadataFromDansDeposit(DansBagDeposit dansDeposit) {

/**
 * Composes the file edit instructions for a deposit.
 *
 * The deposit-specific inputs (the file infos and the date available) are extracted here and handed to
 * {@link EditFilesComposer}, so that the composer itself does not need a {@link DansBagDeposit}.
 *
 * @param dansDeposit the deposit to compose the file edits for
 * @return the composed {@link EditFiles}
 * @throws IllegalArgumentException if the deposit has no ddm:available element (raised by getDateAvailable)
 */
@Override
public EditFiles getEditFilesFromDansDeposit(DansBagDeposit dansDeposit) {
    var files = getFileInfo(dansDeposit);
    var dateAvailable = getDateAvailable(dansDeposit);
    return new EditFilesComposer(files, dateAvailable, fileExclusionPattern, embargoExclusions).composeEditFiles();
}

@Override
Expand All @@ -156,6 +169,49 @@ public String packageOriginalMetadata(DansBagDeposit dansDeposit) throws IOExcep
return zipFile.toString();
}

// todo: move to mapping package
/**
 * Looks up the file infos of the deposit and re-keys them so that every path is expressed relative to the
 * path {@code data}.
 *
 * @param dansDeposit the deposit whose file infos are read
 * @return a map from the relativized path to the corresponding file info
 */
private Map<Path, FileInfo> getFileInfo(DansBagDeposit dansDeposit) {
    var dataDir = Path.of("data");
    var infoByBagPath = FileElement.pathToFileInfo(dansDeposit, false); // TODO: handle migration case

    // Re-key while collecting: the value is kept as is, only the key is relativized.
    return infoByBagPath.entrySet().stream()
        .collect(Collectors.toMap(
            bagEntry -> dataDir.relativize(bagEntry.getKey()),
            Map.Entry::getValue));
}

// todo: move to mapping package
/**
 * Reads the first ddm:available value from the DDM of the deposit and parses it to an {@link Instant}.
 *
 * @param dansBagDeposit the deposit whose DDM is evaluated
 * @return the parsed value of the first ddm:available element
 * @throws IllegalArgumentException if the DDM contains no ddm:available element
 */
private Instant getDateAvailable(DansBagDeposit dansBagDeposit) {
    var availableValues = XPathEvaluator.strings(dansBagDeposit.getDdm(), "/ddm:DDM/ddm:profile/ddm:available");
    var firstAvailable = availableValues
        .map(DansBagMappingServiceImpl::parseDate)
        .findFirst();

    if (firstAvailable.isEmpty()) {
        throw new IllegalArgumentException("Deposit without a ddm:available element");
    }
    return firstAvailable.get();
}

// todo: move to util class
/**
 * Parses a date string to an {@link Instant}. Three formats are tried, in this order:
 * <ol>
 * <li>{@link LocalDate}: interpreted as the start of that day in the system default time zone;</li>
 * <li>{@link ZonedDateTime}: the zone/offset information in the value itself is used;</li>
 * <li>{@link LocalDateTime}: interpreted in the system default time zone.</li>
 * </ol>
 *
 * @param value the string to parse
 * @return the instant represented by the value
 * @throws DateTimeParseException if the value matches none of the three formats
 */
private static Instant parseDate(String value) {
    try {
        log.debug("Trying to parse {} as LocalDate", value);
        return LocalDate.parse(value).atStartOfDay(ZoneId.systemDefault()).toInstant();
    }
    catch (DateTimeParseException e) {
        try {
            log.debug("Trying to parse {} as ZonedDateTime", value);
            return ZonedDateTime.parse(value).toInstant();
        }
        catch (DateTimeParseException ee) {
            log.debug("Trying to parse {} as LocalDateTime", value);
            // Resolve the offset with the zone rules valid at the parsed date-time itself. Using the offset that
            // is in force right now would shift values on the other side of a DST transition by an hour.
            return LocalDateTime.parse(value).atZone(ZoneId.systemDefault()).toInstant();
        }
    }
}

Optional<String> getDateOfDeposit(DansBagDeposit dansDeposit) {
if (dansDeposit.isUpdate()) {
return Optional.empty(); // See for implementation CIT025B in DatasetUpdater
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,10 @@
package nl.knaw.dans.dvingest.core.dansbag;

import lombok.AllArgsConstructor;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import nl.knaw.dans.dvingest.core.bagprocessor.DataversePath;
import nl.knaw.dans.dvingest.core.dansbag.deposit.DansBagDeposit;
import nl.knaw.dans.dvingest.core.dansbag.deposit.FileInfo;
import nl.knaw.dans.dvingest.core.dansbag.mapper.mapping.FileElement;
import nl.knaw.dans.dvingest.core.dansbag.xml.XPathEvaluator;
import nl.knaw.dans.dvingest.core.yaml.AddEmbargo;
import nl.knaw.dans.dvingest.core.yaml.EditFiles;
import nl.knaw.dans.dvingest.core.yaml.FromTo;
Expand All @@ -30,11 +28,6 @@
import java.nio.file.Path;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeParseException;
import java.util.Date;
import java.util.List;
import java.util.Map;
Expand All @@ -48,15 +41,20 @@
@Slf4j
@AllArgsConstructor
public class EditFilesComposer {
protected final DansBagDeposit dansDeposit;
private final Pattern fileExclusionPattern;
private final List<String> embargoExclusions;
private static final SimpleDateFormat yyyymmddFormat = new SimpleDateFormat("yyyy-MM-dd");
protected static final SimpleDateFormat yyyymmddFormat = new SimpleDateFormat("yyyy-MM-dd");

@NonNull
protected final Map<Path, FileInfo> files;
@NonNull
protected final Instant dateAvailable;

protected final Pattern fileExclusionPattern;
@NonNull
protected final List<String> embargoExclusions;

public EditFiles composeEditFiles() {
var pathFileInfoMap = getFileInfo(dansDeposit);
var pathFileInfoMap = files;
var renamedFiles = getAutoRenameMap(pathFileInfoMap);
init(renamedFiles);
var ignoredFiles = getFilesToIgnore(pathFileInfoMap);

var editFiles = new EditFiles();
Expand All @@ -66,11 +64,7 @@ public EditFiles composeEditFiles() {
editFiles.setAutoRenameFiles(getAutoRenamedFiles(renamedFiles));
editFiles.setAddRestrictedFiles(getRestrictedFilesToAdd(pathFileInfoMap));
editFiles.setUpdateFileMetas(getUpdatedFileMetas(pathFileInfoMap));
editFiles.setDeleteFiles(getDeleteFiles(pathFileInfoMap));
editFiles.setMoveFiles(getFileMovements(pathFileInfoMap));
editFiles.setReplaceFiles(getReplacedFiles(pathFileInfoMap));

var dateAvailable = getDateAvailable(dansDeposit);
var filePathsToEmbargo = getEmbargoedFiles(pathFileInfoMap, dateAvailable);
if (!filePathsToEmbargo.isEmpty()) {
var addEmbargo = new AddEmbargo();
Expand All @@ -81,14 +75,6 @@ public EditFiles composeEditFiles() {
return editFiles;
}

/**
 * Get the files that should be replaced. The result is passed to {@code EditFiles.setReplaceFiles} by
 * {@code composeEditFiles}. This implementation ignores its argument and always returns an empty list.
 *
 * @param pathFileInfoMap the file infos, keyed by path (unused here)
 * @return an empty list
 */
protected List<String> getReplacedFiles(Map<Path, FileInfo> pathFileInfoMap) {
return List.of();
}

/**
 * Hook that {@code composeEditFiles} calls with the auto-rename map, right after that map has been computed.
 * This implementation is intentionally empty.
 *
 * @param renamedFiles the map returned by {@code getAutoRenameMap} (unused here)
 */
protected void init(Map<String, String> renamedFiles) {
// do nothing
}

/**
* Get the files that should not be processed by the ingest service.
*
Expand Down Expand Up @@ -130,26 +116,6 @@ protected List<FileMeta> getUpdatedFileMetas(Map<Path, FileInfo> files) {
.toList();
}

/**
 * Get the files that should be deleted. The result is passed to {@code EditFiles.setDeleteFiles} by
 * {@code composeEditFiles}. This implementation ignores its argument and always returns an empty list.
 *
 * @param files the file infos found in files.xml (unused here)
 * @return a list of file paths that should be deleted; always empty in this implementation
 */
protected List<String> getDeleteFiles(Map<Path, FileInfo> files) {
return List.of();
}

/**
 * Get the files that should be moved. The result is passed to {@code EditFiles.setMoveFiles} by
 * {@code composeEditFiles}. This implementation ignores its argument and always returns an empty list.
 *
 * @param files the file infos found in files.xml (unused here)
 * @return a list of FromTo objects that specify the files to move and their new location; always empty in
 * this implementation
 */
protected List<FromTo> getFileMovements(Map<Path, FileInfo> files) {
return List.of();
}

private List<Path> getEmbargoedFiles(Map<Path, FileInfo> files, Instant dateAvailable) {
var now = Instant.now();
if (dateAvailable.isAfter(now)) {
Expand All @@ -162,21 +128,6 @@ private List<Path> getEmbargoedFiles(Map<Path, FileInfo> files, Instant dateAvai
}
}

/**
 * Looks up the file infos of the deposit and re-keys them so that every path is expressed relative to the
 * path {@code data}.
 *
 * @param dansDeposit the deposit whose file infos are read
 * @return a map from the relativized path to the corresponding file info
 */
protected Map<Path, FileInfo> getFileInfo(DansBagDeposit dansDeposit) {
    var dataDir = Path.of("data");
    var infoByBagPath = FileElement.pathToFileInfo(dansDeposit, false); // TODO: handle migration case

    // Re-key while collecting: the value is kept as is, only the key is relativized.
    return infoByBagPath.entrySet().stream()
        .collect(Collectors.toMap(
            bagEntry -> dataDir.relativize(bagEntry.getKey()),
            Map.Entry::getValue));
}

private boolean hasAttributes(FileMeta fileMeta) {
return (fileMeta.getCategories() != null && !fileMeta.getCategories().isEmpty()) ||
(fileMeta.getDescription() != null && !fileMeta.getDescription().isBlank());
Expand All @@ -200,31 +151,4 @@ protected Map<String, String> getAutoRenameMap(Map<Path, FileInfo> files) {
.collect(Collectors.toMap(entry -> entry.getKey().toString(),
entry -> new DataversePath(entry.getValue().getMetadata().getDirectoryLabel(), entry.getValue().getMetadata().getLabel()).toString()));
}

// TODO: move to mapping package
/**
 * Reads the first ddm:available value from the DDM of the deposit and parses it to an {@link Instant}.
 *
 * @param dansBagDeposit the deposit whose DDM is evaluated
 * @return the parsed value of the first ddm:available element
 * @throws IllegalArgumentException if the DDM contains no ddm:available element
 */
private Instant getDateAvailable(DansBagDeposit dansBagDeposit) {
    var availableValues = XPathEvaluator.strings(dansBagDeposit.getDdm(), "/ddm:DDM/ddm:profile/ddm:available");
    var firstAvailable = availableValues
        .map(EditFilesComposer::parseDate)
        .findFirst();

    if (firstAvailable.isEmpty()) {
        throw new IllegalArgumentException("Deposit without a ddm:available element");
    }
    return firstAvailable.get();
}

/**
 * Parses a date string to an {@link Instant}. Three formats are tried, in this order:
 * <ol>
 * <li>{@link LocalDate}: interpreted as the start of that day in the system default time zone;</li>
 * <li>{@link ZonedDateTime}: the zone/offset information in the value itself is used;</li>
 * <li>{@link LocalDateTime}: interpreted in the system default time zone.</li>
 * </ol>
 *
 * @param value the string to parse
 * @return the instant represented by the value
 * @throws DateTimeParseException if the value matches none of the three formats
 */
private static Instant parseDate(String value) {
    try {
        log.debug("Trying to parse {} as LocalDate", value);
        return LocalDate.parse(value).atStartOfDay(ZoneId.systemDefault()).toInstant();
    }
    catch (DateTimeParseException e) {
        try {
            log.debug("Trying to parse {} as ZonedDateTime", value);
            return ZonedDateTime.parse(value).toInstant();
        }
        catch (DateTimeParseException ee) {
            log.debug("Trying to parse {} as LocalDateTime", value);
            // Resolve the offset with the zone rules valid at the parsed date-time itself. Using the offset that
            // is in force right now would shift values on the other side of a DST transition by an hour.
            return LocalDateTime.parse(value).atZone(ZoneId.systemDefault()).toInstant();
        }
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

import lombok.extern.slf4j.Slf4j;
import nl.knaw.dans.dvingest.core.bagprocessor.FilesInDatasetCache;
import nl.knaw.dans.dvingest.core.dansbag.deposit.DansBagDeposit;
import nl.knaw.dans.dvingest.core.dansbag.deposit.FileInfo;
import nl.knaw.dans.dvingest.core.service.DataverseService;
import nl.knaw.dans.dvingest.core.yaml.EditFiles;
Expand All @@ -27,6 +26,7 @@

import java.io.IOException;
import java.nio.file.Path;
import java.time.Instant;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
Expand All @@ -42,15 +42,16 @@ public class EditFilesComposerForUpdate extends EditFilesComposer {
private final String updatesDatasetPid;
private final DataverseService dataverseService;

/**
 * Creates a composer for the file edits of an update deposit.
 *
 * @param files                the file infos, keyed by path; passed on to the superclass
 * @param dateAvailable        the date available; passed on to the superclass
 * @param updatesDatasetPid    stored in this composer's {@code updatesDatasetPid} field
 * @param fileExclusionPattern passed on to the superclass
 * @param embargoExclusions    passed on to the superclass
 * @param dataverseService     stored in this composer's {@code dataverseService} field
 */
public EditFilesComposerForUpdate(Map<Path, FileInfo> files, Instant dateAvailable, String updatesDatasetPid, Pattern fileExclusionPattern, List<String> embargoExclusions,
    DataverseService dataverseService) {
    super(files, dateAvailable, fileExclusionPattern, embargoExclusions);
    this.updatesDatasetPid = updatesDatasetPid;
    this.dataverseService = dataverseService;
}

@Override
public EditFiles composeEditFiles() {
var pathFileInfoMap = getFileInfo(dansDeposit);
var pathFileInfoMap = files;
var renamedFiles = getAutoRenameMap(pathFileInfoMap);
// TODO: this should be a read-only variant of the cache
FilesInDatasetCache filesInDatasetCache = new FilesInDatasetCache(dataverseService, renamedFiles);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvingest.core.dansbag;

/**
 * Test class placeholder named after EditFilesComposerForUpdate; it does not contain any test methods yet.
 */
public class EditFilesComposerForUpdateTest {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvingest.core.dansbag;

/**
 * Test class placeholder named after EditFilesComposer; it does not contain any test methods yet.
 */
public class EditFilesComposerTest {
}

0 comments on commit d973557

Please sign in to comment.