Skip to content

Commit

Permalink
WIP (#3)
Browse files Browse the repository at this point in the history
* FilesInDatasetCache

* Extended examples

* rename

* Changed copy-examples.sh so that you can run it from a different directory
and it will still use the dd-dataverse-ingest project directory as the
reference point.

* Transplanted moveFile algorithm from old code

* Copied old classes to new package before pruning

* Pruning and simplifying old code

* More pruning

* More pruning

* Simplified DepositFile/FileInfo

* Renamed Deposit in legacy code to DansBagDeposit

* Removed originalFilePathMapping as we are not going to use it in the Data Stations

* Removed domain package

* Removed more unnecessary classes and interfaces

* Done simplifying for now

* First transplant of update logic from dd-ingest-flow. Now, to test!

* Refactored EditFilesComposer for easier unit testing.

* Some unit tests for EditFilesComposer.

* Finishing testing EditFilesComposer for now.

* Started testing EditFilesComposerForUpdate.

* Added 'addUnrestrictedFiles' property. Adding unrestricted files is no longer the default; they must be declared explicitly.
'ignoreFiles' can now be removed.

* Made ignoring files the default instead of adding unrestricted.

* Started implementing tests for the sword examples.

* Removed redundant helper.

* Added saving the NBN to deposit.properties in onSuccess + fixed some regressions introduced after changing to explicit addUnrestrictedFiles.

* Some fixes in update editor.

* movements example from sword-examples working correctly.
  • Loading branch information
janvanmansum authored Dec 9, 2024
1 parent f0a22a0 commit e6dc9de
Show file tree
Hide file tree
Showing 192 changed files with 2,927 additions and 3,070 deletions.
9 changes: 4 additions & 5 deletions copy-examples.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@
# limitations under the License.
#


SCRIPT_DIR=$(cd $(dirname "$0") && pwd)
BATCH_NAME=${1:-"test-deposits"}

rm -rf data/import/inbox/$BATCH_NAME
rm -fr data/import/outbox/$BATCH_NAME
cp -r src/test/resources/test-deposits data/import/inbox/$BATCH_NAME

rm -rf "$SCRIPT_DIR/data/import/inbox/$BATCH_NAME"
rm -rf "$SCRIPT_DIR/data/import/outbox/$BATCH_NAME"
cp -r "$SCRIPT_DIR/src/test/resources/test-deposits" "$SCRIPT_DIR/data/import/inbox/$BATCH_NAME"
11 changes: 5 additions & 6 deletions docs/description.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ editFiles:
addRestrictedFiles:
- 'file4.txt'
- 'subdirectory/file5.txt'
# Unrestricted files are added by default at this point
addUnrestrictedFiles:
- 'file6.txt'
moveFiles:
- from: 'file6.txt' # Old location in the dataset
to: 'subdirectory/file6.txt' # New location in the dataset
Expand All @@ -92,11 +93,9 @@ editFiles:
directoryLabel: "subdirectory"
restricted: false
categories: [ 'Testlabel' ]
ignoreFiles:
- 'file7.txt' # This file will NOT be added to the dataset
renameAtUploadFiles:
- from: 'file8.txt' # Local file name
to: 'file9.txt' # The file name assigned in the dataset
autoRenameFiles:
- from: "Unsanitize'd/file?" # Local file name
to: "Sanitize_d/file_" # The file name assigned in the dataset
addEmbargoes:
- filePaths: [ 'file1.txt' ] # All other files will NOT be embargoed
dateAvailable: '2030-01-01'
Expand Down
22 changes: 21 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@
<main-class>nl.knaw.dans.dvingest.DdDataverseIngestApplication</main-class>
<dd-dataverse-ingest-api.version>0.2.0</dd-dataverse-ingest-api.version>
<dd-validate-dans-bag-api.version>1.0.0</dd-validate-dans-bag-api.version>
<dd-dans-sword2-examples.version>1.0.0</dd-dans-sword2-examples.version>
</properties>

<scm>
<developerConnection>scm:git:ssh://github.com/DANS-KNAW/dd-dataverse-ingest</developerConnection>
<developerConnection>scm:git:ssh://github.com/DANS-KNAW/${project.artifactId}</developerConnection>
<tag>HEAD</tag>
</scm>

Expand Down Expand Up @@ -216,6 +217,7 @@
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>compile-dependencies</id>
<phase>initialize</phase>
<goals>
<goal>unpack</goal>
Expand All @@ -237,6 +239,24 @@
</artifactItems>
</configuration>
</execution>
<execution>
<id>test-dependencies</id>
<phase>generate-test-resources</phase>
<goals>
<goal>unpack</goal>
</goals>
<configuration>
<artifactItems>
<artifactItem>
<groupId>nl.knaw.dans</groupId>
<artifactId>dd-dans-sword2-examples</artifactId>
<version>${dd-dans-sword2-examples.version}</version>
<outputDirectory>${project.build.directory}/test</outputDirectory>
<includes>example-bags/**</includes>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@
import nl.knaw.dans.dvingest.core.dansbag.DansBagMappingService;
import nl.knaw.dans.dvingest.core.dansbag.DansBagMappingServiceImpl;
import nl.knaw.dans.dvingest.core.dansbag.SupportedLicenses;
import nl.knaw.dans.dvingest.core.dansbag.mapper.DepositToDvDatasetMetadataMapper;
import nl.knaw.dans.dvingest.core.service.DataverseService;
import nl.knaw.dans.dvingest.core.service.DataverseServiceImpl;
import nl.knaw.dans.dvingest.core.service.UtilityServicesImpl;
import nl.knaw.dans.dvingest.core.service.YamlServiceImpl;
import nl.knaw.dans.dvingest.resources.DefaultApiResource;
import nl.knaw.dans.dvingest.resources.IllegalArgumentExceptionMapper;
import nl.knaw.dans.dvingest.resources.IngestApiResource;
import nl.knaw.dans.ingest.core.service.mapper.DepositToDvDatasetMetadataMapper;
import nl.knaw.dans.lib.dataverse.DataverseException;
import nl.knaw.dans.lib.util.MappingLoader;
import org.apache.commons.io.FileUtils;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@
*/
package nl.knaw.dans.dvingest.client;

import nl.knaw.dans.ingest.core.exception.RejectedDepositException;
import nl.knaw.dans.validatedansbag.client.api.ValidateOkDto;
import nl.knaw.dans.validatedansbag.client.api.ValidateOkDto.InformationPackageTypeEnum;

import java.nio.file.Path;

/**
* Proxy for dd-validate-dans-bag service.
*/
public interface ValidateDansBagService {

ValidateOkDto validate(Path bag);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
package nl.knaw.dans.dvingest.config;

import lombok.Data;
import lombok.NonNull;

import javax.validation.Valid;
import javax.validation.constraints.NotNull;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
package nl.knaw.dans.dvingest.config;

import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
import io.dropwizard.util.DataSize;
import lombok.Data;

import javax.validation.Valid;
import javax.validation.constraints.NotNull;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
*/
package nl.knaw.dans.dvingest.config;

import io.dropwizard.client.HttpClientConfiguration;
import io.dropwizard.client.JerseyClientConfiguration;
import lombok.Data;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ public class DataverseIngestDeposit implements Comparable<DataverseIngestDeposit
private Path location;

private final Properties depositProperties;
private final String updatesDataset;
private final YamlService yamlService;

public DataverseIngestDeposit(@NonNull Path location, @NonNull YamlService yamlService) {
Expand All @@ -58,13 +57,17 @@ public DataverseIngestDeposit(@NonNull Path location, @NonNull YamlService yamlS
}
this.creationTimestamp = OffsetDateTime.parse(creationTimestamp);
this.id = UUID.fromString(location.getFileName().toString());
this.updatesDataset = depositProperties.getProperty("updates.dataset");
}
catch (IOException e) {
throw new IllegalStateException("Error loading deposit properties from " + location.resolve("deposit.properties"), e);
}
}

/**
 * Returns the value stored under {@code UPDATES_DATASET_KEY} in this deposit's properties.
 * The property is looked up on every call rather than being cached in a field.
 *
 * @return the property value; presumably {@code null} when the key is absent (assumes
 *         {@code depositProperties} is a {@code java.util.Properties} - TODO confirm)
 */
@Override
public String getUpdatesDataset() {
return depositProperties.getProperty(UPDATES_DATASET_KEY);
}

/**
 * Performs no conversion and always reports that nothing was converted.
 * NOTE(review): presumably a plain Dataverse ingest deposit never needs DANS deposit
 * conversion - confirm against the interface contract.
 *
 * @return always {@code false}
 */
@Override
public boolean convertDansDepositIfNeeded() {
return false;
}
2 changes: 2 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/core/Deposit.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import java.util.UUID;

public interface Deposit {
String UPDATES_DATASET_KEY = "updates-dataset";

boolean convertDansDepositIfNeeded();

String getUpdatesDataset();
Expand Down
8 changes: 5 additions & 3 deletions src/main/java/nl/knaw/dans/dvingest/core/DepositTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
import nl.knaw.dans.dvingest.core.bagprocessor.BagProcessor;
import nl.knaw.dans.dvingest.core.dansbag.DansBagMappingService;
import nl.knaw.dans.dvingest.core.dansbag.DansDepositSupport;
import nl.knaw.dans.dvingest.core.dansbag.exception.RejectedDepositException;
import nl.knaw.dans.dvingest.core.service.DataverseService;
import nl.knaw.dans.dvingest.core.service.UtilityServices;
import nl.knaw.dans.dvingest.core.service.YamlService;
import nl.knaw.dans.ingest.core.exception.RejectedDepositException;

import java.io.IOException;
import java.nio.file.Path;
Expand All @@ -50,7 +50,7 @@ public enum Status {
public DepositTask(DataverseIngestDeposit dataverseIngestDeposit, Path outputDir, boolean onlyConvertDansDeposit, ValidateDansBagService validateDansBagService, DataverseService dataverseService, UtilityServices utilityServices,
DansBagMappingService dansBagMappingService,
YamlService yamlService) {
this.deposit = dansBagMappingService == null ? dataverseIngestDeposit : new DansDepositSupport(validateDansBagService, dataverseIngestDeposit, dansBagMappingService, yamlService);
this.deposit = dansBagMappingService == null ? dataverseIngestDeposit : new DansDepositSupport(dataverseIngestDeposit, validateDansBagService, dansBagMappingService, dataverseService, yamlService);
this.dataverseService = dataverseService;
this.onlyConvertDansDeposit = onlyConvertDansDeposit;
this.utilityServices = utilityServices;
Expand All @@ -59,13 +59,14 @@ public DepositTask(DataverseIngestDeposit dataverseIngestDeposit, Path outputDir

@Override
public void run() {
String pid = deposit.getUpdatesDataset();
String pid = null;
try {
deposit.validate();
if (deposit.convertDansDepositIfNeeded() && onlyConvertDansDeposit) {
log.info("Only converting DANS deposit, LEAVING CONVERTED DEPOSIT IN PLACE");
return;
}
pid = deposit.getUpdatesDataset();

for (DataverseIngestBag bag : deposit.getBags()) {
log.info("START processing deposit / bag: {} / {}", deposit.getId(), bag);
Expand All @@ -81,6 +82,7 @@ public void run() {
deposit.onSuccess(pid, "Deposit processed successfully");
deposit.moveTo(outputDir.resolve("processed"));
}
// TODO: move RejectedDepositException to a package that is not specific to DANS
catch (RejectedDepositException e) {
try {
log.error("Deposit rejected", e);
Expand Down
3 changes: 0 additions & 3 deletions src/main/java/nl/knaw/dans/dvingest/core/IngestArea.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
*/
package nl.knaw.dans.dvingest.core;

import io.dropwizard.lifecycle.Managed;
import lombok.Builder;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
Expand All @@ -27,8 +26,6 @@
import nl.knaw.dans.dvingest.core.service.DataverseService;
import nl.knaw.dans.dvingest.core.service.UtilityServices;
import nl.knaw.dans.dvingest.core.service.YamlService;
import nl.knaw.dans.dvingest.core.service.YamlServiceImpl;
import nl.knaw.dans.lib.util.inbox.Inbox;

import java.nio.file.Path;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
import java.io.IOException;
import java.util.UUID;


/**
* Processes a bag, creating and/or editing a dataset version in Dataverse.
*/
@Slf4j
public class BagProcessor {
private final DatasetVersionCreator datasetVersionCreator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import org.apache.tika.utils.StringUtils;

/**
* A filepath in Dataverse is a combination of file label and directory label. This class converts between a regular representation of a path and the Dataverse representation.
* A filepath in Dataverse is a combination of file label and directory label. This class converts between a regular representation of a filepath and the Dataverse representation.
*/
@Value
public class DataversePath {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,8 @@ public class FileUploadInclusionPredicate implements Predicate<File> {

@Override
public boolean evaluate(File file) {
if (restrictedFiles) {
return editFiles != null && isRestricted(file) && notReplaced(file) && notIgnored(file);
}
else {
return editFiles == null || notRestricted(file) && notReplaced(file) && notIgnored(file);
}
return editFiles != null && (restrictedFiles ? isRestricted(file) : notRestricted(file))
&& notReplaced(file);
}

private boolean notReplaced(File file) {
Expand All @@ -47,10 +43,6 @@ private boolean isRestricted(File file) {
}

private boolean notRestricted(File file) {
return !editFiles.getAddRestrictedFiles().contains(dataDir.relativize(file.toPath()).toString());
}

private boolean notIgnored(File file) {
return !editFiles.getIgnoreFiles().contains(dataDir.relativize(file.toPath()).toString());
return editFiles.getAddUnrestrictedFiles().contains(dataDir.relativize(file.toPath()).toString());
}
}
Loading

0 comments on commit e6dc9de

Please sign in to comment.