diff --git a/src/main/assembly/dist/cfg/config.yml b/src/main/assembly/dist/cfg/config.yml
index b5f1334..f18cb54 100644
--- a/src/main/assembly/dist/cfg/config.yml
+++ b/src/main/assembly/dist/cfg/config.yml
@@ -29,6 +29,7 @@ ingest:
     inbox: /var/opt/dans.knaw.nl/tmp/import/inbox
     outbox: /var/opt/dans.knaw.nl/tmp/import/outbox
   tempDir: /var/opt/dans.knaw.nl/tmp/zip-wrapping
+  metadataKeys: {}
   waitForReleasedState:
     # 10s * 360 = 1 hour
     maxNumberOfRetries: 360
diff --git a/src/main/java/nl/knaw/dans/dvingest/DdDataverseIngestApplication.java b/src/main/java/nl/knaw/dans/dvingest/DdDataverseIngestApplication.java
index 1169ced..7b81707 100644
--- a/src/main/java/nl/knaw/dans/dvingest/DdDataverseIngestApplication.java
+++ b/src/main/java/nl/knaw/dans/dvingest/DdDataverseIngestApplication.java
@@ -20,6 +20,7 @@
 import io.dropwizard.core.setup.Bootstrap;
 import io.dropwizard.core.setup.Environment;
 import nl.knaw.dans.dvingest.config.DdDataverseIngestConfiguration;
+import nl.knaw.dans.dvingest.core.DatasetTaskFactory;
 import nl.knaw.dans.dvingest.core.DataverseServiceImpl;
 import nl.knaw.dans.dvingest.core.IngestArea;
 import nl.knaw.dans.dvingest.core.UtilityServicesImpl;
@@ -47,6 +48,7 @@ public void run(final DdDataverseIngestConfiguration configuration, final Enviro
         var dataverseClient = configuration.getDataverse().build();
         var dataverseService = DataverseServiceImpl.builder()
             .dataverseClient(dataverseClient)
+            .metadataKeys(configuration.getIngest().getMetadataKeys())
             .millisecondsBetweenChecks(configuration.getIngest().getWaitForReleasedState().getTimeBetweenChecks().toMilliseconds())
             .maxNumberOfRetries(configuration.getIngest().getWaitForReleasedState().getMaxNumberOfRetries())
             .build();
@@ -58,6 +60,7 @@ public void run(final DdDataverseIngestConfiguration configuration, final Enviro
             .executorService(environment.lifecycle().executorService("import").minThreads(1).maxThreads(1).build())
             .dataverseService(dataverseService)
             .utilityServices(utilityServices)
+            .datasetTaskFactory(new DatasetTaskFactory(dataverseService, utilityServices))
             .inbox(configuration.getIngest().getImportConfig().getInbox())
             .outbox(configuration.getIngest().getImportConfig().getOutbox()).build();
         environment.jersey().register(new IngestApiResource(importArea));
diff --git a/src/main/java/nl/knaw/dans/dvingest/config/IngestConfig.java b/src/main/java/nl/knaw/dans/dvingest/config/IngestConfig.java
index 13ca0a8..1682de5 100644
--- a/src/main/java/nl/knaw/dans/dvingest/config/IngestConfig.java
+++ b/src/main/java/nl/knaw/dans/dvingest/config/IngestConfig.java
@@ -21,6 +21,8 @@
 import javax.validation.Valid;
 import javax.validation.constraints.NotNull;
 import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
 
 @Data
 public class IngestConfig {
@@ -34,6 +36,7 @@ public class IngestConfig {
 
     private int maxNumberOfFilesPerUpload = 1000;
 
+    private Map<String, String> metadataKeys = new HashMap<>();
 
     @Valid
     @NotNull
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/IngestTask.java b/src/main/java/nl/knaw/dans/dvingest/core/CreateNewDatasetTask.java
similarity index 98%
rename from src/main/java/nl/knaw/dans/dvingest/core/IngestTask.java
rename to src/main/java/nl/knaw/dans/dvingest/core/CreateNewDatasetTask.java
index cd6b859..5fdbcf3 100644
--- a/src/main/java/nl/knaw/dans/dvingest/core/IngestTask.java
+++ b/src/main/java/nl/knaw/dans/dvingest/core/CreateNewDatasetTask.java
@@ -26,7 +26,7 @@
 
 @Slf4j
 @AllArgsConstructor
-public class IngestTask implements Runnable {
+public class CreateNewDatasetTask implements Runnable {
     private final Deposit deposit;
     private final DataverseService dataverseService;
     private final UtilityServices utilityServices;
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/DatasetTaskFactory.java b/src/main/java/nl/knaw/dans/dvingest/core/DatasetTaskFactory.java
new file mode 100644
index 0000000..c8f6dac
--- /dev/null
+++ b/src/main/java/nl/knaw/dans/dvingest/core/DatasetTaskFactory.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package nl.knaw.dans.dvingest.core;
+
+import lombok.AllArgsConstructor;
+
+import java.nio.file.Path;
+
+@AllArgsConstructor
+public class DatasetTaskFactory {
+    private final DataverseService dataverseService;
+    private final UtilityServices utilityServices;
+
+    public Runnable createIngestTask(Deposit deposit, Path outputDir) {
+        if (deposit.isUpdate()) {
+            return new UpdateDatasetTask(deposit, dataverseService, utilityServices, outputDir);
+        }
+        else {
+            return new CreateNewDatasetTask(deposit, dataverseService, utilityServices, outputDir);
+        }
+    }
+
+}
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/DataverseService.java b/src/main/java/nl/knaw/dans/dvingest/core/DataverseService.java
index 1085f28..29c564c 100644
--- a/src/main/java/nl/knaw/dans/dvingest/core/DataverseService.java
+++ b/src/main/java/nl/knaw/dans/dvingest/core/DataverseService.java
@@ -20,6 +20,7 @@
 import nl.knaw.dans.lib.dataverse.model.dataset.Dataset;
 import nl.knaw.dans.lib.dataverse.model.dataset.DatasetCreationResult;
 import nl.knaw.dans.lib.dataverse.model.dataset.DatasetPublicationResult;
+import nl.knaw.dans.lib.dataverse.model.dataset.DatasetVersion;
 import nl.knaw.dans.lib.dataverse.model.dataset.FileList;
 import nl.knaw.dans.lib.dataverse.model.file.FileMeta;
@@ -35,4 +36,6 @@
     DataverseHttpResponse<DatasetPublicationResult> publishDataset(String persistentId) throws DataverseException, IOException;
 
     void waitForState(String persistentId, String state) throws DataverseException;
+
+    DataverseHttpResponse<DatasetVersion> updateMetadata(String targetDatasetPid, DatasetVersion datasetMetadata) throws DataverseException, IOException;
 }
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/DataverseServiceImpl.java b/src/main/java/nl/knaw/dans/dvingest/core/DataverseServiceImpl.java
index d3c10c0..65e0862 100644
--- a/src/main/java/nl/knaw/dans/dvingest/core/DataverseServiceImpl.java
+++ b/src/main/java/nl/knaw/dans/dvingest/core/DataverseServiceImpl.java
@@ -24,11 +24,14 @@
 import nl.knaw.dans.lib.dataverse.model.dataset.Dataset;
 import nl.knaw.dans.lib.dataverse.model.dataset.DatasetCreationResult;
 import nl.knaw.dans.lib.dataverse.model.dataset.DatasetPublicationResult;
+import nl.knaw.dans.lib.dataverse.model.dataset.DatasetVersion;
 import nl.knaw.dans.lib.dataverse.model.dataset.FileList;
 import nl.knaw.dans.lib.dataverse.model.file.FileMeta;
 
 import java.io.IOException;
 import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
 
 @Builder
 @Slf4j
@@ -42,8 +45,11 @@ public class DataverseServiceImpl implements DataverseService {
     @Builder.Default
     private long millisecondsBetweenChecks = 3000;
 
+    @Builder.Default
+    private Map<String, String> metadataKeys = new HashMap<>();
+
     public DataverseHttpResponse<DatasetCreationResult> createDataset(Dataset datasetMetadata) throws DataverseException, IOException {
-        return dataverseClient.dataverse("root").createDataset(datasetMetadata);
+        return dataverseClient.dataverse("root").createDataset(datasetMetadata, metadataKeys);
     }
 
     @Override
@@ -51,10 +57,17 @@ public DataverseHttpResponse<FileList> addFile(String persistentId, Path file, F
         return dataverseClient.dataset(persistentId).addFile(file, fileMeta);
     }
 
+    @Override
     public DataverseHttpResponse<DatasetPublicationResult> publishDataset(String persistentId) throws DataverseException, IOException {
         return dataverseClient.dataset(persistentId).publish();
     }
 
+    @Override
+    public DataverseHttpResponse<DatasetVersion> updateMetadata(String targetDatasetPid, DatasetVersion datasetMetadata) throws DataverseException, IOException {
+        return dataverseClient.dataset(targetDatasetPid).updateMetadata(datasetMetadata, metadataKeys);
+    }
+
+    // TODO: move this to dans-dataverse-client-lib; it is similar to awaitLockState.
     public void waitForState(String datasetId, String expectedState) {
         var numberOfTimesTried = 0;
         var state = "";
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/Deposit.java b/src/main/java/nl/knaw/dans/dvingest/core/Deposit.java
index b233ad7..73cd7de 100644
--- a/src/main/java/nl/knaw/dans/dvingest/core/Deposit.java
+++ b/src/main/java/nl/knaw/dans/dvingest/core/Deposit.java
@@ -103,13 +103,17 @@ public int getSequenceNumber() {
     }
 
     public OffsetDateTime getCreationTimestamp() {
-        var creationTimestamp = depositProperties.getProperty("creation-timestamp");
+        var creationTimestamp = depositProperties.getProperty("creation.timestamp");
         if (creationTimestamp == null) {
             return null;
         }
         return OffsetDateTime.parse(creationTimestamp);
     }
 
+    public boolean isUpdate() {
+        return Files.exists(getBagDir().resolve("update.yml"));
+    }
+
     public void moveTo(Path targetDir) throws IOException {
         Files.move(location, targetDir.resolve(location.getFileName()));
         location = targetDir.resolve(location.getFileName());
@@ -127,4 +131,9 @@ else if (getCreationTimestamp() != null && deposit.getCreationTimestamp() != nul
             throw new IllegalStateException("Deposit " + getId() + " should contain either a sequence number or a creation timestamp");
         }
     }
+
+    public String getTargetDatasetPid() throws IOException {
+        var updateInstructions = MAPPER.readValue(FileUtils.readFileToString(getBagDir().resolve("update.yml").toFile(), "UTF-8"), UpdateInstructions.class);
+        return updateInstructions.getTargetDatasetPid();
+    }
 }
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/ImportJob.java b/src/main/java/nl/knaw/dans/dvingest/core/ImportJob.java
index cd0c5f2..10eb5c6 100644
--- a/src/main/java/nl/knaw/dans/dvingest/core/ImportJob.java
+++ b/src/main/java/nl/knaw/dans/dvingest/core/ImportJob.java
@@ -18,18 +18,14 @@
 import lombok.Builder;
 import lombok.Getter;
 import lombok.NonNull;
-import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import nl.knaw.dans.dvingest.api.ImportCommandDto;
 import nl.knaw.dans.dvingest.api.ImportJobStatusDto;
 import nl.knaw.dans.dvingest.api.ImportJobStatusDto.StatusEnum;
-import nl.knaw.dans.lib.dataverse.DataverseClient;
 
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
 import java.util.TreeSet;
 
 @Slf4j
@@ -44,17 +40,27 @@ public class ImportJob implements Runnable {
     private final DataverseService dataverseService;
     @NonNull
     private final UtilityServices utilityServices;
+
+    @NonNull
+    private final DatasetTaskFactory datasetTaskFactory;
+
     @NonNull
     private final CompletionHandler completionHandler;
 
     @Getter
     private final ImportJobStatusDto status = new ImportJobStatusDto();
 
-    private ImportJob(ImportCommandDto importCommand, Path outputDir, DataverseService dataverseService, UtilityServices utilityServices, CompletionHandler completionHandler) {
+    private ImportJob(@NonNull ImportCommandDto importCommand,
+        @NonNull Path outputDir,
+        @NonNull DataverseService dataverseService,
+        @NonNull UtilityServices utilityServices,
+        @NonNull DatasetTaskFactory datasetTaskFactory,
+        @NonNull CompletionHandler completionHandler) {
         this.importCommand = importCommand;
         this.outputDir = outputDir;
         this.dataverseService = dataverseService;
         this.utilityServices = utilityServices;
+        this.datasetTaskFactory = datasetTaskFactory;
         this.completionHandler = completionHandler;
     }
 
@@ -84,7 +90,7 @@ public void run() {
             // Process deposits
             for (Deposit deposit : deposits) {
                 log.info("START Processing deposit: {}", deposit.getId());
-                new IngestTask(deposit, dataverseService, utilityServices, outputDir).run();
+                datasetTaskFactory.createIngestTask(deposit, outputDir).run();
                 log.info("END Processing deposit: {}", deposit.getId());
                 // TODO: record number of processed/rejected/failed deposits in ImportJob status
             }
@@ -94,7 +100,8 @@ public void run() {
         catch (Exception e) {
             log.error("Failed to process import job", e);
             status.setStatus(StatusEnum.FAILED);
-        } finally {
+        }
+        finally {
             completionHandler.handle(this);
         }
     }
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/IngestArea.java b/src/main/java/nl/knaw/dans/dvingest/core/IngestArea.java
index c52b52a..e4aafbe 100644
--- a/src/main/java/nl/knaw/dans/dvingest/core/IngestArea.java
+++ b/src/main/java/nl/knaw/dans/dvingest/core/IngestArea.java
@@ -39,17 +39,20 @@ public class IngestArea {
     @NonNull
     private final UtilityServices utilityServices;
     @NonNull
+    private final DatasetTaskFactory datasetTaskFactory;
+    @NonNull
     private final Path inbox;
     @NonNull
     private final Path outbox;
 
     private final Map<String, ImportJob> importJobs = new ConcurrentHashMap<>();
 
-    private IngestArea(ExecutorService executorService, DataverseService dataverseService, UtilityServices utilityServices, Path inbox, Path outbox) {
+    private IngestArea(ExecutorService executorService, DataverseService dataverseService, UtilityServices utilityServices, DatasetTaskFactory datasetTaskFactory, Path inbox, Path outbox) {
         try {
             this.executorService = executorService;
             this.dataverseService = dataverseService;
             this.utilityServices = utilityServices;
+            this.datasetTaskFactory = datasetTaskFactory;
             this.inbox = inbox.toAbsolutePath().toRealPath();
             this.outbox = outbox.toAbsolutePath().toRealPath();
         }
@@ -97,6 +100,7 @@ private ImportJob createImportJob(ImportCommandDto importCommand) {
             .outputDir(outbox.resolve(relativePath))
             .dataverseService(dataverseService)
             .utilityServices(utilityServices)
+            .datasetTaskFactory(datasetTaskFactory)
             .completionHandler(job -> importJobs.remove(job.getImportCommand().getPath()))
             .build();
     }
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/UpdateDatasetTask.java b/src/main/java/nl/knaw/dans/dvingest/core/UpdateDatasetTask.java
new file mode 100644
index 0000000..006ee08
--- /dev/null
+++ b/src/main/java/nl/knaw/dans/dvingest/core/UpdateDatasetTask.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package nl.knaw.dans.dvingest.core;
+
+import lombok.AllArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.nio.file.Path;
+
+@AllArgsConstructor
+@Slf4j
+public class UpdateDatasetTask implements Runnable {
+    private final Deposit deposit;
+    private final DataverseService dataverseService;
+    private final UtilityServices utilityServices;
+    private final Path outputDir;
+
+    @Override
+    public void run() {
+        try {
+            var targetDatasetPid = deposit.getTargetDatasetPid();
+            dataverseService.updateMetadata(targetDatasetPid, deposit.getDatasetMetadata().getDatasetVersion());
+
+            dataverseService.publishDataset(targetDatasetPid);
+            dataverseService.waitForState(targetDatasetPid, "RELEASED");
+        }
+        catch (Exception e) {
+            try {
+                log.error("Failed to ingest deposit", e);
+                deposit.moveTo(outputDir.resolve("failed"));
+            }
+            catch (IOException ioException) {
+                log.error("Failed to move deposit to failed directory", ioException);
+            }
+        }
+
+    }
+}
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/UpdateInstructions.java b/src/main/java/nl/knaw/dans/dvingest/core/UpdateInstructions.java
new file mode 100644
index 0000000..1a9849a
--- /dev/null
+++ b/src/main/java/nl/knaw/dans/dvingest/core/UpdateInstructions.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package nl.knaw.dans.dvingest.core;
+
+import lombok.Data;
+import nl.knaw.dans.lib.dataverse.model.dataset.UpdateType;
+
+@Data
+public class UpdateInstructions {
+    private String targetDatasetPid;
+    // TODO: implement minor update
+    //private UpdateType updateType = UpdateType.major;
+}
diff --git a/src/test/java/nl/knaw/dans/dvingest/core/DepositTest.java b/src/test/java/nl/knaw/dans/dvingest/core/DepositTest.java
index 8dd9a9b..be74cee 100644
--- a/src/test/java/nl/knaw/dans/dvingest/core/DepositTest.java
+++ b/src/test/java/nl/knaw/dans/dvingest/core/DepositTest.java
@@ -102,19 +102,19 @@ public void deposits_should_be_ordered_by_creation_timestamp() throws Exception
         var dir1 = testDir.resolve(id1);
         Files.createDirectories(dir1);
         var props1 = new Properties();
-        props1.setProperty("creation-timestamp", "2023-01-01T10:00:00Z");
+        props1.setProperty("creation.timestamp", "2023-01-01T10:00:00Z");
         props1.store(Files.newBufferedWriter(dir1.resolve("deposit.properties")), "");
 
         var dir2 = testDir.resolve(id2);
         Files.createDirectories(dir2);
         var props2 = new Properties();
-        props2.setProperty("creation-timestamp", "2023-01-02T10:00:00Z");
+        props2.setProperty("creation.timestamp", "2023-01-02T10:00:00Z");
         props2.store(Files.newBufferedWriter(dir2.resolve("deposit.properties")), "");
 
         var dir3 = testDir.resolve(id3);
         Files.createDirectories(dir3);
         var props3 = new Properties();
-        props3.setProperty("creation-timestamp", "2023-01-03T10:00:00Z");
+        props3.setProperty("creation.timestamp", "2023-01-03T10:00:00Z");
         props3.store(Files.newBufferedWriter(dir3.resolve("deposit.properties")), "");
 
         var deposit1 = new Deposit(dir1);