From 3906a34a8092808b73013ca6894156e1a6d33e84 Mon Sep 17 00:00:00 2001 From: Jan van Mansum Date: Thu, 14 Nov 2024 16:28:28 +0100 Subject: [PATCH] Implemented deleting files. --- TODO.TXT | 29 +++++++++++++++++++ pom.xml | 1 + .../knaw/dans/dvingest/core/DepositBag.java | 11 +++++-- .../knaw/dans/dvingest/core/DepositTask.java | 17 ++++++++--- .../java/nl/knaw/dans/dvingest/core/Edit.java | 26 +++++++++++++++++ .../core/service/DataverseService.java | 2 ++ .../core/service/DataverseServiceImpl.java | 20 +++++++++++++ 7 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 TODO.TXT create mode 100644 src/main/java/nl/knaw/dans/dvingest/core/Edit.java diff --git a/TODO.TXT b/TODO.TXT new file mode 100644 index 0000000..5b023d5 --- /dev/null +++ b/TODO.TXT @@ -0,0 +1,29 @@ +==== + Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==== + +- regel de upload-batch grootte naar beneden als :ZipUploadFilesLimit kleiner is dan de ingest batch grootte +- implement replaceFiles in edit +- implement providing file metadata +- implement setting license +- implement setting embargo +- implement setting request access permission + +//edit.yml +deleteFiles: [] +replaceFiles: [] +terms: + license: +embargo: ... diff --git a/pom.xml b/pom.xml index 4fe0058..391287d 100644 --- a/pom.xml +++ b/pom.xml @@ -59,6 +59,7 @@ nl.knaw.dans dans-dataverse-client-lib + 1.0.1 nl.knaw.dans diff --git a/src/main/java/nl/knaw/dans/dvingest/core/DepositBag.java b/src/main/java/nl/knaw/dans/dvingest/core/DepositBag.java index cabc27b..fe65a79 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/DepositBag.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/DepositBag.java @@ -18,13 +18,13 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.module.SimpleModule; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; -import lombok.ToString; import nl.knaw.dans.lib.dataverse.MetadataFieldDeserializer; import nl.knaw.dans.lib.dataverse.model.dataset.Dataset; import nl.knaw.dans.lib.dataverse.model.dataset.MetadataField; import org.apache.commons.io.FileUtils; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.Collections; @@ -50,11 +50,18 @@ public DepositBag(Path bagDir) { } public Dataset getDatasetMetadata() throws IOException { - var dataset = MAPPER.readValue(FileUtils.readFileToString(bagDir.resolve("dataset.yml").toFile(), "UTF-8"), Dataset.class); + var dataset = MAPPER.readValue(FileUtils.readFileToString(bagDir.resolve("dataset.yml").toFile(), StandardCharsets.UTF_8), Dataset.class); dataset.getDatasetVersion().setFiles(Collections.emptyList()); // files = null or a list of files is not allowed return dataset; } + public Edit getEditInstructions() throws IOException { + if (!Files.exists(bagDir.resolve("edit.yml"))) { + return null; + } + return MAPPER.readValue(FileUtils.readFileToString(bagDir.resolve("edit.yml").toFile(), StandardCharsets.UTF_8), Edit.class); + } + @Override public int compareTo(DepositBag depositBag) { return bagDir.getFileName().toString().compareTo(depositBag.bagDir.getFileName().toString()); diff --git a/src/main/java/nl/knaw/dans/dvingest/core/DepositTask.java b/src/main/java/nl/knaw/dans/dvingest/core/DepositTask.java index abaca6d..45ef135 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/DepositTask.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/DepositTask.java @@ -81,6 +81,7 @@ private String processBag(DepositBag bag, String targetPid) throws IOException, else { updateMetadata(bag, targetPid); } + // TODO: replaceFiles(bag, targetPid); deleteFiles(bag, targetPid); addFiles(bag, targetPid); publishVersion(targetPid); @@ -88,6 +89,7 @@ private String processBag(DepositBag bag, String targetPid) throws IOException, } private String createNewDataset(DepositBag bag) throws IOException, DataverseException { + log.debug("Creating new dataset"); var result = dataverseService.createDataset(bag.getDatasetMetadata()); var pid = result.getData().getPersistentId(); log.debug(result.getEnvelopeAsString()); @@ -95,17 +97,24 @@ private String createNewDataset(DepositBag bag) throws IOException, DataverseExc } private void updateMetadata(DepositBag bag, String pid) throws IOException, DataverseException { + log.debug("Updating dataset metadata for {}", pid); dataverseService.updateMetadata(pid, bag.getDatasetMetadata().getDatasetVersion()); } private void deleteFiles(DepositBag bag, String pid) throws IOException, DataverseException { - // var files = bag.getFiles(); - // for (var file : files) { - // dataverseService.deleteFile(pid, file.getDataFile().getId()); - // } + var edit = bag.getEditInstructions(); + if (edit == null) { + log.debug("No edit instructions found. Skipping file deletion."); + return; + } + for (var file : edit.getDeleteFiles()) { + log.debug("Deleting file: {}", file); + dataverseService.deleteFile(pid, file); + } } private void addFiles(DepositBag bag, String pid) throws IOException, DataverseException { + log.debug("Adding files from {}", bag.getDataDir()); var iterator = new PathIterator(FileUtils.iterateFiles(bag.getDataDir().toFile(), null, true)); while (iterator.hasNext()) { uploadFileBatch(iterator, bag.getDataDir(), pid); diff --git a/src/main/java/nl/knaw/dans/dvingest/core/Edit.java b/src/main/java/nl/knaw/dans/dvingest/core/Edit.java new file mode 100644 index 0000000..10ead94 --- /dev/null +++ b/src/main/java/nl/knaw/dans/dvingest/core/Edit.java @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2024 DANS - Data Archiving and Networked Services (info@dans.knaw.nl) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package nl.knaw.dans.dvingest.core; + +import lombok.Data; + +import java.util.List; + +@Data +public class Edit { + private List deleteFiles; + +} diff --git a/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseService.java b/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseService.java index 941afba..c940d36 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseService.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseService.java @@ -38,4 +38,6 @@ public interface DataverseService { void waitForState(String persistentId, String state) throws DataverseException; DataverseHttpResponse updateMetadata(String targetDatasetPid, DatasetVersion datasetMetadata) throws DataverseException, IOException; + + public DataverseHttpResponse deleteFile(String persistentId, String filepath) throws DataverseException, IOException; } diff --git a/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseServiceImpl.java b/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseServiceImpl.java index 4500a27..01d8d03 100644 --- a/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseServiceImpl.java +++ b/src/main/java/nl/knaw/dans/dvingest/core/service/DataverseServiceImpl.java @@ -21,12 +21,14 @@ import nl.knaw.dans.lib.dataverse.DataverseClient; import nl.knaw.dans.lib.dataverse.DataverseException; import nl.knaw.dans.lib.dataverse.DataverseHttpResponse; +import nl.knaw.dans.lib.dataverse.Version; import nl.knaw.dans.lib.dataverse.model.dataset.Dataset; import nl.knaw.dans.lib.dataverse.model.dataset.DatasetCreationResult; import nl.knaw.dans.lib.dataverse.model.dataset.DatasetPublicationResult; import nl.knaw.dans.lib.dataverse.model.dataset.DatasetVersion; import nl.knaw.dans.lib.dataverse.model.dataset.FileList; import nl.knaw.dans.lib.dataverse.model.file.FileMeta; +import org.apache.commons.lang3.StringUtils; import java.io.IOException; import java.nio.file.Path; @@ -67,6 +69,24 @@ public DataverseHttpResponse updateMetadata(String targetDataset return dataverseClient.dataset(targetDatasetPid).updateMetadata(datasetMetadata, metadataKeys); } + public DataverseHttpResponse deleteFile(String persistentId, String filepath) throws DataverseException, IOException { + var result = dataverseClient.dataset(persistentId).getFiles(Version.DRAFT.toString()); + var optFileToDelete = result.getData().stream() + .filter(file -> { + var fp = StringUtils.isBlank(file.getDirectoryLabel()) ? + file.getLabel() : + file.getDirectoryLabel() + "/" + file.getLabel(); + return filepath.equals(fp); + }) + .findFirst(); + + if (optFileToDelete.isEmpty()) { + throw new IllegalArgumentException("File not found: " + filepath); + } + log.debug("Deleting file with id {}", optFileToDelete.get().getDataFile().getId()); + return dataverseClient.sword().deleteFile(optFileToDelete.get().getDataFile().getId()); + } + // TODO: move this to dans-dataverse-client-lib; it is similar to awaitLockState. public void waitForState(String datasetId, String expectedState) { var numberOfTimesTried = 0;