From 666c78c9495caf0071e2acce85d2e664ff158c62 Mon Sep 17 00:00:00 2001
From: Jan van Mansum
Date: Fri, 6 Dec 2024 13:24:38 +0100
Subject: [PATCH 1/2] DD-1736. Fixed problem by looking up all renamedAtUpload
 files by their new name when adding them to the embargo

---
 docs/description.md                       |  6 +++---
 docs/dev.md                               | 24 ++++++++++++++++++
 mkdocs.yml                                |  2 ++
 .../core/bagprocessor/FilesEditor.java    | 25 +++++++++++++------
 4 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/docs/description.md b/docs/description.md
index aac46c4..3bacbe7 100644
--- a/docs/description.md
+++ b/docs/description.md
@@ -106,10 +106,10 @@ editFiles:
       reason: 'Pending publication'
 ```
 
-The actions specified in this file correspond roughly to the action available in the dropdown menu in the file view of a dataset in Dataverse.
+The actions specified in this file correspond roughly to the actions available in the dropdown menu in the file view of a dataset in Dataverse.
 
-The replacement file is looked up in the bag, under the `data` directory under the same path as the original file has in the dataset. Note that files the
-replacement files will automatically be skipped in the add files step, the deleted files, however, will not. In other words, it is also possible to remove a
+The replacement file is looked up in the bag, under the `data` directory under the same path as the original file has in the dataset. Note that files in
+`replaceFiles` will automatically be skipped in the add files step; the deleted files, however, will not. In other words, it is also possible to remove a
 file and add a file back to the same location in one deposit. In that case, there will be no continuous history of the file in the dataset.
 
 The `addRestrictedFiles` action is included, because it allows you to add a large number of restricted files in a more efficient way than by updating the file
diff --git a/docs/dev.md b/docs/dev.md
index e69de29..b47ec61 100644
--- a/docs/dev.md
+++ b/docs/dev.md
@@ -0,0 +1,24 @@
+Development
+===========
+
+## Local debugging
+
+To debug locally, you need to have the following services running:
+
+* A Dataverse instance. Internal DANS developers can use the vagrant boxes with development versions of the Data Stations for this. You will need to configure
+  access to the admin interface to use the unblock-key:
+
+      curl -X PUT -d s3kretKey http://localhost:8080/api/admin/settings/:BlockedApiKey
+      curl -X PUT -d unblock-key http://localhost:8080/api/admin/settings/:BlockedApiPolicy
+
+      # When done debugging, you can reset the policy to localhost-only:
+      curl -X PUT -d localhost-only http://localhost:8080/api/admin/settings/:BlockedApiPolicy/?unblock-key=s3kretKey
+
+* [dd-validate-dans-bag]{:target="_blank"}. Note that its `validation.baseFolder` configuration property should point to the deposit area or an ancestor of it.
+
+Calling `dd-dataverse-ingest` is most conveniently done through the [dd-dataverse-ingest-cli]{:target="_blank"} command line tool.
+
+
+
+[dd-validate-dans-bag]: {{ validate_dans_bag_url }}
+[dd-dataverse-ingest-cli]: {{ dataverse_ingest_cli_url }}
diff --git a/mkdocs.yml b/mkdocs.yml
index 25a0143..71bf407 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -35,6 +35,8 @@ extra:
   bagit_specs_url: https://www.rfc-editor.org/rfc/rfc8493.html
   dans_bag_profile_url: https://dans-knaw.github.io/dans-bagit-profile/versions/1.2.0/
   dataverse_api_url: https://guides.dataverse.org/en/latest/api
+  validate_dans_bag_url: https://github.com/DANS-KNAW/dd-validate-dans-bag
+  dataverse_ingest_cli_url: https://github.com/DANS-KNAW/dd-dataverse-ingest-cli
 
 plugins:
   - markdownextradata
diff --git a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java
index 54a5c42..3dbffba 100644
--- a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java
+++ b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java
@@ -60,6 +60,8 @@ public class FilesEditor {
     private final Map<String, FileMeta> filesInDataset = new java.util.HashMap<>();
     private boolean filesRetrieved = false;
 
+    private Map<String, String> renameMap;
+
     public void editFiles(String pid) throws IOException, DataverseException {
         /*
          * TODO:
@@ -69,16 +71,15 @@ public void editFiles(String pid) throws IOException, DataverseException {
          * - updateFileMetas must exist in bag if first version deposit
          */
         if (editFiles == null) {
-            try (var stream = Files.list(dataDir)) {
-                if (stream.findAny().isEmpty()) {
-                    log.debug("No files to edit for deposit {}", depositId);
-                    return;
-                }
+            if (isEmptyDir(dataDir)) {
+                log.debug("No files to edit for deposit {}", depositId);
+                return;
             }
         }
 
         log.debug("Start editing files for deposit {}", depositId);
         this.pid = pid;
+        this.renameMap = getRenameMap();
         if (editFiles != null) {
             deleteFiles();
             replaceFiles();
@@ -93,6 +94,12 @@ public void editFiles(String pid) throws IOException, DataverseException {
         log.debug("End editing files for deposit {}", depositId);
     }
 
+    private boolean isEmptyDir(Path dir) throws IOException {
+        try (var stream = Files.list(dir)) {
+            return stream.findAny().isEmpty();
+        }
+    }
+
     private void deleteFiles() throws IOException, DataverseException {
         log.debug("Start deleting {} files for deposit {}", depositId, editFiles.getDeleteFiles().size());
         for (var file : editFiles.getDeleteFiles()) {
@@ -156,7 +163,7 @@ private Map<String, String> getRenameMap() {
     private void uploadFileBatch(PathIterator iterator, boolean restrict) throws IOException, DataverseException {
         var tempZipFile = utilityServices.createTempZipFile();
         try {
-            var zipFile = utilityServices.createPathIteratorZipperBuilder(getRenameMap())
+            var zipFile = utilityServices.createPathIteratorZipperBuilder(renameMap)
                 .rootDir(dataDir)
                 .sourceIterator(iterator)
                 .targetZipFile(tempZipFile)
@@ -224,10 +231,14 @@ private void addEmbargoes() throws IOException, DataverseException {
             var embargo = new Embargo();
             embargo.setDateAvailable(addEmbargo.getDateAvailable());
             embargo.setReason(addEmbargo.getReason());
-            var fileIds = addEmbargo.getFilePaths().stream().map(filesInDataset::get).mapToInt(file -> file.getDataFile().getId()).toArray();
+            var fileIds = addEmbargo.getFilePaths().stream().map(this::renameFile).map(filesInDataset::get).mapToInt(file -> file.getDataFile().getId()).toArray();
             embargo.setFileIds(fileIds);
             dataverseService.addEmbargo(pid, embargo);
         }
         log.debug("End adding embargoes for deposit {}", depositId);
     }
+
+    private String renameFile(String file) {
+        return renameMap.getOrDefault(file, file);
+    }
 }
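For readers of the first patch: files that are renamed at upload end up in the dataset under their new names, so an embargo path expressed in bag terms would miss them in `filesInDataset`. The sketch below illustrates the lookup that `renameFile` performs; the map contents are hypothetical stand-ins for what `getRenameMap()` would return.

```java
import java.util.Map;

public class RenameLookupSketch {
    public static void main(String[] args) {
        // Hypothetical rename map, as getRenameMap() would build it from renamedAtUpload entries.
        Map<String, String> renameMap = Map.of("data/original name.txt", "data/original_name.txt");

        // A path that was renamed at upload resolves to its new name...
        String renamed = renameMap.getOrDefault("data/original name.txt", "data/original name.txt");
        // ...and a path that was not renamed falls back to itself.
        String unchanged = renameMap.getOrDefault("data/other.txt", "data/other.txt");

        System.out.println(renamed);   // data/original_name.txt
        System.out.println(unchanged); // data/other.txt
    }
}
```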
From fdd6b7508290af512b8503349b7de8b31a739d80 Mon Sep 17 00:00:00 2001
From: Jan van Mansum
Date: Fri, 6 Dec 2024 13:33:30 +0100
Subject: [PATCH 2/2] Better error message should an embargo on a non-existing
 file occur again

---
 .../dvingest/core/bagprocessor/FilesEditor.java | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java
index 3dbffba..af2ad80 100644
--- a/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java
+++ b/src/main/java/nl/knaw/dans/dvingest/core/bagprocessor/FilesEditor.java
@@ -231,7 +231,12 @@ private void addEmbargoes() throws IOException, DataverseException {
             var embargo = new Embargo();
             embargo.setDateAvailable(addEmbargo.getDateAvailable());
             embargo.setReason(addEmbargo.getReason());
-            var fileIds = addEmbargo.getFilePaths().stream().map(this::renameFile).map(filesInDataset::get).mapToInt(file -> file.getDataFile().getId()).toArray();
+            var fileIds = addEmbargo.getFilePaths()
+                .stream()
+                .map(this::renameFile)
+                .map(this::throwIfFileNotInDataset)
+                .map(filesInDataset::get)
+                .mapToInt(file -> file.getDataFile().getId()).toArray();
             embargo.setFileIds(fileIds);
             dataverseService.addEmbargo(pid, embargo);
         }
@@ -241,4 +246,11 @@ private void addEmbargoes() throws IOException, DataverseException {
     private String renameFile(String file) {
         return renameMap.getOrDefault(file, file);
    }
+
+    private String throwIfFileNotInDataset(String file) {
+        if (!filesInDataset.containsKey(file)) {
+            throw new IllegalArgumentException("File not found in dataset: " + file);
+        }
+        return file;
+    }
 }
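The guard added in the second patch turns what would otherwise be a `NullPointerException` from `filesInDataset::get` into an `IllegalArgumentException` that names the offending path. A self-contained sketch of the resulting pipeline, with hypothetical maps standing in for the dataset state, shows why the guard must run after the rename step:

```java
import java.util.Arrays;
import java.util.Map;
import java.util.stream.Stream;

public class EmbargoPipelineSketch {
    public static void main(String[] args) {
        // Hypothetical stand-ins: one file was renamed at upload; the id mimics a Dataverse data file id.
        Map<String, String> renameMap = Map.of("data/original name.txt", "data/original_name.txt");
        Map<String, Integer> fileIdsByPath = Map.of("data/original_name.txt", 42);

        var fileIds = Stream.of("data/original name.txt")
            .map(p -> renameMap.getOrDefault(p, p)) // resolve renamedAtUpload paths first
            .map(p -> {
                // Guard after renaming: the renamed path is the one the dataset actually knows.
                if (!fileIdsByPath.containsKey(p)) {
                    throw new IllegalArgumentException("File not found in dataset: " + p);
                }
                return p;
            })
            .mapToInt(fileIdsByPath::get)
            .toArray();

        System.out.println(Arrays.toString(fileIds)); // [42]
    }
}
```

Had the guard been placed before the rename lookup, a file that was renamed at upload would still be reported as missing even though the fix from the first patch makes it resolvable.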