Skip to content

Commit

Permalink
Init.yml
Browse files Browse the repository at this point in the history
  • Loading branch information
janvanmansum committed Dec 11, 2024
1 parent 3ec7a96 commit df8c0df
Show file tree
Hide file tree
Showing 12 changed files with 217 additions and 16 deletions.
29 changes: 27 additions & 2 deletions docs/description.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,37 @@ closely follow the JSON that is passed to the Dataverse API.

| File | Description |
|------------------------|------------------------------------------------------------------------------------------------------------------------------------------|
| `init.yml` | Preconditions and instructions for creating a new dataset. |
| `dataset.yml` | Dataset level metadata. |
| `edit-files.yml` | Instructions for deleting, replacing or moving files, or updating the file metadata;<br> also included: restricting and embargoing files |
| `edit-metadata.yml` | Edit dataset level metadata, including metadata value deletions |
| `edit-permissions.yml` | Role assignments to create or delete on the dataset |
| `update-state.yml` | Whether to publish the dataset version or submit it for review |

##### init.yml

The init file initializes the ingest process. It can be used to verify that an expected precondition is met:

```yaml
init:
  expect:
    state: 'released' # or 'draft', 'absent'.
```
If the state of the dataset does not match the expected state, the ingest procedure will be aborted. The state can be `released`, `draft` or `absent`. The default
is `released` if `updates-dataset` is set in `deposit.properties`, and `absent` otherwise.

It can also be used to instruct the service to import the bag as a dataset with an existing DOI:

```yaml
init:
  create:
    importPid: 'doi:10.5072/FK2/ABCDEF'
```

In this case the `updates-dataset` property in `deposit.properties` must not be set. If 'create' is set, 'expect.state' must either be left out or set to
'absent'.

##### dataset.yml

The format is the same as the JSON that is passed to the [createDataset]{:target=_blank} endpoint of the Dataverse API. Note that the `files` field is not used.
Expand Down Expand Up @@ -83,7 +108,7 @@ editFiles:
- 'file4.txt'
- 'subdirectory/file5.txt'
addUnrestrictedFiles:
- 'file6.txt'
- 'file6.txt'
moveFiles:
- from: 'file6.txt' # Old location in the dataset
to: 'subdirectory/file6.txt' # New location in the dataset
Expand All @@ -107,7 +132,7 @@ editFiles:

The actions specified in this file correspond roughly to the actions available in the dropdown menu in the file view of a dataset in Dataverse.

The replacement file is looked up in the bag, under the `data` directory under the same path as the original file has in the dataset. Note that files in
The replacement file is looked up in the bag, under the `data` directory under the same path as the original file has in the dataset. Note that files in
`replaceFiles` will automatically be skipped in the add files step; the deleted files, however, will not. In other words, it is also possible to remove a
file and add a file back to the same location in one deposit. In that case, there will be no continuous history of the file in the dataset.

Expand Down
11 changes: 11 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/core/DataverseIngestBag.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import nl.knaw.dans.dvingest.core.yaml.EditMetadataRoot;
import nl.knaw.dans.dvingest.core.yaml.EditPermissions;
import nl.knaw.dans.dvingest.core.yaml.EditPermissionsRoot;
import nl.knaw.dans.dvingest.core.yaml.Init;
import nl.knaw.dans.dvingest.core.yaml.InitRoot;
import nl.knaw.dans.dvingest.core.yaml.UpdateState;
import nl.knaw.dans.lib.dataverse.model.dataset.Dataset;

Expand All @@ -35,6 +37,7 @@
public class DataverseIngestBag implements Comparable<DataverseIngestBag> {
private final YamlServiceImpl yamService;

public static final String INIT_YML = "init.yml";
public static final String DATASET_YML = "dataset.yml";
public static final String EDIT_FILES_YML = "edit-files.yml";
public static final String EDIT_METADATA_YML = "edit-metadata.yml";
Expand All @@ -56,6 +59,14 @@ public boolean looksLikeDansBag() {
return Files.exists(bagDir.resolve("metadata/dataset.xml"));
}

/**
 * Loads the {@code init} section from the bag's optional {@code init.yml}.
 *
 * @return the parsed {@link Init} object, or {@code null} when the bag directory contains no {@code init.yml}
 * @throws IOException            propagated from the YAML service
 * @throws ConfigurationException propagated from the YAML service
 */
public Init getInit() throws IOException, ConfigurationException {
    var initFile = bagDir.resolve(INIT_YML);
    if (Files.exists(initFile)) {
        // The file has a single root key ('init'); unwrap it for the caller.
        return yamService.readYaml(initFile, InitRoot.class).getInit();
    }
    return null;
}

public Dataset getDatasetMetadata() throws IOException, ConfigurationException {
if (!Files.exists(bagDir.resolve(DATASET_YML))) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class BagProcessor {

@Builder
private BagProcessor(UUID depositId, DataverseIngestBag bag, DataverseService dataverseService, UtilityServices utilityServices) throws IOException, ConfigurationException {
this.datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseService, bag.getDatasetMetadata());
this.datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseService, bag.getInit(), bag.getDatasetMetadata());
this.filesEditor = new FilesEditor(depositId, bag.getDataDir(), bag.getEditFiles(), dataverseService, utilityServices);
this.metadataEditor = new MetadataEditor(depositId, bag.getEditMetadata(), dataverseService);
this.permissionsEditor = new PermissionsEditor(depositId, bag.getEditPermissions(), dataverseService);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import nl.knaw.dans.dvingest.core.service.DataverseService;
import nl.knaw.dans.dvingest.core.yaml.Create;
import nl.knaw.dans.dvingest.core.yaml.Expect;
import nl.knaw.dans.dvingest.core.yaml.Init;
import nl.knaw.dans.lib.dataverse.DataverseException;
import nl.knaw.dans.lib.dataverse.model.dataset.Dataset;
import org.apache.commons.lang3.StringUtils;

import java.io.IOException;
import java.util.UUID;
Expand All @@ -36,15 +40,28 @@ public class DatasetVersionCreator {
@NonNull
private final DataverseService dataverseService;

private final Init init;

private final Dataset dataset;

public String createDatasetVersion(String targetPid) throws IOException, DataverseException {
if (init != null) {
verifyCreate(init.getCreate(), init.getExpect(), targetPid);
verifyExpect(init.getExpect(), targetPid);
}

var pid = targetPid;
if (targetPid == null) {
if (dataset == null) {
throw new IllegalArgumentException("Must have dataset metadata to create a new dataset.");
}
pid = createDataset();
if (init != null && init.getCreate() != null && init.getCreate().getImportPid() != null) {
importDataset(init.getCreate().getImportPid());
pid = init.getCreate().getImportPid();
}
else {
pid = createDataset();
}
}
// Even if we just created the dataset, we still need to update the metadata, because Dataverse ignores some things
// in the create request.
Expand All @@ -54,6 +71,48 @@ public String createDatasetVersion(String targetPid) throws IOException, Dataver
return pid;
}

/**
 * Checks that the {@code create} instructions from init.yml are consistent with the rest of the deposit. Nothing is checked
 * when there is no {@code create} section or it has no {@code importPid}.
 *
 * @param create    the {@code create} section, may be {@code null}
 * @param expect    the {@code expect} section, may be {@code null}
 * @param targetPid the PID of the dataset being updated, or {@code null} when a new dataset is to be created
 * @throws IllegalArgumentException if an import is requested while updating an existing dataset, if the import PID is blank,
 *                                  or if a state other than 'absent' is expected
 */
private void verifyCreate(Create create, Expect expect, String targetPid) throws IOException, DataverseException {
    var importPid = create == null ? null : create.getImportPid();
    if (importPid == null) {
        return;
    }

    if (targetPid != null) {
        throw new IllegalArgumentException("Cannot import a dataset when updating an existing dataset.");
    }
    if (StringUtils.isBlank(importPid)) {
        throw new IllegalArgumentException("Cannot import a dataset without a PID.");
    }

    var state = expect == null ? null : expect.getState();
    if (state != null && !"absent".equals(state.toLowerCase())) {
        throw new IllegalArgumentException("Cannot expect a state other than 'absent' when importing a dataset.");
    }
}

/**
 * Verifies that the dataset is in the state that init.yml expects, before anything is changed.
 * <p>
 * If no explicit {@code expect.state} is given, the expected state is 'absent' when a new dataset is created
 * ({@code targetPid == null}) and 'released' when an existing dataset is updated. States are compared case-insensitively.
 *
 * @param expect    the {@code expect} section from init.yml, may be {@code null}
 * @param targetPid the PID of the dataset being updated, or {@code null} when a new dataset is to be created or imported
 * @throws IllegalArgumentException if the expected state contradicts the operation (anything but 'absent' for a create,
 *                                  or 'absent' for an update)
 * @throws IllegalStateException    if the actual state of the existing dataset differs from the expected state
 * @throws IOException              propagated from the dataset state lookup
 * @throws DataverseException       propagated from the dataset state lookup
 */
private void verifyExpect(Expect expect, String targetPid) throws IOException, DataverseException {
    var expectedState = targetPid == null ? "absent" : "released";
    if (expect != null && expect.getState() != null) {
        // Locale.ROOT keeps the case mapping independent of the JVM's default locale; with e.g. a Turkish default
        // locale 'I' would become a dotless 'ı' and states such as ARCHIVED or DEACCESSIONED would never match.
        expectedState = expect.getState().toLowerCase(java.util.Locale.ROOT);
    }

    if (targetPid == null) {
        if (!"absent".equals(expectedState)) {
            throw new IllegalArgumentException("Cannot expect a state other than 'absent' when creating a new dataset.");
        }
        // Nothing to check, the dataset is absent by definition if we are creating it; if we are importing it, the action will fail if the PID already exists.
        return;
    }

    if ("absent".equals(expectedState)) {
        throw new IllegalArgumentException("Cannot expect state 'absent' when updating an existing dataset.");
    }
    var actualState = dataverseService.getDatasetState(targetPid);
    if (!expectedState.equals(actualState.toLowerCase(java.util.Locale.ROOT))) {
        throw new IllegalStateException("Expected state " + expectedState + " but found " + actualState + " for dataset " + targetPid);
    }
}

/**
 * Imports the dataset metadata of this deposit into Dataverse under the given, already existing PID, by delegating to the
 * Dataverse service. Start and end are logged at debug level with the deposit ID.
 *
 * @param pid the PID under which the dataset is imported
 */
private void importDataset(String pid) throws IOException, DataverseException {
log.debug("Start importing dataset for deposit {}", depositId);
dataverseService.importDataset(pid, dataset);
log.debug("End importing dataset for deposit {}", depositId);
}

private String createDataset() throws IOException, DataverseException {
log.debug("Start creating dataset for deposit {}", depositId);
var pid = dataverseService.createDataset(dataset);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,8 @@ public interface DataverseService {
/**
 * Looks up dataset DOIs by the value of a metadata field.
 * NOTE(review): the matching semantics (exact vs. partial match, which versions are searched) depend on the
 * implementation, which is not visible here - confirm before relying on them.
 *
 * @param fieldName the name of the metadata field to search on
 * @param value     the value to look for
 * @return the DOIs that were found
 */
List<String> findDoiByMetadataField(String fieldName, String value) throws IOException, DataverseException;

/**
 * Retrieves a version of the dataset with the given PID, including its metadata.
 * NOTE(review): which version is returned (latest, latest published, ...) is decided by the implementation - confirm.
 *
 * @param pid the persistent identifier of the dataset
 * @return the dataset version
 */
DatasetVersion getDatasetMetadata(String pid) throws IOException, DataverseException;

/**
 * Returns the version state of the dataset with the given PID. Callers compare the result case-insensitively with
 * states such as 'released' and 'draft'.
 *
 * @param targetPid the persistent identifier of the dataset
 * @return the version state as reported by Dataverse
 */
String getDatasetState(String targetPid) throws IOException, DataverseException;

/**
 * Imports a dataset into Dataverse under an already existing PID, instead of letting Dataverse mint a new one.
 *
 * @param pid     the existing persistent identifier to register the dataset under
 * @param dataset the dataset metadata to import
 */
void importDataset(String pid, Dataset dataset) throws IOException, DataverseException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,19 @@ public DatasetVersion getDatasetMetadata(String pid) throws IOException, Dataver
return dataverseClient.dataset(pid).getVersion().getData();
}

/**
 * Fetches the latest version of the dataset and returns its version state.
 *
 * @param pid the persistent identifier of the dataset
 * @return the version state of the latest dataset version
 */
@Override
public String getDatasetState(String pid) throws IOException, DataverseException {
    var latestVersion = dataverseClient.dataset(pid).getVersion(Version.LATEST.toString(), true);
    var versionData = latestVersion.getData();
    return versionData.getVersionState();
}

/**
 * Imports the given dataset into the {@code root} dataverse under an already existing PID and logs the response
 * envelope at debug level.
 *
 * @param pid     the existing persistent identifier to register the dataset under
 * @param dataset the dataset metadata to import
 * @throws IOException        propagated from the Dataverse client
 * @throws DataverseException propagated from the Dataverse client
 */
@Override
public void importDataset(String pid, Dataset dataset) throws IOException, DataverseException {
    // The value logged here is the dataset PID, not a deposit ID; the message says so to avoid confusion when
    // correlating with the deposit-level log lines written by the caller.
    log.debug("Start importing dataset with PID {}", pid);
    // NOTE(review): the target dataverse is hard-coded to "root"; the last argument is presumably the 'release'
    // flag of the import API (false = keep as draft) - confirm against the client library.
    var result = dataverseClient.dataverse("root").importDataset(dataset, pid, false);
    log.debug(result.getEnvelopeAsString());
    log.debug("End importing dataset with PID {}", pid);
}

// TODO: move this to dans-dataverse-client-lib; it is similar to awaitLockState.
public void waitForState(String datasetId, String expectedState) {
var numberOfTimesTried = 0;
Expand Down Expand Up @@ -242,10 +255,4 @@ public void waitForState(String datasetId, String expectedState) {
throw new RuntimeException(e);
}
}

/**
 * Returns the version state of the latest version of the given dataset.
 *
 * @param datasetId the identifier of the dataset
 * @return the version state of the latest dataset version
 */
private String getDatasetState(String datasetId) throws IOException, DataverseException {
    return dataverseClient.dataset(datasetId)
        .getVersion(Version.LATEST.toString(), true)
        .getData()
        .getVersionState();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import nl.knaw.dans.dvingest.core.yaml.EditFilesRoot;
import nl.knaw.dans.dvingest.core.yaml.EditMetadataRoot;
import nl.knaw.dans.dvingest.core.yaml.EditPermissionsRoot;
import nl.knaw.dans.dvingest.core.yaml.InitRoot;
import nl.knaw.dans.dvingest.core.yaml.UpdateState;
import nl.knaw.dans.lib.dataverse.MetadataFieldDeserializer;
import nl.knaw.dans.lib.dataverse.model.dataset.Dataset;
Expand Down Expand Up @@ -69,6 +70,7 @@ public YamlServiceImpl() {
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, true);
mapper.addMixIn(FileMeta.class, FileMetaMixin.class);
mapper.registerModule(module);
yamlConfigurationFactories.put(InitRoot.class, new YamlConfigurationFactory<>(InitRoot.class, factory.getValidator(), mapper, "dw"));
yamlConfigurationFactories.put(Dataset.class, new YamlConfigurationFactory<>(Dataset.class, factory.getValidator(), mapper, "dw"));
yamlConfigurationFactories.put(EditFilesRoot.class, new YamlConfigurationFactory<>(EditFilesRoot.class, factory.getValidator(), mapper, "dw"));
yamlConfigurationFactories.put(EditMetadataRoot.class, new YamlConfigurationFactory<>(EditMetadataRoot.class, factory.getValidator(), mapper, "dw"));
Expand Down
23 changes: 23 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/core/yaml/Create.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvingest.core.yaml;

import lombok.Data;

/**
 * The {@code create} section of init.yml: instructions for how a new dataset is to be created.
 */
@Data
public class Create {
// PID (for example 'doi:10.5072/FK2/ABCDEF') under which the bag is imported as a dataset with an existing DOI.
// When left unset (null) a regular create is performed instead of an import.
private String importPid;
}
23 changes: 23 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/core/yaml/Expect.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvingest.core.yaml;

import lombok.Data;

/**
 * The {@code expect} section of init.yml: preconditions that must hold before the ingest proceeds.
 */
@Data
public class Expect {
// Expected state of the dataset: 'released', 'draft' or 'absent'; it is compared case-insensitively.
// When unset (null) the default is chosen by the ingest: 'absent' for a new dataset, 'released' for an update.
private String state;
}
24 changes: 24 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/core/yaml/Init.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvingest.core.yaml;

import lombok.Data;

/**
 * The contents of the {@code init} key in init.yml: the preconditions to verify and the optional instructions for
 * creating the dataset.
 */
@Data
public class Init {
// Preconditions to verify; initialized to an empty Expect (state unset) when the section is left out.
private Expect expect = new Expect();
// Optional create instructions (e.g. import under an existing PID); stays null when the section is left out.
private Create create;
}
}
23 changes: 23 additions & 0 deletions src/main/java/nl/knaw/dans/dvingest/core/yaml/InitRoot.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright (C) 2024 DANS - Data Archiving and Networked Services ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.knaw.dans.dvingest.core.yaml;

import lombok.Data;

/**
 * Root object of init.yml; it only wraps the top-level {@code init} key so that the file can be read as a whole.
 */
@Data
public class InitRoot {
// The contents of the top-level 'init' key.
private Init init;
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public void createDatasetVersion_creates_a_new_dataset_if_targetPid_is_null() th
// Given
var depositId = UUID.randomUUID();
var dataset = new Dataset();
DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, dataset);
DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null, dataset);

// When
datasetVersionCreator.createDatasetVersion(null);
Expand All @@ -53,7 +53,7 @@ public void createDatasetVersion_updates_the_dataset_if_targetPid_is_not_null()
// Given
var depositId = UUID.randomUUID();
var dataset = new Dataset();
DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, dataset);
DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null, dataset);

// When
datasetVersionCreator.createDatasetVersion("pid");
Expand All @@ -67,7 +67,7 @@ public void createDatasetVersion_updates_the_dataset_if_targetPid_is_not_null()
public void createDatasetVersion_throws_IllegalArgumentException_if_dataset_is_null() {
// Given
var depositId = UUID.randomUUID();
DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null);
DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null, null);

// When
// Then
Expand All @@ -80,7 +80,7 @@ public void createDatasetVersion_throws_IllegalArgumentException_if_dataset_is_n
public void createDatasetVersion_is_noop_if_dataset_is_null_and_targetPid_is_not_null() throws Exception {
// Given
var depositId = UUID.randomUUID();
DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null);
DatasetVersionCreator datasetVersionCreator = new DatasetVersionCreator(depositId, dataverseServiceMock, null, null);

// When
datasetVersionCreator.createDatasetVersion("pid");
Expand All @@ -96,14 +96,14 @@ public void ctor_throws_NullPointerException_if_dataverseService_is_null() {
var depositId = UUID.randomUUID();
// When
// Then
assertThatThrownBy(() -> new DatasetVersionCreator(depositId, null, new Dataset()))
assertThatThrownBy(() -> new DatasetVersionCreator(depositId, null, null, new Dataset()))
.isInstanceOf(NullPointerException.class);
}

// Throws NullPointerException if depositId is null
@Test
public void ctor_throws_NullPointerException_if_depositId_is_null() {
assertThatThrownBy(() -> new DatasetVersionCreator(null, dataverseServiceMock, new Dataset()))
assertThatThrownBy(() -> new DatasetVersionCreator(null, dataverseServiceMock, null, new Dataset()))
.isInstanceOf(NullPointerException.class);
}
}

0 comments on commit df8c0df

Please sign in to comment.