Skip to content

Commit

Permalink
Merge pull request #12 from janvanmansum/DD-1764
Browse files Browse the repository at this point in the history
DD-1764
  • Loading branch information
janvanmansum authored Jan 30, 2025
2 parents f0ba0bf + 079db43 commit b7255a0
Show file tree
Hide file tree
Showing 11 changed files with 42 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,29 @@

@Slf4j
public class DdDataverseIngestApplication extends Application<DdDataverseIngestConfiguration> {
/*
 * Names that prefix the '-code-to-term.csv' mapping file names declared below:
 * - 'rapporten'
 * - 'verwervingswijzen'
 * - 'complextypen'
 * - 'artefacten'
 * - 'periodes'
 */


// File names of the mapping definition files. Each name is resolved against the mapping definitions
// directory when the metadata mapper is created. The spatial coverage terms file is read line by line
// as UTF-8 text; the others are CSV files loaded through MappingLoader.
public static final String SPATIAL_COVERAGE_COUNTRY_TERMS_FILENAME = "spatial-coverage-country-terms.txt";
public static final String ISO_639_1_TO_DV_FILENAME = "iso639-1-to-dv.csv";
public static final String ISO_639_2_TO_DV_FILENAME = "iso639-2-to-dv.csv";
public static final String ABR_REPORT_CODE_TO_TERM_FILENAME = "rapporten-code-to-term.csv";
public static final String ABR_VERWERVINGSWIJZEN_CODE_TO_TERM_FILENAME = "verwervingswijzen-code-to-term.csv";
public static final String ABR_COMPLEXTYPE_CODE_TO_TERM_FILENAME = "complextypen-code-to-term.csv";
public static final String ABR_ARTIFACT_CODE_TO_TERM_FILENAME = "artefacten-code-to-term.csv";
public static final String ABR_PERIOD_CODE_TO_TERM_FILENAME = "periodes-code-to-term.csv";

// Column names passed to MappingLoader as key column and value column for the CSV files above.
// The two ISO 639 files each have their own key column and share DATAVERSE_LANGUAGE_COLUMN as value column;
// all '-code-to-term.csv' files use CODE_COLUMN as key and SUBJECT_COLUMN as value.
public static final String ISO_639_1_TO_DV_KEY_COLUMN = "ISO639-1";
public static final String ISO_639_2_TO_DV_KEY_COLUMN = "ISO639-2";
public static final String DATAVERSE_LANGUAGE_COLUMN = "Dataverse-language";
public static final String CODE_COLUMN = "code";
public static final String SUBJECT_COLUMN = "subject";

public static void main(final String[] args) throws Exception {
new DdDataverseIngestApplication().run(args);
Expand Down Expand Up @@ -179,14 +202,14 @@ private DepositToDvDatasetMetadataMapper createMapper(boolean isMigration, DansD
isMigration,
dansDepositConversionConfig.isDeduplicate(),
dataverseService.getActiveMetadataBlockNames(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve("iso639-1-to-dv.csv")).keyColumn("ISO639-1").valueColumn("Dataverse-language").build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve("iso639-2-to-dv.csv")).keyColumn("ISO639-2").valueColumn("Dataverse-language").build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve("abr-report-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve("verwervingswijzen-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve("abr-complextype-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve("abr-artifact-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve("abr-period-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
FileUtils.readLines(mappingDefsDir.resolve("spatial-coverage-country-terms.txt").toFile(), StandardCharsets.UTF_8),
MappingLoader.builder().csvFile(mappingDefsDir.resolve(ISO_639_1_TO_DV_FILENAME)).keyColumn(ISO_639_1_TO_DV_KEY_COLUMN).valueColumn(DATAVERSE_LANGUAGE_COLUMN).build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve(ISO_639_2_TO_DV_FILENAME)).keyColumn(ISO_639_2_TO_DV_KEY_COLUMN).valueColumn(DATAVERSE_LANGUAGE_COLUMN).build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve(ABR_REPORT_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve(ABR_VERWERVINGSWIJZEN_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve(ABR_COMPLEXTYPE_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve(ABR_ARTIFACT_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
MappingLoader.builder().csvFile(mappingDefsDir.resolve(ABR_PERIOD_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
FileUtils.readLines(mappingDefsDir.resolve(SPATIAL_COVERAGE_COUNTRY_TERMS_FILENAME).toFile(), StandardCharsets.UTF_8),
dansDepositConversionConfig.getDataSuppliers(),
dansDepositConversionConfig.getSkipFields());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static String getValueUri(Node node, Map<String, String> codeToTerm) {
.map(Node::getTextContent)
.orElse(null);
if (valueCode == null) {
throw new IllegalArgumentException(String.format("No valueURI or valueCode found for for %s element", node.getLocalName()));
throw new IllegalArgumentException(String.format("No valueURI or valueCode found for %s element", node.getLocalName()));
}
var term = codeToTerm.get(valueCode.trim());
if (term == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@
import nl.knaw.dans.dvingest.core.dansbag.SupportedLicenses;
import nl.knaw.dans.dvingest.core.dansbag.mapper.DepositToDvDatasetMetadataMapper;
import nl.knaw.dans.dvingest.core.service.DataverseService;
import nl.knaw.dans.lib.dataverse.model.RoleAssignmentReadOnly;
import nl.knaw.dans.lib.util.MappingLoader;
import org.apache.commons.io.FileUtils;

import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.HashMap;
Expand All @@ -35,6 +33,8 @@
import java.util.Set;
import java.util.regex.Pattern;

import static nl.knaw.dans.dvingest.DdDataverseIngestApplication.*;

/**
* Helper for creating a {@link DansBagMappingService} instance to test, with sensible defaults for most fields. Currently, not all fields can be set. For example, the language mapping files are
* hardcoded to the default configuration files in the distribution.
Expand Down Expand Up @@ -68,14 +68,14 @@ public DansBagMappingService build() throws Exception {
isMigration,
deduplicate,
Set.of("citation", "dansRights", "dansRelationMetadata", "dansArchaeologyMetadata", "dansTemporalSpatial", "dansDataVaultMetadata"),
MappingLoader.builder().csvFile(defaultConfigDir.resolve("iso639-1-to-dv.csv")).keyColumn("ISO639-1").valueColumn("Dataverse-language").build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve("iso639-2-to-dv.csv")).keyColumn("ISO639-2").valueColumn("Dataverse-language").build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve("abr-report-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve("verwervingswijzen-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve("abr-complextype-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve("abr-artifact-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve("abr-period-code-to-term.csv")).keyColumn("code").valueColumn("subject").build().load(),
FileUtils.readLines(defaultConfigDir.resolve("spatial-coverage-country-terms.txt").toFile(), StandardCharsets.UTF_8),
MappingLoader.builder().csvFile(defaultConfigDir.resolve(ISO_639_1_TO_DV_FILENAME)).keyColumn(ISO_639_1_TO_DV_KEY_COLUMN).valueColumn(DATAVERSE_LANGUAGE_COLUMN).build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve(ISO_639_2_TO_DV_FILENAME)).keyColumn(ISO_639_2_TO_DV_KEY_COLUMN).valueColumn(DATAVERSE_LANGUAGE_COLUMN).build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve(ABR_REPORT_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve(ABR_VERWERVINGSWIJZEN_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve(ABR_COMPLEXTYPE_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve(ABR_ARTIFACT_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
MappingLoader.builder().csvFile(defaultConfigDir.resolve(ABR_PERIOD_CODE_TO_TERM_FILENAME)).keyColumn(CODE_COLUMN).valueColumn(SUBJECT_COLUMN).build().load(),
FileUtils.readLines(defaultConfigDir.resolve(SPATIAL_COVERAGE_COUNTRY_TERMS_FILENAME).toFile(), StandardCharsets.UTF_8),
dataSuppliers,
skipFields);
var supportedLicenses = new SupportedLicenses(dataverseService);
Expand Down

0 comments on commit b7255a0

Please sign in to comment.