Skip to content

Commit

Permalink
Treat unknown names slightly nicer, see also gbif/dwc-api#3
Browse files Browse the repository at this point in the history
  • Loading branch information
mdoering committed Mar 23, 2020
1 parent 33c1870 commit d10ae8a
Show file tree
Hide file tree
Showing 11 changed files with 13,403 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ public static VerbatimRecord createVerbatim() {
}
rec.put(UnknownTerm.build("http://col.plus/terms/punk"),
RandomUtils.randomLatinString(500 + RND.nextInt(2000)));
rec.put(UnknownTerm.build("Col_name"), RandomUtils.randomSpecies());
rec.addIssue(Issue.ACCEPTED_NAME_MISSING);
rec.addIssue(Issue.NAME_VARIANT);
return rec;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.gbif.dwc.terms.Term;
import org.gbif.dwc.terms.UnknownTerm;
import org.junit.Test;

/**
*
Expand All @@ -16,4 +17,11 @@ public UnknownTermSerdeTest() {
// Supplies the value the base serde test round-trips: an UnknownTerm built from a full URI.
public UnknownTerm genTestValue() throws Exception {
return UnknownTerm.build("http://col.plus/terms/punk");
}

// Verifies that a bare, non-URI name ("Col_name") also round-trips through serde as an UnknownTerm,
// complementing genTestValue() which covers the full-URI case.
// NOTE(review): method name has a typo ("Unkown"); kept as-is since renaming a test is out of scope here.
@Test
public void testUnkown() throws Exception {
testRoundtrip(UnknownTerm.build("Col_name"));
}


}
6 changes: 6 additions & 0 deletions dao/src/main/resources/life/catalogue/db/dbschema.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and done it manually. So we can as well log changes here.
ALTER TABLE dataset ADD COLUMN source_key INTEGER REFERENCES dataset;
ALTER TABLE dataset ALTER COLUMN origin TYPE text;
ALTER TABLE dataset_archive ALTER COLUMN origin TYPE text;
ALTER TABLE dataset_import ALTER COLUMN origin TYPE text;
DROP TYPE DATASETORIGIN;
CREATE TYPE DATASETORIGIN AS ENUM (
UPDATE dataset SET origin='EXTERNAL' WHERE origin='UPLOADED' AND data_access IS NOT NULL;
UPDATE dataset SET origin='MANAGED' WHERE origin='UPLOADED';
UPDATE dataset SET origin='RELEASED' WHERE origin='MANAGED' AND locked;
UPDATE dataset SET source_key=3 WHERE origin='RELEASED';
UPDATE dataset_archive SET origin='EXTERNAL' WHERE origin='UPLOADED' AND data_access IS NOT NULL;
UPDATE dataset_archive SET origin='MANAGED' WHERE origin='UPLOADED';
UPDATE dataset_archive SET origin='RELEASED' WHERE origin='MANAGED' AND locked;
UPDATE dataset_archive SET source_key=3 WHERE origin='RELEASED';
UPDATE dataset_import SET origin='EXTERNAL' WHERE origin='UPLOADED' AND data_access IS NOT NULL;
UPDATE dataset_import SET origin='MANAGED' WHERE origin='UPLOADED';
ALTER TABLE dataset ALTER COLUMN origin TYPE DATASETORIGIN USING origin::DATASETORIGIN;
ALTER TABLE dataset_archive ALTER COLUMN origin TYPE DATASETORIGIN USING origin::DATASETORIGIN;
ALTER TABLE dataset_import ALTER COLUMN origin TYPE DATASETORIGIN USING origin::DATASETORIGIN;
```

Expand Down
12 changes: 10 additions & 2 deletions webservice/src/main/java/life/catalogue/csv/CsvReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,19 @@ protected <T extends Enum & Term> void reportMissingSchemas(Class<T> enumClass)
}

private static Optional<Term> findTerm(String termPrefix, String name, boolean isClassTerm) {
String qualName = name;
if (termPrefix != null && !name.contains(":")) {
name = termPrefix + ":" + name;
qualName = termPrefix + ":" + name;
}
try {
return Optional.of(VocabularyUtils.findTerm(name, isClassTerm));
Term t = VocabularyUtils.findTerm(qualName, isClassTerm);
if (t instanceof UnknownTerm) {
// avoid that the prefix is being used as part of the unknown URI
t = UnknownTerm.build(name, isClassTerm);
TermFactory.instance().registerTerm(t);
return Optional.of(t);
}
return Optional.of(t);
} catch (IllegalArgumentException e) {
return Optional.empty();
}
Expand Down
4 changes: 2 additions & 2 deletions webservice/src/main/java/life/catalogue/csv/Schema.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ public Field(Term term, String value, Integer index, String delimiter) {

/**
 * Returns the column mapped to the given term, or null if the schema has no such column.
 * Compares with null-safe equals (not identity) so equal-but-distinct term instances,
 * e.g. UnknownTerm built twice from the same URI, still match.
 */
public Field field(Term term) {
  for (Field f : columns) {
    if (f.term != null && f.term.equals(term)) return f;
  }
  return null;
}

/**
 * True if any column of this schema is mapped to the given term.
 * Uses null-safe equals so equal-but-distinct term instances (e.g. UnknownTerm) match.
 */
public boolean hasTerm(Term term) {
  for (Field f : columns) {
    if (f.term != null && f.term.equals(term)) return true;
  }
  return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class AcefInserterTest extends InserterBaseTest {
public NeoInserter newInserter(Path resource) throws IOException {
  // Build the ACEF-specific inserter against the shared test store; images are passed through untouched.
  final ReferenceFactory refFactory = new ReferenceFactory(store);
  return new AcefInserter(store, resource, refFactory, ImageService.passThru());
}

@Test
public void readMetadata() throws Exception {
NeoInserter ins = setup("/acef/0");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package life.catalogue.importer.dwca;

import com.google.common.collect.Lists;
import life.catalogue.api.model.Dataset;
import life.catalogue.api.model.VerbatimRecord;
import life.catalogue.api.vocab.DataFormat;
import life.catalogue.api.vocab.DatasetType;
import life.catalogue.img.ImageService;
import life.catalogue.importer.InserterBaseTest;
import life.catalogue.importer.NeoInserter;
import life.catalogue.importer.neo.model.NeoUsage;
import life.catalogue.importer.reference.ReferenceFactory;
import org.junit.Ignore;
import org.junit.Test;
import org.neo4j.graphdb.Transaction;

import java.io.IOException;
import java.nio.file.Path;

import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.*;

/**
 * Inserter-level tests for DwC archives, driven through the shared InserterBaseTest fixture.
 */
public class DwcaInserterTest extends InserterBaseTest {

  @Override
  public NeoInserter newInserter(Path resource) throws IOException {
    return new DwcaInserter(store, resource, new ReferenceFactory(store), ImageService.passThru());
  }

  /**
   * EEA redlist file with unknown term columns
   */
  @Test
  public void dwca37() throws Exception {
    NeoInserter ins = setup("/dwca/37");
    ins.insertAll();

    try (Transaction tx = store.getNeo().beginTx()) {
      NeoUsage u = store.usages().objByID("319088");
      assertNotNull(u.getVerbatimKey());
      VerbatimRecord v = store.getVerbatim(u.getVerbatimKey());
      // BUG FIX: the boolean result was silently discarded before, so the test never
      // actually verified the unknown term column was preserved — assert it.
      assertTrue(v.hasTerm(DwcaReaderTest.TERM_CoL_name));
    }
  }

  // NOTE(review): copied from AcefInserterTest — it reads /acef/0, not a dwca resource,
  // which is presumably why it is @Ignore'd here. Confirm intent before enabling.
  @Test
  @Ignore
  public void readMetadata() throws Exception {
    NeoInserter ins = setup("/acef/0");
    Dataset d = ins.readMetadata().get();

    assertEquals(DatasetType.TAXONOMIC, d.getType());
    assertEquals(DataFormat.ACEF, d.getDataFormat());
    assertEquals("ILDIS World", d.getTitle());
    assertEquals("ILDIS", d.getAlias());
    assertEquals("12, May 2014", d.getVersion());
    assertNotNull(d.getReleased());
    //assertEquals("2014-05-05", d.getReleased().toString());
    assertEquals(1, d.getAuthorsAndEditors().size());
    assertThat(d.getAuthorsAndEditors(), is(Lists.newArrayList("Roskov Y.R.")));
    assertEquals("Legumes", d.getGroup());
    assertEquals("The International Legume Database & Information Service (ILDIS) is an international project which aims to document and catalogue the world's legume species diversity in a readily accessible form. Research groups in many countries are participating on a co-operative basis to pool information in the ILDIS World Database of Legumes, which is used to provide a worldwide information service through publications, electronic access and enquiry services.", d.getDescription());
    assertThat(d.getOrganisations(), is(Lists.newArrayList("International")));
    assertEquals("http://www.ildis.org", d.getWebsite().toString());
    assertEquals((Integer)96, d.getCompleteness());
    assertEquals((Integer)4, d.getConfidence());
    assertEquals("YR Roskov & JL", d.getContact());

    assertNull(d.getLicense());
    assertEquals("http://ILDIS.gif", d.getLogo().toString());
    assertNull(d.getCitation());
    assertNull(d.getCode());
  }

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package life.catalogue.importer.dwca;

import java.net.URI;
import java.nio.file.Paths;
import java.util.concurrent.atomic.AtomicInteger;

Expand All @@ -15,7 +16,8 @@
*
*/
public class DwcaReaderTest {

public static final Term TERM_CoL_name = new UnknownTerm(URI.create("http://unknown.org/CoL_name"), false);

@Test
public void metaIF() throws Exception {
DwcaReader reader = DwcaReader.from(PathUtils.classPathTestRes("dwca/0"));
Expand Down Expand Up @@ -57,7 +59,22 @@ public void dwca1() throws Exception {
});
assertEquals(21, counter.get());
}


/**
 * EEA redlist file with unknown term columns
 */
@Test
public void dwca37() throws Exception {
  final DwcaReader r = DwcaReader.from(PathUtils.classPathTestRes("dwca/37"));

  // single core file, Taxon row type, 13 mapped columns
  assertEquals(1, r.size());
  assertEquals(DwcTerm.Taxon, r.coreRowType());
  assertEquals(13, r.coreSchema().size());
  // the unknown CoL_name column must survive alongside the known dwc terms
  assertTrue(r.coreSchema().hasTerm(TERM_CoL_name));
  assertTrue(r.coreSchema().hasTerm(DwcTerm.scientificName));
  assertTrue(r.coreSchema().hasTerm(DwcTerm.taxonRank));
}

@Test
public void dwca6() throws Exception {
DwcaReader reader = DwcaReader.from(PathUtils.classPathTestRes("dwca/6"));
Expand Down
1 change: 1 addition & 0 deletions webservice/src/test/resources/dwca/37/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
EEA redlist file in DwC-A format with unknown term columns (e.g. CoL_name)
11 changes: 11 additions & 0 deletions webservice/src/test/resources/dwca/37/expected.tree
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Phylata [phylum]
Anthurium lanceum Engl. [species]
Canna ehemannii [species]
Corydalis gigantea Trautv. & Meyer [species]
Laeliocattleya welsiana [species]
Opisthomonorcheides overstreeti (Ahmad, 1985) Madhavi, 2011 [species]
Paphiopedilum littleanum [species]
Photorhabdus asymbiotica Fischer-Le Saux & al., 1999 [species]
Placostegus crystallinus [species]
Stichotricha cornuta Dumas, 1858 [species]
Strombidium striatum [species]
Loading

0 comments on commit d10ae8a

Please sign in to comment.