Skip to content

Commit

Permalink
Merge pull request #36 from hbz/rpb-28-hbzIds
Browse files Browse the repository at this point in the history
Map almaMmsIds to rpbIds for input data with `#983` (RPB-28)
  • Loading branch information
fsteeg authored Aug 9, 2023
2 parents 914e3b2 + 72f78e1 commit 97fe517
Show file tree
Hide file tree
Showing 59 changed files with 1,224 additions and 1,727 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,15 @@ jobs:
uses: actions/setup-java@v1
with:
java-version: 1.8
- name: Install metafacture-core-5.5.1-rc1
- name: Install metafacture-core 5.7.0-rc1
run: |
git clone https://github.com/metafacture/metafacture-core.git
git clone https://github.com/metafacture/metafacture-core.git -b 5.7.0-rc1
cd metafacture-core
git checkout metafacture-core-5.5.1-rc1
./gradlew publishToMavenLocal
cd ..
- name: Install metafacture-fix 0.5.1
- name: Install metafacture-fix 0.6.0-rc3
run: |
git clone https://github.com/metafacture/metafacture-fix.git -b 0.5.1
git clone https://github.com/metafacture/metafacture-fix.git -b 0.6.0-rc3
cd metafacture-fix
./gradlew publishToMavenLocal
cd ..
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ bulk.ndjson
application-log-*.gz
RPB-Export_HBZ_SW.txt
RPB-Export_HBZ_Tit.txt
RPB-Export_HBZ_Tit_hbzIds.txt
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ sh validateJsonOutput.sh

This validates the resulting files against the JSON schemas in `test/rpb/schemas/`.

### Index creation

If you're not indexing into an existing lobid-resources index, make sure to create one with the proper index settings, e.g. to create `resources-rpb-20230623` from `quaoar3`:

```bash
unset http_proxy # for putting on weywot
sol@quaoar3:~/git/rpb$ curl -XPUT -H "Content-Type: application/json" weywot5:9200/resources-rpb-20230623?pretty -d @../lobid-resources/src/main/resources/alma/index-config.json
```

For testing, the real index name (e.g. `resources-rpb-20230623`) is aliased by `resources-rpb-test`, which is used by https://test.lobid.org/resources / http://test.rpb.lobid.org and in the transformation.

### Run full transformation and indexing

Get full data at: http://lobid.org/download/rpb-gesamtexport/, place files in `conf/`.
Expand Down
7 changes: 5 additions & 2 deletions app/rpb/Decode.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ public final class Decode extends DefaultObjectPipe<String, StreamReceiver> {
private String recordId;
private String recordTitle;
private boolean inMultiVolumeRecord;
private String currentRecord;

@Override
public void process(final String obj) {
currentRecord = obj;
LOG.debug("Process record: " + obj);
final String[] vals = obj.split("\\[/\\]");
recordId = getId(obj, vals);
Expand Down Expand Up @@ -47,9 +49,10 @@ private void processFields(final String[] vals) {
if("#36 ".equals(k) && "sm".equals(v)) {
inMultiVolumeRecord = true;
} else if(inMultiVolumeRecord && "#01 ".equals(k)) {
if(volumeCounter == 0) {
if(volumeCounter == 0 && currentRecord.contains("#36 sbd")) { // s. RPB-28
// we're still in the main (multi volume) record, so we mark that here:
getReceiver().literal(fieldName("#36t"), "MultiVolumeBook");
getReceiver().literal(fieldName("#36t"),
currentRecord.contains("#88 ") ? "Periodical" : "MultiVolumeBook");
}
getReceiver().endRecord(); // first time, we end main record, then each volume
volumeCounter++;
Expand Down
49 changes: 49 additions & 0 deletions app/rpb/MapAlmaToRpb.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/* Copyright 2023 Fabian Steeg, hbz. Licensed under the GPLv2 */

package rpb;

import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;

import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.helpers.DefaultStreamPipe;

import com.fasterxml.jackson.databind.JsonNode;

import play.Logger;
import play.libs.Json;

/**
* Create a mapping for RPB Allegro export data with hbzIds: map almaMmsId to rpbId.
*/
public final class MapAlmaToRpb extends DefaultStreamPipe<ObjectReceiver<String>> {

private String id;
@Override
public void startRecord(String identifier) {
this.id = identifier;
super.startRecord(identifier);
}
@Override
public void literal(String name, String hbzId) {
String almaMmsId = getAlmaMmsId(hbzId);
getReceiver().process(String.format("%s\tRPB%s", almaMmsId, id));
}
private String getAlmaMmsId(String hbzId) {
String url = "https://test.lobid.org/resources/" + hbzId;
Logger.debug("Trying to get almaMmsId from: " + url);
try {
URLConnection connection = new URL(url).openConnection();
connection.setRequestProperty("Accept", "application/json");
connection.connect();
JsonNode jsonNode = Json.parse(connection.getInputStream());
Thread.sleep(100);
return jsonNode.get("almaMmsId").textValue();
} catch (IOException | InterruptedException e) {
e.printStackTrace();
}
return "#983: " + hbzId;
}

}
1 change: 1 addition & 0 deletions app/views/tags/result_doc.scala.html
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
@((pub\"frequency").asOpt[Seq[JsValue]].map { freq =>
optional("Erscheinungsweise", "label", freq.head)
})
@optional("Erscheinungsverlauf", "publicationHistory", pub)
</td>
</tr>
}
Expand Down
16 changes: 8 additions & 8 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ libraryDependencies ++= Seq(
cache,
javaWs,
"com.typesafe.play" % "play-test_2.11" % "2.4.11",
"org.metafacture" % "metafacture-elasticsearch" % "5.5.1-rc1",
"org.metafacture" % "metafacture-io" % "5.5.1-rc1",
"org.metafacture" % "metafacture-strings" % "5.5.1-rc1",
"org.metafacture" % "metafacture-json" % "5.5.1-rc1",
"org.metafacture" % "metafacture-flux" % "5.5.1-rc1",
"org.metafacture" % "metafacture-triples" % "5.5.1-rc1",
"org.metafacture" % "metafacture-formatting" % "5.5.1-rc1",
"org.metafacture" % "metafix" % "0.5.1",
"org.metafacture" % "metafacture-elasticsearch" % "5.7.0-rc1",
"org.metafacture" % "metafacture-io" % "5.7.0-rc1",
"org.metafacture" % "metafacture-strings" % "5.7.0-rc1",
"org.metafacture" % "metafacture-json" % "5.7.0-rc1",
"org.metafacture" % "metafacture-flux" % "5.7.0-rc1",
"org.metafacture" % "metafacture-triples" % "5.7.0-rc1",
"org.metafacture" % "metafacture-formatting" % "5.7.0-rc1",
"org.metafacture" % "metafix" % "0.6.0-rc3",
"org.elasticsearch" % "elasticsearch" % "1.7.5" withSources(),
"com.github.jsonld-java" % "jsonld-java" % "0.5.0",
"org.apache.commons" % "commons-rdf-jena" % "0.5.0",
Expand Down
1 change: 1 addition & 0 deletions conf/RPB-Export_HBZ_Titel_Test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@
[/]#00 929t112197[/]#20 Seit 1911 weht im Vinzenzhaus der Geist der N�chstenliebe : 100-Jahr-Feier in Gebhardshain ; vom Konvent zum Seniorenheim[/]#30 _sn611070_[/]#31 _r99_ _o13209039_[/]#32 _16326855n1_ ; _00Sn10s3817103a_[/]#35 _r99_ _o13209039_ ; _sn611070_[/]#36 u[/]#37 _sn611070_ ; _r99_ _o13209039_[/]#40 _929n991002_[/]#70b66[/]#70h126 vom 31.05.[/]#70j2011[/]#70s24[/]#70tRhein-Zeitung, Ausg. H[/]#70yRhein-Zeitung, Ausg. H. - 66 (2011), 126 vom 31.05., S. 24[/]#76a2011[/]#76b2011[/]#77 Ill.[/]#88 2522761-0[/]#90dZB 151:2011;MZ 52:2011[/]#95 20110601/09:14:31KO[/]#96 20120809/16:13:40ZR[/]
[/]#00 929t112198[/]#20 �Das� Leben der Bilder oder die Kunst des Sehens[/]#30 _sn848000_[/]#31 _r99_ _o13203050_[/]#32 _4114333n4_ ; _4143413n4_[/]#32a_118605364_[/]#35 _r99_ _o13203050_ ; _sn848000_[/]#36 s[/]#37 _sn848000_ ; _r99_ _o13203050_[/]#39 John Berger[/]#40 _00Pn11k2198999a_[/]#71 11. Aufl.[/]#74 Berlin[/]#75 Wagenbach[/]#76a2009[/]#76b2009[/]#77 141 S. : Ill.[/]#81 Aus dem Engl. �bers.[/]#83 Sander, August / 1876-1964[/]#85 Salto ; [13][/]#87 3-8031-1114-5[/]#871978-3-8031-1114-2[/]#90d2011/1822[/]#95 20110601/09:39:26HBZ[/]#96 20110601/09:44:37KO[/]#983HT015954631[/]
[/]#00 929t112199[/]#20 Charte vom Laufe des Rheins von Coblenz bis Wesel [Elektronische Ressource] : das Herzogthum Berg, die Grafschaften Wied, Nieder-Isenburg und andere L�nder vorstellend : mit r�m. kaiserl. allergnaed. Freyheit[/]#27 Charte vom Laufe des Rheins von Koblenz bis Wesel [Elektronische Ressource] : das Herzogthum Berg, die Grafschaften Wied, Nieder-Isenburg und andere L�nder vorstellend : mit r�m. kaiserl. allergnaed. Freyheit[/]#30 _sn126000_[/]#31 _r22_[/]#32 _4074902n2_ ; _4611904n8_ ; _4511937n5_[/]#32a_4042224n0_ ; _4611904n8_ ; _4511937n5_[/]#32b_4005659n4_ ; _4611904n8_ ; _4511937n5_[/]#35 _r22_ ; _sn126000_[/]#36 s[/]#37 _sn126000_ ; _r22_[/]#39 entworfen von F. L. G�ssefeld ; [Stecher:] I. Rausch[/]#42 _00Gn11k2199217a_[/]#57 _178964131_[/]#571_00Pn01k23917383a_[/]#572_00Pn01t22981785a_[/]#74 N�rnberg[/]#75 Hom�nnische Erben[/]#76a1797[/]#76b1797[/]#77 1 Kt. : Kupferst. ; 57 x 41 cm[/]#81 Nebenkarte oben rechts: "Charte den Lauf des Rheins von Coblenz bis Unkel nebst den anliegenden L�ndern enthaltend" (16 x 23 cm). - Nullmeridian: Ferro - Ma�stab in graph,. Form (Deutsche Meilen) - Titelkartusche unten links[/]#90ehttps://nbn-resolving.org/urn:nbn:de:hbz:061:1-42026[/]#95 20110601/09:46:34HBZ[/]#96 20110601/09:51:55KO[/]#983CT003007380[/]
[/]#00 929t111801[/]#19 Rhein-Zeitung / C[/]#20 Rhein-Zeitung : unabh�ngige Tageszeitung[/]#23 Ausg. C, Andernach/Mayen[/]#30 _sn882026_[/]#31 _r99_ _o137_[/]#32 _4038117n1_ ; _4067510n5_[/]#35 _r99_ _o137_ ; _sn882026_[/]#36 sm[/]#37 _sn882026_ ; _r99_ _o137_[/]#74 Koblenz[/]#75 Mittelrhein-Verl.[/]#76a2011[/]#76b2011[/]#81 Vorg. u.d.T.: Rhein-Zeitung / CA. - Vorg. u.d.T.: Rhein-Zeitung / CM[/]#88 2602949-2[/]#90dZB 32 MAG[/]#95 20110503/12:48:18HBZ[/]#96 20110503/13:09:46ZR[/]#97xper[/]#983HT016782154[/]#01 66.2011,77(1.Apr.) -[/]#36 sbd[/]
10 changes: 5 additions & 5 deletions conf/output/test-output-10.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
{
"@context" : "http://lobid.org/resources/context.jsonld",
"type" : [ "BibliographicResource", "Article" ],
"rpbId" : "RPB036t0121513",
"id" : "https://lobid.org/resources/RPB036t0121513",
"rpbId" : "RPB036t0121519",
"id" : "https://lobid.org/resources/RPB036t0121519",
"inCollection" : [ {
"id" : "http://lobid.org/resources/HT013494180#!",
"type" : [ "Collection" ],
"label" : "Rheinland-Pfälzische Bibliographie"
} ],
"title" : "Soldaten für Holland",
"title" : "Sprendlinger kämpften unter Napoleon",
"publication" : [ {
"startDate" : "1957",
"startDate" : "1963",
"type" : [ "PublicationEvent" ]
} ],
"contribution" : [ {
Expand All @@ -24,5 +24,5 @@
},
"type" : [ "Contribution" ]
} ],
"bibliographicCitation" : "Heimat am Mittelrhein. - 2 (1957), Nr. 6"
"bibliographicCitation" : "Heimat am Mittelrhein. - 8 (1963), Nr. 12"
}
10 changes: 5 additions & 5 deletions conf/output/test-output-11.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
{
"@context" : "http://lobid.org/resources/context.jsonld",
"type" : [ "BibliographicResource", "Article" ],
"rpbId" : "RPB036t0121514",
"id" : "https://lobid.org/resources/RPB036t0121514",
"rpbId" : "RPB036t0121520",
"id" : "https://lobid.org/resources/RPB036t0121520",
"inCollection" : [ {
"id" : "http://lobid.org/resources/HT013494180#!",
"type" : [ "Collection" ],
"label" : "Rheinland-Pfälzische Bibliographie"
} ],
"title" : "Sonnenuhr an der Kirche",
"title" : "Sprendlinger Stifter und ihre Familien",
"publication" : [ {
"startDate" : "1958",
"startDate" : "1983",
"type" : [ "PublicationEvent" ]
} ],
"contribution" : [ {
Expand All @@ -24,5 +24,5 @@
},
"type" : [ "Contribution" ]
} ],
"bibliographicCitation" : "Heimat am Mittelrhein. - 3 (1958), Nr. 11"
"bibliographicCitation" : "Heimat am Mittelrhein. - 1983, H. 2"
}
10 changes: 5 additions & 5 deletions conf/output/test-output-12.json
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
{
"@context" : "http://lobid.org/resources/context.jsonld",
"type" : [ "BibliographicResource", "Article" ],
"rpbId" : "RPB036t0121515",
"id" : "https://lobid.org/resources/RPB036t0121515",
"rpbId" : "RPB036t0121521",
"id" : "https://lobid.org/resources/RPB036t0121521",
"inCollection" : [ {
"id" : "http://lobid.org/resources/HT013494180#!",
"type" : [ "Collection" ],
"label" : "Rheinland-Pfälzische Bibliographie"
} ],
"title" : "Souvenir ...",
"title" : "Steuern in alter Zeit",
"publication" : [ {
"startDate" : "1963",
"startDate" : "1957",
"type" : [ "PublicationEvent" ]
} ],
"contribution" : [ {
Expand All @@ -24,5 +24,5 @@
},
"type" : [ "Contribution" ]
} ],
"bibliographicCitation" : "Heimat am Mittelrhein. - 8 (1963), Nr. 1"
"bibliographicCitation" : "Heimat am Mittelrhein. - 2 (1957), Nr. 12"
}
53 changes: 44 additions & 9 deletions conf/output/test-output-13.json
Original file line number Diff line number Diff line change
@@ -1,21 +1,56 @@
{
"@context" : "http://lobid.org/resources/context.jsonld",
"type" : [ "BibliographicResource", "Article" ],
"rpbId" : "RPB036t0121516",
"id" : "https://lobid.org/resources/RPB036t0121516",
"rpbId" : "RPB107t01147869",
"id" : "https://lobid.org/resources/RPB107t01147869",
"inCollection" : [ {
"id" : "http://lobid.org/resources/HT013494180#!",
"type" : [ "Collection" ],
"label" : "Rheinland-Pfälzische Bibliographie"
} ],
"title" : "Sprendlingen und das Kloster Lorsch",
"title" : "Grabmal und Taufstein - zwei spätgotische Denkmäler der Pfarrkirche in Hainfeld",
"extent" : "Ill.",
"publication" : [ {
"startDate" : "1967",
"startDate" : "1984",
"type" : [ "PublicationEvent" ]
} ],
"subject" : [ {
"id" : "http://purl.org/lobid/rpb#n844030",
"label" : "Platzhalter Schlagwortlabel",
"type" : [ "Concept" ],
"source" : {
"id" : "http://purl.org/lobid/rpb",
"label" : "Systematik der Rheinland-Pfälzischen Bibliographie"
}
}, {
"type" : [ "ComplexSubject" ],
"label" : "Sankt Barbara / Hainfeld, Südliche Weinstraße | Grabplatte | Taufbecken",
"componentList" : [ {
"id" : "https://d-nb.info/gnd/7746576-3",
"label" : "Sankt Barbara / Hainfeld, Südliche Weinstraße",
"source" : {
"id" : "https://d-nb.info/gnd/7749153-1",
"label" : "Gemeinsame Normdatei (GND)"
}
}, {
"id" : "https://d-nb.info/gnd/4113778-4",
"label" : "Grabplatte",
"source" : {
"id" : "https://d-nb.info/gnd/7749153-1",
"label" : "Gemeinsame Normdatei (GND)"
}
}, {
"id" : "https://d-nb.info/gnd/4135651-2",
"label" : "Taufbecken",
"source" : {
"id" : "https://d-nb.info/gnd/7749153-1",
"label" : "Gemeinsame Normdatei (GND)"
}
} ]
} ],
"spatial" : [ {
"id" : "https://rpb.lobid.org/spatial#n339",
"label" : "o339",
"id" : "https://rpb.lobid.org/spatial#n33703036",
"label" : "o33703036",
"type" : [ "Concept" ],
"source" : {
"id" : "https://rpb.lobid.org/spatial",
Expand All @@ -24,14 +59,14 @@
} ],
"contribution" : [ {
"agent" : {
"id" : "https://rpb.lobid.org/agent/00Pn01m21445842a",
"label" : "Wagner, Hermann Josef"
"id" : "https://d-nb.info/gnd/11944027X",
"label" : "Müller, Carl Werner / 1931-2018"
},
"role" : {
"id" : "http://id.loc.gov/vocabulary/relators/aut",
"label" : "Autor/in"
},
"type" : [ "Contribution" ]
} ],
"bibliographicCitation" : "Landkreis Bingen: Heimat-Jahrbuch. - 1967, S. 23-26"
"bibliographicCitation" : "Archiv für mittelrheinische Kirchengeschichte. - 36 (1984), S. 47-63"
}
58 changes: 51 additions & 7 deletions conf/output/test-output-14.json
Original file line number Diff line number Diff line change
@@ -1,28 +1,72 @@
{
"@context" : "http://lobid.org/resources/context.jsonld",
"type" : [ "BibliographicResource", "Article" ],
"rpbId" : "RPB036t0121517",
"id" : "https://lobid.org/resources/RPB036t0121517",
"rpbId" : "RPB107t01147870",
"id" : "https://lobid.org/resources/RPB107t01147870",
"inCollection" : [ {
"id" : "http://lobid.org/resources/HT013494180#!",
"type" : [ "Collection" ],
"label" : "Rheinland-Pfälzische Bibliographie"
} ],
"title" : "Sprendlingens Einwohnerzahl",
"title" : "¬Die¬ Herren von Hainfeld - zur Geschichte e. vorderpfälzischen Adelssitzes im Mittelalter",
"publication" : [ {
"startDate" : "1959",
"startDate" : "1983",
"type" : [ "PublicationEvent" ]
} ],
"subject" : [ {
"id" : "http://purl.org/lobid/rpb#n240400",
"label" : "Platzhalter Schlagwortlabel",
"type" : [ "Concept" ],
"source" : {
"id" : "http://purl.org/lobid/rpb",
"label" : "Systematik der Rheinland-Pfälzischen Bibliographie"
}
}, {
"id" : "http://purl.org/lobid/rpb#n207020",
"label" : "Platzhalter Schlagwortlabel",
"type" : [ "Concept" ],
"source" : {
"id" : "http://purl.org/lobid/rpb",
"label" : "Systematik der Rheinland-Pfälzischen Bibliographie"
}
}, {
"type" : [ "ComplexSubject" ],
"label" : "Hainfeld / Familie | Geschichte 1100-1300",
"componentList" : [ {
"id" : "https://rpb.lobid.org/sw/00Sn01s147870528a",
"label" : "Hainfeld / Familie",
"source" : {
"id" : "http://rpb.lobid.org/sw",
"label" : "RPB-Sachsystematik"
}
}, {
"id" : "https://rpb.lobid.org/sw/00Sn06k1444137a",
"label" : "Geschichte 1100-1300",
"source" : {
"id" : "http://rpb.lobid.org/sw",
"label" : "RPB-Sachsystematik"
}
} ]
} ],
"spatial" : [ {
"id" : "https://rpb.lobid.org/spatial#n33703036",
"label" : "o33703036",
"type" : [ "Concept" ],
"source" : {
"id" : "https://rpb.lobid.org/spatial",
"label" : "RPB-Raumsystematik"
}
} ],
"contribution" : [ {
"agent" : {
"id" : "https://rpb.lobid.org/agent/00Pn01m21445842a",
"label" : "Wagner, Hermann Josef"
"id" : "https://d-nb.info/gnd/11944027X",
"label" : "Müller, Carl Werner / 1931-2018"
},
"role" : {
"id" : "http://id.loc.gov/vocabulary/relators/aut",
"label" : "Autor/in"
},
"type" : [ "Contribution" ]
} ],
"bibliographicCitation" : "Heimat am Mittelrhein. - 4 (1959), Nr. 1"
"bibliographicCitation" : "Historischer Verein der Pfalz: Mitteilungen des Historischen Vereins der Pfalz. - 81 (1983), S. 229-271"
}
Loading

0 comments on commit 97fe517

Please sign in to comment.