Skip to content

Commit

Permalink
Merge branch 'rppd_rpb-43-doNotCrawl' of https://github.com/hbz/lobid…
Browse files Browse the repository at this point in the history
…-gnd into rppd

Resolves #401
  • Loading branch information
fsteeg committed Jun 24, 2024
2 parents 37b922a + d7c22f8 commit 2456313
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 0 deletions.
11 changes: 11 additions & 0 deletions app/controllers/HomeController.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
import modules.IndexComponent;
import play.Environment;
import play.Logger;
import play.cache.Cached;
import play.libs.Json;
import play.libs.ws.WSBodyReadables;
import play.libs.ws.WSBodyWritables;
Expand Down Expand Up @@ -735,4 +736,14 @@ private static String findText(JsonNode info, String field) {
return node != null ? node.get("value").asText().replace("\n", " ").trim() : "";
}

/**
 * Serves the robots.txt for this application.
 *
 * Reads conf/rppd-export.jsonl and emits one "Disallow: /ID" rule for every
 * line that is flagged with doNotCrawl":true, where ID is the value of the
 * line's gndIdentifier field (with a leading "Keine GND-Ansetzung für "
 * removed). The response is cached under the key "robots" for one day.
 *
 * @return A plain text response with the robots.txt content
 * @throws IOException If the export file cannot be read
 */
@Cached(key = "robots", duration = 24 * 60 * 60) // One day
public Result robots() throws IOException {
	String disallowRules = Files.readAllLines(Paths.get("conf/rppd-export.jsonl")).stream()
			.filter(line -> line.contains("doNotCrawl\":true"))
			// Without the field the regex below would not match and the whole
			// JSON line would end up as a Disallow path, so skip such lines.
			.filter(line -> line.contains("gndIdentifier\":\""))
			.map(line -> line.replaceAll(".*gndIdentifier\":\"(.+?)\".*", "$1")
					.replace("Keine GND-Ansetzung für ", ""))
			.map(id -> "Disallow: /" + id)
			.collect(Collectors.joining("\n"));
	// With no flagged entries, a bare "Disallow: /" would block the entire
	// site; an empty Disallow value allows everything instead.
	return ok("User-agent: *\n" + (disallowRules.isEmpty() ? "Disallow:" : disallowRules));
}

}
2 changes: 2 additions & 0 deletions app/modules/IndexComponent.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import controllers.HomeController;
import net.sf.ehcache.CacheManager;
import play.Logger;
import play.inject.ApplicationLifecycle;

Expand Down Expand Up @@ -74,6 +75,7 @@ public ElasticsearchServer(ApplicationLifecycle lifecycle) {
});
lifecycle.addStopHook(() -> {
client.close();
CacheManager.getInstance().shutdown();
return null;
});
}
Expand Down
2 changes: 2 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ libraryDependencies += guice

libraryDependencies += ws

libraryDependencies += ehcache

libraryDependencies += "com.github.jsonld-java" % "jsonld-java" % "0.12.0"

libraryDependencies += "javax.mail" % "mail" % "1.4.1"
Expand Down
2 changes: 2 additions & 0 deletions conf/routes
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ GET /advanced controllers.HomeController.advanced()

GET /cgi-bin/wwwalleg/:name.pl controllers.HomeController.authorityPl(name, db ?= "rnam", index: Int ?= 1, zeilen: Int ?= 1, s1)

GET /robots.txt controllers.HomeController.robots()

GET /:id.:format controllers.HomeController.authorityDotFormat(id, format)

GET /:id controllers.HomeController.authority(id, format ?= null)
Expand Down

0 comments on commit 2456313

Please sign in to comment.