diff --git a/MServer-Config.yaml b/MServer-Config.yaml
index 85cb9940f..758fe625f 100644
--- a/MServer-Config.yaml
+++ b/MServer-Config.yaml
@@ -1,7 +1,7 @@
#### Server configurations ####
# The maximum amount of cpu threads to be used.
-maximumCpuThreads: 10
+maximumCpuThreads: 1
# The maximum duration in minutes the server should run.
# If set to 0 the server runs without a time limit.
@@ -24,15 +24,17 @@ senderIncluded:
#- ARTE_PL
#- ARTE_IT
#- ARTE_ES
- #- 3SAT
+ #- DREISAT
#- FUNK
#- KIKA
- #- DW
- #- ORF
+ # - DW
+ - ORF
#- PHOENIX
#- SRF
- - SR
+ #- SR
#- ZDF
+
+#SRF,SR,PHOENIX,ORF,KIKA,DW,3SAT
# If set the server will be awake after the crawler run and restarts the run after the given amount.
#schedules:
@@ -133,7 +135,7 @@ topicsSearchEnabled: true
# The maximum amount of sub pages to be crawled.
# Example: If a Sendung overview side has 10 pages with videos for this Sendung and
# the amount set by this is 5 then the crawler crawls pages 1 to 5.
-maximumSubpages: 1
+maximumSubpages: 5
# The maximum amount of days going to past will be crawled for the "Sendung Verpasst?" section.
maximumDaysForSendungVerpasstSection: 7
@@ -155,8 +157,7 @@ senderConfigurations:
#10,20,40 ok
maximumSubpages: 0
ORF:
- #2,4,8 ok
- maximumUrlsPerTask: 40
+ maximumRequestsPerSecond: 10.0
ARTE_DE:
maximumUrlsPerTask: 1
maximumDaysForSendungVerpasstSectionFuture: 0
@@ -178,10 +179,12 @@ senderConfigurations:
maximumRequestsPerSecond: 10.0
FUNK:
maximumUrlsPerTask: 99
- DW:
- maximumSubpages: 0
- SR:
+ DREISAT:
maximumSubpages: 5
+ maximumDaysForSendungVerpasstSection: 60
+ PHOENIX:
+ maximumSubpages: 500
+
# configure string variables
crawlerApiParams:
diff --git a/src/main/java/de/mediathekview/mserver/base/utils/JsonUtils.java b/src/main/java/de/mediathekview/mserver/base/utils/JsonUtils.java
index 30deb2aa8..00f8fcff5 100644
--- a/src/main/java/de/mediathekview/mserver/base/utils/JsonUtils.java
+++ b/src/main/java/de/mediathekview/mserver/base/utils/JsonUtils.java
@@ -77,7 +77,18 @@ public static Optional getAttributeAsInt(final JsonObject jsonObject, f
}
public static Optional getElementValueAsString(final JsonElement aJsonElement, final String... aElementIds) {
- Optional rs = Optional.empty();
+ Optional rs = JsonUtils.getElement(aJsonElement, aElementIds);
+ if (rs.isPresent()) {
+ return Optional.of(rs.get().getAsString());
+ }
+ return Optional.empty();
+ }
+
+ public static Optional getElement(final JsonElement aJsonElement, final String... aElementIds) {
+ Optional rs = Optional.empty();
+ if (aElementIds == null || aElementIds.length == 0) {
+ return rs;
+ }
JsonObject aJsonObject = aJsonElement.getAsJsonObject();
for (int i = 0; i < aElementIds.length-1; i++) {
String elementId = aElementIds[i];
@@ -91,7 +102,7 @@ public static Optional getElementValueAsString(final JsonElement aJsonEl
//
String elementId = aElementIds[aElementIds.length-1];
if (aJsonObject != null && aJsonObject.has(elementId) && !aJsonObject.get(elementId).isJsonNull()) {
- rs = Optional.of(aJsonObject.get(elementId).getAsString());
+ rs = Optional.of(aJsonObject.get(elementId));
}
//
return rs;
diff --git a/src/main/java/de/mediathekview/mserver/base/webaccess/JsoupConnection.java b/src/main/java/de/mediathekview/mserver/base/webaccess/JsoupConnection.java
index 6ff257725..18330f14c 100644
--- a/src/main/java/de/mediathekview/mserver/base/webaccess/JsoupConnection.java
+++ b/src/main/java/de/mediathekview/mserver/base/webaccess/JsoupConnection.java
@@ -1,6 +1,7 @@
package de.mediathekview.mserver.base.webaccess;
import okhttp3.ConnectionPool;
+import okhttp3.Headers;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
@@ -11,7 +12,12 @@
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
+import com.google.gson.Gson;
+import com.google.gson.JsonElement;
+
import java.io.IOException;
+import java.util.Map;
+import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import static jakarta.ws.rs.core.HttpHeaders.CONTENT_LENGTH;
@@ -41,11 +47,32 @@ public JsoupConnection(final int timeout, final int threadPoolSize) {
* @throws IOException If no connection to the url could be opened.
*/
public String requestBodyAsString(final String url) throws IOException {
+ return requestBodyAsString(url, null);
+
+ }
+ /**
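+ * Request an url, sending the given headers, and receive the body as String.
+ * @param url The url to request.
+ * @param headerMap Header names mapped to header values to add to the request; if null, no additional headers are set.
+ * @return The response body as String.
+ * @throws IOException If no connection to the url could be opened.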
+ */
+ public String requestBodyAsString(final String url, final Map headerMap) throws IOException {
int retry = 0;
int httpResponseCode;
final String responseString = "";
do {
- final Request request = new Request.Builder().url(url).build();
+ okhttp3.Headers.Builder headerBuilder = new Headers.Builder();
+ if (headerMap != null) {
+ for (Entry headerValue : headerMap.entrySet()) {
+ headerBuilder.add(headerValue.getKey(), headerValue.getValue());
+ }
+ }
+ Request request = new Request.Builder()
+ .url(url)
+ .headers(headerBuilder.build())
+ .build();
+
try (final Response response = client.newCall(request).execute()) {
httpResponseCode = response.code();
if (response.body() == null || httpResponseCode == 404 || httpResponseCode == 410) {
@@ -62,6 +89,17 @@ public String requestBodyAsString(final String url) throws IOException {
return responseString;
}
+ /**
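+ * Request an url with the given headers and receive the body parsed as a Gson JsonElement
+ * @param url The url to request.
+ * @param headerMap Header names mapped to header values; passed on unchanged to requestBodyAsString.
+ * @return request body parsed by Gson into a JsonElement
+ * @throws IOException If no connection to the url could be opened.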
+ */
+ public JsonElement requestBodyAsJsonElement(final String url, final Map headerMap) throws IOException {
+ return new Gson().fromJson(requestBodyAsString(url, headerMap), JsonElement.class);
+ }
+
/**
* Request an url and receive the body as HTML JSOUP Document
*
diff --git a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java
index 6ab02771c..b7799eaf3 100644
--- a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java
+++ b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java
@@ -24,7 +24,7 @@
import de.mediathekview.mserver.crawler.dw.DwCrawler;
import de.mediathekview.mserver.crawler.funk.FunkCrawler;
import de.mediathekview.mserver.crawler.kika.KikaApiCrawler;
-import de.mediathekview.mserver.crawler.orf.OrfCrawler;
+import de.mediathekview.mserver.crawler.orfon.OrfOnCrawler;
import de.mediathekview.mserver.crawler.phoenix.PhoenixCrawler;
import de.mediathekview.mserver.crawler.sr.SrCrawler;
import de.mediathekview.mserver.crawler.srf.SrfCrawler;
@@ -519,8 +519,10 @@ private void initializeCrawler(final MServerConfigManager rootConfig) {
new KikaApiCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
crawlerMap.put(
Sender.DW, new DwCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
+ //crawlerMap.put(
+ // Sender.ORF, new OrfCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
crawlerMap.put(
- Sender.ORF, new OrfCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
+ Sender.ORF, new OrfOnCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
crawlerMap.put(
Sender.PHOENIX,
new PhoenixCrawler(forkJoinPool, messageListeners, progressListeners, rootConfig));
diff --git a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java
index b9faa3455..c3f685314 100644
--- a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java
+++ b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java
@@ -193,7 +193,6 @@ public Document requestBodyAsXmlDocument(String url) throws IOException {
* @return size of the response in KB or -1 in case we could not determine the size.
*/
public long determineFileSizeInKB(String url) {
- getRateLimiter().acquire();
return getConnection().determineFileSize(url) / 1024;
}
@@ -203,7 +202,6 @@ public long determineFileSizeInKB(String url) {
* @return return true if the request was successfully processed by the server
*/
public boolean requestUrlExists(String url) {
- getRateLimiter().acquire();
return getConnection().requestUrlExists(url);
}
/**
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/OrfConstants.java b/src/main/java/de/mediathekview/mserver/crawler/orf/OrfConstants.java
deleted file mode 100644
index f79832eca..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/OrfConstants.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package de.mediathekview.mserver.crawler.orf;
-
-public final class OrfConstants {
-
- public static final String URL_BASE = "https://tvthek.orf.at";
-
- /**
- * URL für die Sendungen eines Tages
- * Muss am Ende noch um das Datum dd.MM.yyyy ergänzt werden
- */
- public static final String URL_DAY = URL_BASE + "/schedule/";
-
- /**
- * Basis-URL für Übersichtsseite nach Buchstaben
- * Muss am Ende noch um Buchstabe bzw. 0 ergänzt werden
- */
- public static final String URL_SHOW_LETTER_PAGE = URL_BASE + "/profiles/letter/";
-
- /**
- * URL für erste Übersichtsseite nach Buchstaben
- */
- public static final String URL_SHOW_LETTER_PAGE_A = URL_SHOW_LETTER_PAGE + "A";
-
- /**
- * URL für verpasste Sendungen eines Tages
- * Muss am Ende noch um Datum ergänzt werden im Format DD.MM.YYYY
- */
- public static final String URL_DATE = URL_BASE + "/schedule/";
-
- /**
- * URL für Übersichtsseite des Archivs
- */
- public static final String URL_ARCHIVE = URL_BASE + "/history";
-
- private OrfConstants() {}
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/OrfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/orf/OrfCrawler.java
deleted file mode 100644
index 3fd9d25c8..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/OrfCrawler.java
+++ /dev/null
@@ -1,140 +0,0 @@
-package de.mediathekview.mserver.crawler.orf;
-
-import de.mediathekview.mlib.daten.Film;
-import de.mediathekview.mlib.daten.Sender;
-import de.mediathekview.mlib.messages.listener.MessageListener;
-import de.mediathekview.mserver.base.config.MServerConfigManager;
-import de.mediathekview.mserver.base.messages.ServerMessages;
-import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
-import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
-import de.mediathekview.mserver.crawler.basic.TopicUrlDTO;
-import de.mediathekview.mserver.crawler.orf.tasks.*;
-import de.mediathekview.mserver.progress.listeners.SenderProgressListener;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
-import java.time.temporal.ChronoUnit;
-import java.util.Collection;
-import java.util.Queue;
-import java.util.Set;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ForkJoinPool;
-import java.util.concurrent.RecursiveTask;
-
-public class OrfCrawler extends AbstractCrawler {
-
- private static final Logger LOG = LogManager.getLogger(OrfCrawler.class);
-
- public OrfCrawler(
- final ForkJoinPool aForkJoinPool,
- final Collection aMessageListeners,
- final Collection aProgressListeners,
- final MServerConfigManager rootConfig) {
- super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig);
-
- }
-
- @Override
- public Sender getSender() {
- return Sender.ORF;
- }
-
- private Set getArchiveEntries() throws InterruptedException, ExecutionException {
- final OrfHistoryOverviewTask historyTask = new OrfHistoryOverviewTask(this);
- final Queue topics = forkJoinPool.submit(historyTask).get();
-
- final OrfHistoryTopicTask topicTask = new OrfHistoryTopicTask(this, topics);
- final Set shows = forkJoinPool.submit(topicTask).get();
-
- printMessage(
- ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
-
- return shows;
- }
-
- private Set getDaysEntries() throws InterruptedException, ExecutionException {
- final OrfDayTask dayTask = new OrfDayTask(this, getDayUrls());
- final Set shows = forkJoinPool.submit(dayTask).get();
-
- printMessage(
- ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
-
- return shows;
- }
-
- private Queue getDayUrls() {
- final Queue urls = new ConcurrentLinkedQueue<>();
- for (int i = 0;
- i
- < crawlerConfig.getMaximumDaysForSendungVerpasstSection()
- + crawlerConfig.getMaximumDaysForSendungVerpasstSectionFuture();
- i++) {
- urls.add(
- new CrawlerUrlDTO(
- OrfConstants.URL_DAY
- + LocalDateTime.now()
- .plus(
- crawlerConfig.getMaximumDaysForSendungVerpasstSectionFuture(),
- ChronoUnit.DAYS)
- .minus(i, ChronoUnit.DAYS)
- .format(DateTimeFormatter.ofPattern("dd.MM.yyyy"))));
- }
-
- return urls;
- }
-
- private Queue getLetterEntries() throws InterruptedException, ExecutionException {
- final OrfLetterPageTask letterTask = new OrfLetterPageTask(this);
- final Queue shows = forkJoinPool.submit(letterTask).get();
-
- printMessage(
- ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
-
- return shows;
- }
-
- @Override
- protected RecursiveTask> createCrawlerTask() {
- try {
- boolean processMoreEpisodes = false;
-
- final Queue shows = new ConcurrentLinkedQueue<>();
-
- if (Boolean.TRUE.equals(crawlerConfig.getTopicsSearchEnabled())) {
- shows.addAll(getArchiveEntries());
- addShows(shows, getLetterEntries());
- processMoreEpisodes = true;
- } else {
- addShows(shows, getDaysEntries());
- processMoreEpisodes = false;
- }
-
- printMessage(
- ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
- getAndSetMaxCount(shows.size());
-
- return new OrfFilmDetailTask(this, shows, processMoreEpisodes);
- } catch (final InterruptedException ex) {
- LOG.debug("{} crawler interrupted.", getSender().getName(), ex);
- Thread.currentThread().interrupt();
- } catch (final ExecutionException ex) {
- LOG.fatal("Exception in {} crawler.", getSender().getName(), ex);
- }
- return null;
- }
-
- private void addShows(Queue shows, Collection showsToAdd) {
- showsToAdd.forEach(
- show -> {
- // compare only urls because topics can be different in letter and day lists
- if (shows.stream().noneMatch(s -> s.getUrl().equals(show.getUrl()))) {
- shows.add(show);
- } else {
- LOG.debug("duplicated url {} of topic {} removed", show.getUrl(), show.getTopic());
- }
- });
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/OrfEpisodeInfoDTO.java b/src/main/java/de/mediathekview/mserver/crawler/orf/OrfEpisodeInfoDTO.java
deleted file mode 100644
index ce058688d..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/OrfEpisodeInfoDTO.java
+++ /dev/null
@@ -1,38 +0,0 @@
-package de.mediathekview.mserver.crawler.orf;
-
-import java.time.Duration;
-import java.util.Optional;
-
-public class OrfEpisodeInfoDTO {
- private final OrfVideoInfoDTO videoInfo;
- private final Optional description;
- private final Optional duration;
- private final Optional title;
-
- public OrfEpisodeInfoDTO(final OrfVideoInfoDTO aVideoInfo,
- final Optional aTitle,
- final Optional aDescription,
- final Optional aDuration
- ) {
- title = aTitle;
- description = aDescription;
- duration = aDuration;
- videoInfo = aVideoInfo;
- }
-
- public OrfVideoInfoDTO getVideoInfo() {
- return videoInfo;
- }
-
- public Optional getDescription() {
- return description;
- }
-
- public Optional getDuration() {
- return duration;
- }
-
- public Optional getTitle() {
- return title;
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/OrfVideoInfoDTO.java b/src/main/java/de/mediathekview/mserver/crawler/orf/OrfVideoInfoDTO.java
deleted file mode 100644
index 596fa9393..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/OrfVideoInfoDTO.java
+++ /dev/null
@@ -1,50 +0,0 @@
-package de.mediathekview.mserver.crawler.orf;
-
-import java.util.EnumMap;
-import java.util.Map;
-import de.mediathekview.mlib.daten.Resolution;
-
-public class OrfVideoInfoDTO {
-
- private static final String FILTER_JUGENDSCHUTZ = ".*/Jugendschutz\\d{4}b\\d{4}_.*";
- private final Map videoUrls;
- private String subtitleUrl;
-
- public OrfVideoInfoDTO() {
- videoUrls = new EnumMap<>(Resolution.class);
- }
-
- public boolean hasVideoUrls() {
- return !videoUrls.isEmpty();
- }
-
- public Resolution getDefaultQuality() {
- if (videoUrls.containsKey(Resolution.NORMAL)) {
- return Resolution.NORMAL;
- }
- return videoUrls.keySet().iterator().next();
- }
-
- public String getDefaultVideoUrl() {
- return videoUrls.get(getDefaultQuality());
- }
-
- public String getSubtitleUrl() {
- return subtitleUrl;
- }
-
- public Map getVideoUrls() {
- return videoUrls;
- }
-
- public String put(final Resolution key, final String value) {
- if (value == null || value.matches(FILTER_JUGENDSCHUTZ)) {
- return "";
- }
- return videoUrls.put(key, value);
- }
-
- public void setSubtitleUrl(final String subtitleUrl) {
- this.subtitleUrl = subtitleUrl;
- }
-}
\ No newline at end of file
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/json/OrfMoreEpisodesDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/orf/json/OrfMoreEpisodesDeserializer.java
deleted file mode 100644
index 45b52710b..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/json/OrfMoreEpisodesDeserializer.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.json;
-
-import com.google.gson.JsonDeserializationContext;
-import com.google.gson.JsonDeserializer;
-import com.google.gson.JsonElement;
-import de.mediathekview.mserver.base.utils.JsonUtils;
-import de.mediathekview.mserver.base.utils.UrlUtils;
-import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
-import de.mediathekview.mserver.crawler.orf.OrfConstants;
-
-import java.lang.reflect.Type;
-import java.util.Optional;
-
-public class OrfMoreEpisodesDeserializer implements JsonDeserializer {
-
- private static final String ATTRIBUTE_URL = "url";
-
- @Override
- public CrawlerUrlDTO deserialize(
- JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) {
-
- final Optional url =
- JsonUtils.getAttributeAsString(jsonElement.getAsJsonObject(), ATTRIBUTE_URL);
- return url.map(s -> new CrawlerUrlDTO(UrlUtils.addDomainIfMissing(s, OrfConstants.URL_BASE))).orElse(null);
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/parser/OrfMoreEpisodesParser.java b/src/main/java/de/mediathekview/mserver/crawler/orf/parser/OrfMoreEpisodesParser.java
deleted file mode 100644
index 28a6c3418..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/parser/OrfMoreEpisodesParser.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.parser;
-
-import de.mediathekview.mserver.base.HtmlConsts;
-import de.mediathekview.mserver.crawler.basic.TopicUrlDTO;
-import java.util.ArrayList;
-import java.util.List;
-import org.jsoup.nodes.Document;
-
-public class OrfMoreEpisodesParser {
- private static final String EPISODES_SELECTOR = "article.b-teaser > a.teaser-link";
-
- public List parse(final Document document, final String topic) {
- final List result = new ArrayList<>();
-
- document
- .select(EPISODES_SELECTOR)
- .forEach(
- episode -> {
- final String url = episode.attr(HtmlConsts.ATTRIBUTE_HREF);
- result.add(new TopicUrlDTO(topic, url));
- });
-
- return result;
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/parser/OrfPlaylistDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/orf/parser/OrfPlaylistDeserializer.java
deleted file mode 100644
index 9716a1e03..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/parser/OrfPlaylistDeserializer.java
+++ /dev/null
@@ -1,110 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.parser;
-
-import com.google.gson.JsonArray;
-import com.google.gson.JsonDeserializationContext;
-import com.google.gson.JsonDeserializer;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import de.mediathekview.mserver.base.utils.JsonUtils;
-import de.mediathekview.mserver.crawler.orf.OrfEpisodeInfoDTO;
-import de.mediathekview.mserver.crawler.orf.OrfVideoInfoDTO;
-import java.lang.reflect.Type;
-import java.time.Duration;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Optional;
-
-public class OrfPlaylistDeserializer implements JsonDeserializer> {
-
- private static final String ELEMENT_GAPLESS_VIDEO = "gapless_video";
- private static final String ELEMENT_PLAYLIST = "playlist";
- private static final String ELEMENT_VIDEOS = "videos";
-
- private static final String ATTRIBUTE_TITLE = "title";
- private static final String ATTRIBUTE_DESCRIPTION = "description";
- private static final String ATTRIBUTE_DURATION = "duration";
- private static final String ATTRIBUTE_DURATION_IN_SECONDS = "duration_in_seconds";
-
- @Override
- public List deserialize(
- JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) {
-
- List episodes = new ArrayList<>();
-
- if (!aJsonElement.getAsJsonObject().has(ELEMENT_PLAYLIST)) {
- return episodes;
- }
-
- JsonObject playlistObject =
- aJsonElement.getAsJsonObject().get(ELEMENT_PLAYLIST).getAsJsonObject();
- if (JsonUtils.hasElements(playlistObject, ELEMENT_GAPLESS_VIDEO)) {
- parseGaplessVideo(episodes, playlistObject);
- }
-
- parseVideos(episodes, playlistObject);
-
- return episodes;
- }
-
- private void parseGaplessVideo(List aEpisodes, JsonObject aPlaylistObject) {
-
- final Optional title = JsonUtils.getAttributeAsString(aPlaylistObject, ATTRIBUTE_TITLE);
- final Optional duration = parseDurationInSeconds(aPlaylistObject);
-
- final Optional videoInfoOptional =
- parseUrls(aPlaylistObject.getAsJsonObject(ELEMENT_GAPLESS_VIDEO));
-
- if (videoInfoOptional.isPresent()) {
- OrfEpisodeInfoDTO episode =
- new OrfEpisodeInfoDTO(videoInfoOptional.get(), title, Optional.empty(), duration);
- aEpisodes.add(episode);
- }
- }
-
- private void parseVideos(List aEpisodes, JsonObject aPlaylistObject) {
- JsonArray videosArray = aPlaylistObject.getAsJsonObject().get(ELEMENT_VIDEOS).getAsJsonArray();
-
- for (JsonElement videoElement : videosArray) {
- JsonObject videoObject = videoElement.getAsJsonObject();
- final Optional title = JsonUtils.getAttributeAsString(videoObject, ATTRIBUTE_TITLE);
- final Optional description =
- JsonUtils.getAttributeAsString(videoObject, ATTRIBUTE_DESCRIPTION);
- final Optional duration = parseDuration(videoObject);
-
- final Optional videoInfoOptional = parseUrls(videoObject);
-
- if (videoInfoOptional.isPresent()) {
- OrfEpisodeInfoDTO episode =
- new OrfEpisodeInfoDTO(videoInfoOptional.get(), title, description, duration);
- aEpisodes.add(episode);
- }
- }
- }
-
- private Optional parseUrls(final JsonObject aVideoObject) {
-
- OrfVideoDetailDeserializer deserializer = new OrfVideoDetailDeserializer();
- return deserializer.deserializeVideoObject(aVideoObject);
- }
-
- private static Optional parseDuration(final JsonObject aVideoObject) {
- if (aVideoObject.has(ATTRIBUTE_DURATION)) {
- Long durationValue = aVideoObject.get(ATTRIBUTE_DURATION).getAsLong();
-
- // Duration ist in Millisekunden angegeben, diese interessieren aber nicht
- return Optional.of(Duration.ofSeconds(durationValue / 1000));
- }
-
- return Optional.empty();
- }
-
- private static Optional parseDurationInSeconds(final JsonObject aVideoObject) {
- if (aVideoObject.has(ATTRIBUTE_DURATION_IN_SECONDS)) {
- Double durationValue = aVideoObject.get(ATTRIBUTE_DURATION_IN_SECONDS).getAsDouble();
-
- return Optional.of(Duration.ofSeconds(durationValue.longValue()));
- }
-
- return Optional.empty();
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/parser/OrfVideoDetailDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/orf/parser/OrfVideoDetailDeserializer.java
deleted file mode 100644
index 2a9c70b11..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/parser/OrfVideoDetailDeserializer.java
+++ /dev/null
@@ -1,163 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.parser;
-
-import com.google.gson.JsonDeserializationContext;
-import com.google.gson.JsonDeserializer;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import de.mediathekview.mlib.daten.Resolution;
-import de.mediathekview.mserver.crawler.orf.OrfVideoInfoDTO;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
-import java.lang.reflect.Type;
-import java.util.Optional;
-
-import static de.mediathekview.mserver.base.HtmlConsts.ATTRIBUTE_SRC;
-
-public class OrfVideoDetailDeserializer implements JsonDeserializer> {
-
- private static final Logger LOG = LogManager.getLogger(OrfVideoDetailDeserializer.class);
-
- private static final String WRONG_HTTPS_URL_PART = ".apa.";
- private static final String RIGHT_HTTPS_URL_PART = ".sf.apa.";
-
- private static final String ELEMENT_PLAYLIST = "playlist";
- private static final String ELEMENT_VIDEOS = "videos";
- private static final String ELEMENT_SUBTITLES = "subtitles";
- private static final String ELEMENT_SOURCES = "sources";
-
- private static final String ATTRIBUTE_DELIVERY = "delivery";
- private static final String ATTRIBUTE_PROTOCOL = "protocol";
- private static final String ATTRIBUTE_QUALITY = "quality";
- private static final String ATTRIBUTE_TYPE = "type";
-
- private static final String RELEVANT_DELIVERY1 = "progressive";
- private static final String RELEVANT_DELIVERY2 = "hls";
- private static final String RELEVANT_PROTOCOL = "http";
- private static final String RELEVANT_SUBTITLE_TYPE = "ttml";
- private static final String RELEVANT_VIDEO_TYPE1 = "video/mp4";
- private static final String RELEVANT_VIDEO_TYPE2 = "application/x-mpegURL";
-
- private static String fixHttpsUrl(final String url) {
- if (url.contains(RIGHT_HTTPS_URL_PART)) {
- return url;
- }
- return url.replace(WRONG_HTTPS_URL_PART, RIGHT_HTTPS_URL_PART);
- }
-
- private static void parseVideo(final JsonElement aVideoElement, final OrfVideoInfoDTO dto) {
- if (aVideoElement.isJsonArray()) {
- aVideoElement
- .getAsJsonArray()
- .forEach(
- videoElement -> {
- final JsonObject videoObject = videoElement.getAsJsonObject();
- if (videoObject.has(ATTRIBUTE_PROTOCOL)
- && videoObject.has(ATTRIBUTE_QUALITY)
- && videoObject.has(ATTRIBUTE_SRC)
- && videoObject.has(ATTRIBUTE_TYPE)) {
- final String type = videoObject.get(ATTRIBUTE_TYPE).getAsString();
- final String protocol = videoObject.get(ATTRIBUTE_PROTOCOL).getAsString();
- final String delivery = videoObject.get(ATTRIBUTE_DELIVERY).getAsString();
-
- if (isVideoRelevant(type, protocol, delivery)) {
- final String quality = videoObject.get(ATTRIBUTE_QUALITY).getAsString();
- final String url = fixHttpsUrl(videoObject.get(ATTRIBUTE_SRC).getAsString());
-
- final Optional resolution = getQuality(quality);
- resolution.ifPresent(resolution1 -> dto.put(resolution1, url));
- }
- }
- });
- }
- }
-
- public Optional deserializeVideoObject(final JsonObject aVideoObject) {
- final OrfVideoInfoDTO dto = new OrfVideoInfoDTO();
-
- if (aVideoObject.has(ELEMENT_SOURCES)) {
- parseVideo(aVideoObject.get(ELEMENT_SOURCES), dto);
- }
-
- if (aVideoObject.has(ELEMENT_SUBTITLES)) {
- parseSubtitles(aVideoObject.get(ELEMENT_SUBTITLES), dto);
- }
-
- if (dto.hasVideoUrls()) {
- return Optional.of(dto);
- }
-
- return Optional.empty();
- }
-
- private static boolean isVideoRelevant(
- final String type, final String protocol, final String delivery) {
- return (type.equalsIgnoreCase(RELEVANT_VIDEO_TYPE1)
- || type.equalsIgnoreCase(RELEVANT_VIDEO_TYPE2))
- && protocol.equalsIgnoreCase(RELEVANT_PROTOCOL)
- && (delivery.equalsIgnoreCase(RELEVANT_DELIVERY1)
- || delivery.equalsIgnoreCase(RELEVANT_DELIVERY2));
- }
-
- private static void parseSubtitles(
- final JsonElement aSubtitlesElement, final OrfVideoInfoDTO dto) {
- if (aSubtitlesElement.isJsonArray()) {
- aSubtitlesElement
- .getAsJsonArray()
- .forEach(
- subtitleElement -> {
- final JsonObject subtitleObject = subtitleElement.getAsJsonObject();
- if (subtitleObject.has(ATTRIBUTE_SRC) && subtitleObject.has(ATTRIBUTE_TYPE)) {
- final String type = subtitleObject.get(ATTRIBUTE_TYPE).getAsString();
-
- if (type.equalsIgnoreCase(RELEVANT_SUBTITLE_TYPE)) {
- final String url = fixHttpsUrl(subtitleObject.get(ATTRIBUTE_SRC).getAsString());
- dto.setSubtitleUrl(url);
- }
- }
- });
- }
- }
-
- private static Optional getQuality(final String aQuality) {
- switch (aQuality) {
- case "Q1A":
- return Optional.of(Resolution.VERY_SMALL);
- case "Q4A":
- return Optional.of(Resolution.SMALL);
- case "Q6A":
- return Optional.of(Resolution.NORMAL);
- case "Q8C":
- return Optional.of(Resolution.HD);
- case "Q0A":
- // QXA/QXB(DRM): another m3u8 has to be loaded which is often geoblocked
- case "QXA":
- case "QXADRM":
- case "QXB":
- case "QXBDRM":
- case "Q8A":
- return Optional.empty();
- default:
- LOG.debug("ORF: unknown quality: {}", aQuality);
- }
- return Optional.empty();
- }
-
- @Override
- public Optional deserialize(
- final JsonElement aJsonElement, final Type aType, final JsonDeserializationContext aContext) {
-
- final JsonObject jsonObject = aJsonElement.getAsJsonObject();
- if (jsonObject.has(ELEMENT_PLAYLIST)) {
- final JsonObject playlistObject = jsonObject.get(ELEMENT_PLAYLIST).getAsJsonObject();
- if (playlistObject.has(ELEMENT_VIDEOS)) {
- final JsonObject videoObject =
- playlistObject.get(ELEMENT_VIDEOS).getAsJsonArray().get(0).getAsJsonObject();
-
- return deserializeVideoObject(videoObject);
- }
- }
-
- return Optional.empty();
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfDayTask.java b/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfDayTask.java
deleted file mode 100644
index b9312c748..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfDayTask.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.tasks;
-
-import de.mediathekview.mserver.base.HtmlConsts;
-import de.mediathekview.mserver.crawler.basic.*;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import java.util.Queue;
-
-public class OrfDayTask extends AbstractDocumentTask {
-
- private static final String ITEM_SELECTOR = "article a";
- private static final String TITLE_SELECTOR1 = ".item-title";
- private static final String TITLE_SELECTOR2 = ".teaser-title";
-
- public OrfDayTask(
- final AbstractCrawler crawler,
- final Queue urlToCrawlDTOs) {
- super(crawler, urlToCrawlDTOs);
- }
-
- @Override
- protected void processDocument(final CrawlerUrlDTO urlDto, final Document document) {
- final Elements elements = document.select(ITEM_SELECTOR);
- elements.forEach(
- item -> {
- final Element titleElement = getTitleElement(item);
- if (titleElement != null) {
- final String theme = OrfHelper.parseTheme(titleElement.text());
- final String url = item.attr(HtmlConsts.ATTRIBUTE_HREF);
-
- final TopicUrlDTO dto = new TopicUrlDTO(theme, url);
- taskResults.add(dto);
- }
- });
- }
-
- private Element getTitleElement(final Element item) {
- Element titleElement = item.selectFirst(TITLE_SELECTOR1);
- if (titleElement == null) {
- titleElement = item.selectFirst(TITLE_SELECTOR2);
- }
- return titleElement;
- }
-
- @Override
- protected AbstractUrlTask createNewOwnInstance(
- final Queue aUrlsToCrawl) {
- return new OrfDayTask(crawler, aUrlsToCrawl);
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfFilmDetailTask.java
deleted file mode 100644
index b3544fd9f..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfFilmDetailTask.java
+++ /dev/null
@@ -1,306 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.tasks;
-
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-import com.google.gson.reflect.TypeToken;
-import de.mediathekview.mlib.daten.Film;
-import de.mediathekview.mlib.daten.FilmUrl;
-import de.mediathekview.mlib.daten.GeoLocations;
-import de.mediathekview.mlib.daten.Resolution;
-import de.mediathekview.mserver.base.utils.HtmlDocumentUtils;
-import de.mediathekview.mserver.crawler.basic.*;
-import de.mediathekview.mserver.crawler.orf.OrfEpisodeInfoDTO;
-import de.mediathekview.mserver.crawler.orf.OrfVideoInfoDTO;
-import de.mediathekview.mserver.crawler.orf.json.OrfMoreEpisodesDeserializer;
-import de.mediathekview.mserver.crawler.orf.parser.OrfMoreEpisodesParser;
-import de.mediathekview.mserver.crawler.orf.parser.OrfPlaylistDeserializer;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.jsoup.nodes.Document;
-
-import java.io.IOException;
-import java.lang.reflect.Type;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.time.Duration;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
-import java.time.format.DateTimeParseException;
-import java.time.temporal.ChronoUnit;
-import java.util.*;
-import java.util.concurrent.ConcurrentLinkedQueue;
-
-public class OrfFilmDetailTask extends AbstractDocumentTask {
-
- private static final Logger LOG = LogManager.getLogger(OrfFilmDetailTask.class);
-
- private static final String TITLE_SELECTOR = ".description-container .description-title";
- private static final String VIDEO_META_DATA_SELECTOR = ".video-meta-data";
- private static final String TIME_SELECTOR = VIDEO_META_DATA_SELECTOR + " time";
- private static final String DURATION_SELECTOR = VIDEO_META_DATA_SELECTOR + " span.duration";
- private static final String DESCRIPTION_SELECTOR = ".description-container .description-text";
- private static final String VIDEO_SELECTOR = "div.jsb_VideoPlaylist";
- private static final String MORE_EPISODES_SELECTOR = "div.more-episodes";
-
- private static final String ATTRIBUTE_DATETIME = "datetime";
- private static final String ATTRIBUTE_DATA_JSB = "data-jsb";
- private static final String PREFIX_AUDIO_DESCRIPTION = "AD |";
-
- private static final DateTimeFormatter DATE_TIME_FORMATTER =
- DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
-
- private static final Type CRAWLER_URL_TYPE_TOKEN = new TypeToken() {}.getType();
- private static final Type LIST_EPISODEINFO_TYPE_TOKEN =
- new TypeToken>() {}.getType();
- private final boolean processMoreEpisodes;
-
- public OrfFilmDetailTask(
- final AbstractCrawler aCrawler, final Queue aUrlToCrawlDtos, boolean processMoreEpisodes) {
- super(aCrawler, aUrlToCrawlDtos);
-
- this.processMoreEpisodes = processMoreEpisodes;
- }
-
- private static Optional parseDate(final Document aDocument) {
- final Optional date =
- HtmlDocumentUtils.getElementAttributeString(TIME_SELECTOR, ATTRIBUTE_DATETIME, aDocument);
- if (date.isPresent()) {
- final String dateValue = date.get().replace("CET", " ").replace("CEST", " ");
- try {
- final LocalDateTime localDate = LocalDateTime.parse(dateValue, DATE_TIME_FORMATTER);
- return Optional.of(localDate);
- } catch (final DateTimeParseException e) {
- LOG.debug("OrfFilmDetailTask: unknown date format: {}", date.get());
- }
- }
-
- return Optional.empty();
- }
-
- private static Optional parseDuration(final Document aDocument) {
- final Optional duration =
- HtmlDocumentUtils.getElementString(DURATION_SELECTOR, aDocument);
- if (!duration.isPresent()) {
- return Optional.empty();
- }
-
- final Optional unit = determineChronoUnit(duration.get());
- if (!unit.isPresent()) {
- LOG.debug("OrfFilmDetailTask: unknown duration type: {}", duration.get());
- return Optional.empty();
- }
-
- final String[] parts = duration.get().split(" ")[0].trim().split(":");
- if (parts.length != 2) {
- LOG.debug("OrfFilmDetailTask: unknown duration part count: {}", duration.get());
- return Optional.empty();
- }
-
- final ChronoUnit unitValue = unit.get();
- if (unitValue == ChronoUnit.SECONDS || unitValue == ChronoUnit.MINUTES) {
- return Optional.of(
- Duration.ofMinutes(Long.parseLong(parts[0])).plusSeconds(Long.parseLong(parts[1])));
- }
- if (unitValue == ChronoUnit.HOURS) {
- return Optional.of(
- Duration.ofHours(Long.parseLong(parts[0])).plusMinutes(Long.parseLong(parts[1])));
- }
-
- return Optional.empty();
- }
-
- private static Optional determineChronoUnit(final String aDuration) {
- if (aDuration.contains("Min.")) {
- return Optional.of(ChronoUnit.MINUTES);
- }
- if (aDuration.contains("Std.")) {
- return Optional.of(ChronoUnit.HOURS);
- }
- if (aDuration.contains("Sek.")) {
- return Optional.of(ChronoUnit.SECONDS);
- }
-
- return Optional.empty();
- }
-
- @Override
- protected void processDocument(final TopicUrlDTO aUrlDto, final Document aDocument) {
- final Optional title = HtmlDocumentUtils.getElementString(TITLE_SELECTOR, aDocument);
- final Optional time = parseDate(aDocument);
- final Optional duration = parseDuration(aDocument);
- final Optional description =
- HtmlDocumentUtils.getElementString(DESCRIPTION_SELECTOR, aDocument);
-
- final List episodes = parseEpisodes(aDocument);
- if (episodes.size() > 1) {
- crawler.incrementMaxCountBySizeAndGetNewSize(episodes.size() - 1L);
- crawler.updateProgress();
- }
-
- for (int i = 0; i < episodes.size(); i++) {
- final OrfEpisodeInfoDTO episode = episodes.get(i);
- if (i == 0) {
- createFilm(aUrlDto, episode.getVideoInfo(), title, description, time, duration);
- } else {
- createFilm(
- aUrlDto,
- episode.getVideoInfo(),
- episode.getTitle(),
- episode.getDescription(),
- time,
- episode.getDuration());
- }
- }
-
- if (processMoreEpisodes) {
- final List topicUrlDTOS = parseMoreEpisodes(aDocument, aUrlDto.getTopic());
- topicUrlDTOS.remove(aUrlDto);
- processMoreEpisodes(topicUrlDTOS);
- }
- }
-
- @Override
- protected AbstractUrlTask createNewOwnInstance(
- final Queue aUrlsToCrawl) {
- return createNewOwnInstance(aUrlsToCrawl, processMoreEpisodes);
- }
-
- private AbstractUrlTask createNewOwnInstance(final Queue urlsToCrawl, boolean processMoreEpisodes) {
- return new OrfFilmDetailTask(crawler, urlsToCrawl, processMoreEpisodes);
- }
-
- private void createFilm(
- final TopicUrlDTO aUrlDto,
- final OrfVideoInfoDTO aVideoInfo,
- final Optional aTitle,
- final Optional aDescription,
- final Optional aTime,
- final Optional aDuration) {
-
- try {
- if (aTitle.isPresent()) {
- boolean isAudioDescription = aUrlDto.getTopic().startsWith(PREFIX_AUDIO_DESCRIPTION);
-
- final Film film =
- new Film(
- UUID.randomUUID(),
- crawler.getSender(),
- isAudioDescription
- ? trimAudioDescriptionPrefix(aTitle.get())
- : aTitle.get(),
- isAudioDescription
- ? trimAudioDescriptionPrefix(aUrlDto.getTopic())
- : aUrlDto.getTopic(),
- aTime.orElse(LocalDateTime.now()),
- aDuration.orElse(Duration.ZERO));
-
- film.setWebsite(new URL(aUrlDto.getUrl()));
- aDescription.ifPresent(film::setBeschreibung);
-
- if (StringUtils.isNotBlank(aVideoInfo.getSubtitleUrl())) {
- film.addSubtitle(new URL(aVideoInfo.getSubtitleUrl()));
- }
-
- addUrls(film, aVideoInfo.getVideoUrls(), isAudioDescription);
-
- setGeoLocations(aVideoInfo, film);
-
- taskResults.add(film);
- crawler.incrementAndGetActualCount();
- crawler.updateProgress();
- } else {
- LOG.error("OrfFilmDetailTask: no title or video found for url {}", aUrlDto.getUrl());
- crawler.incrementAndGetErrorCount();
- crawler.updateProgress();
- }
- } catch (final MalformedURLException ex) {
- LOG.fatal("A ORF URL can't be parsed.", ex);
- crawler.printErrorMessage();
- crawler.incrementAndGetErrorCount();
- crawler.updateProgress();
- }
- }
-
- private String trimAudioDescriptionPrefix(String text) {
- return text.substring(PREFIX_AUDIO_DESCRIPTION.length());
- }
-
- private void setGeoLocations(final OrfVideoInfoDTO aVideoInfo, final Film film) {
- final List geoLocations = new ArrayList<>();
- if (aVideoInfo.getDefaultVideoUrl().contains("cms-austria")) {
- geoLocations.add(GeoLocations.GEO_AT);
- } else {
- geoLocations.add(GeoLocations.GEO_NONE);
- }
- film.setGeoLocations(geoLocations);
- }
-
- private void addUrls(
- final Film aFilm, final Map aVideoUrls, boolean isAudioDescription)
- throws MalformedURLException {
-
- for (final Map.Entry qualitiesEntry : aVideoUrls.entrySet()) {
- final String url = qualitiesEntry.getValue();
- final FilmUrl filmUrl = new FilmUrl(url, crawler.determineFileSizeInKB(url));
- final Resolution key = qualitiesEntry.getKey();
-
- if (isAudioDescription) {
- aFilm.addAudioDescription(key, filmUrl);
- } else {
- aFilm.addUrl(key, filmUrl);
- }
-
- }
- }
-
- private List parseEpisodes(final Document aDocument) {
- final Optional json =
- HtmlDocumentUtils.getElementAttributeString(VIDEO_SELECTOR, ATTRIBUTE_DATA_JSB, aDocument);
-
- if (json.isPresent()) {
-
- final Gson gson =
- new GsonBuilder()
- .registerTypeAdapter(LIST_EPISODEINFO_TYPE_TOKEN, new OrfPlaylistDeserializer())
- .create();
-
- return gson.fromJson(json.get(), LIST_EPISODEINFO_TYPE_TOKEN);
- }
-
- return new ArrayList<>();
- }
-
- private List parseMoreEpisodes(final Document document, final String topic) {
- final Optional json = HtmlDocumentUtils.getElementAttributeString(MORE_EPISODES_SELECTOR, ATTRIBUTE_DATA_JSB, document);
- if (json.isPresent()) {
- final Gson gson =
- new GsonBuilder()
- .registerTypeAdapter(CRAWLER_URL_TYPE_TOKEN, new OrfMoreEpisodesDeserializer())
- .create();
-
- CrawlerUrlDTO moreEpisodesUrl = gson.fromJson(json.get(), CRAWLER_URL_TYPE_TOKEN);
- if (moreEpisodesUrl != null) {
- try {
- final Document moreEpisodesDocument = crawler.requestBodyAsHtmlDocument(moreEpisodesUrl.getUrl());
- OrfMoreEpisodesParser parser = new OrfMoreEpisodesParser();
- return parser.parse(moreEpisodesDocument, topic);
- } catch (IOException e) {
- LOG.error("OrfFilmDetailTask: loading more episodes url {} failed.", moreEpisodesUrl.getUrl());
- crawler.incrementAndGetErrorCount();
- }
- }
- }
-
- return new ArrayList<>();
- }
-
- private void processMoreEpisodes(final List moreFilms) {
- if (moreFilms != null && !moreFilms.isEmpty()) {
- final Queue queue = new ConcurrentLinkedQueue<>(moreFilms);
- final OrfFilmDetailTask task = (OrfFilmDetailTask) createNewOwnInstance(queue, false);
- task.fork();
- taskResults.addAll(task.join());
- }
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfHelper.java b/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfHelper.java
deleted file mode 100644
index 900e37f46..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfHelper.java
+++ /dev/null
@@ -1,61 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.tasks;
-
-import de.mediathekview.mserver.base.HtmlConsts;
-import de.mediathekview.mserver.crawler.orf.OrfConstants;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/** Helper methods for ORF tasks. */
-class OrfHelper {
-
- private static final String LETTER_URL_SELECTOR = "li.letter-item > a";
-
- private OrfHelper() {}
-
- static String parseTheme(final Element aItem) {
- final String theme = aItem.attr(HtmlConsts.ATTRIBUTE_TITLE);
- return parseTheme(theme);
- }
-
- static String parseTheme(final String theme) {
- final String result = theme.replaceAll("\\d{1,2}:\\d{2}$", "").trim();
- // Thema steht vor Doppelpunkt
- // Ausnahmen
- // - ZIB-Sendungen mit Uhrzeit
- // - DokEins-Sendungen
- // - Ungarisches Magazin
- final int index = result.indexOf(':');
- if (index > 0
- && !result.startsWith("ZIB")
- && !result.startsWith("DOKeins")
- && !result.contains("Ungarisches Magazin")) {
- return result.substring(0, index).trim();
- }
- return result;
- }
-
- /**
- * determines the links to the letter pages.
- *
- * @param aDocument the html document with letter links
- * @return list with urls
- */
- static List parseLetterLinks(final Document aDocument) {
- final List results = new ArrayList<>();
-
- final Elements links = aDocument.select(LETTER_URL_SELECTOR);
- links.forEach(
- element -> {
- if (element.hasAttr(HtmlConsts.ATTRIBUTE_HREF)) {
- final String subpage = element.attr(HtmlConsts.ATTRIBUTE_HREF);
- results.add(OrfConstants.URL_BASE + subpage);
- }
- });
-
- return results;
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfHistoryOverviewTask.java b/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfHistoryOverviewTask.java
deleted file mode 100644
index 567992534..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfHistoryOverviewTask.java
+++ /dev/null
@@ -1,42 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.tasks;
-
-import de.mediathekview.mserver.base.HtmlConsts;
-import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
-import de.mediathekview.mserver.crawler.basic.TopicUrlDTO;
-import de.mediathekview.mserver.crawler.orf.OrfConstants;
-import org.jsoup.nodes.Document;
-import org.jsoup.select.Elements;
-
-import java.util.Queue;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ConcurrentLinkedQueue;
-
-public class OrfHistoryOverviewTask implements Callable> {
-
- private static final String TOPIC_URL_SELECTOR = "section.has-4-in-row article > a";
-
- private final AbstractCrawler crawler;
-
- public OrfHistoryOverviewTask(
- final AbstractCrawler aCrawler) {
- crawler = aCrawler;
- }
-
- @Override
- public Queue call() throws Exception {
- final Queue results = new ConcurrentLinkedQueue<>();
-
- // URLs für Seiten parsen
- final Document document = crawler.requestBodyAsHtmlDocument(OrfConstants.URL_ARCHIVE);
-
- final Elements topics = document.select(TOPIC_URL_SELECTOR);
- topics.forEach(
- topicElement -> {
- final String url = topicElement.attr(HtmlConsts.ATTRIBUTE_HREF);
- final String topic = topicElement.attr(HtmlConsts.ATTRIBUTE_TITLE);
- results.add(new TopicUrlDTO(topic, url));
- });
-
- return results;
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfHistoryTopicTask.java b/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfHistoryTopicTask.java
deleted file mode 100644
index 546844237..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfHistoryTopicTask.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.tasks;
-
-import de.mediathekview.mserver.base.HtmlConsts;
-import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
-import de.mediathekview.mserver.crawler.basic.AbstractDocumentTask;
-import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask;
-import de.mediathekview.mserver.crawler.basic.TopicUrlDTO;
-import org.jsoup.nodes.Document;
-
-import java.util.Queue;
-
-public class OrfHistoryTopicTask extends AbstractDocumentTask {
-
- private static final String SHOW_URL_SELECTOR = "article > a";
-
- public OrfHistoryTopicTask(
- final AbstractCrawler crawler,
- final Queue urlToCrawlDTOs
- ) {
- super(crawler, urlToCrawlDTOs);
- }
-
- @Override
- protected AbstractRecursiveConverterTask createNewOwnInstance(
- final Queue aElementsToProcess) {
- return new OrfHistoryTopicTask(crawler, aElementsToProcess);
- }
-
- @Override
- protected void processDocument(final TopicUrlDTO aUrlDto, final Document aDocument) {
- aDocument
- .select(SHOW_URL_SELECTOR)
- .forEach(
- showElement -> {
- final String url = showElement.attr(HtmlConsts.ATTRIBUTE_HREF);
- taskResults.add(new TopicUrlDTO(aUrlDto.getTopic(), url));
- });
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfLetterPageTask.java b/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfLetterPageTask.java
deleted file mode 100644
index d4ea67344..000000000
--- a/src/main/java/de/mediathekview/mserver/crawler/orf/tasks/OrfLetterPageTask.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package de.mediathekview.mserver.crawler.orf.tasks;
-
-import de.mediathekview.mserver.base.HtmlConsts;
-import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
-import de.mediathekview.mserver.crawler.basic.TopicUrlDTO;
-import de.mediathekview.mserver.crawler.orf.OrfConstants;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.jsoup.nodes.Document;
-import org.jsoup.select.Elements;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Queue;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ConcurrentLinkedQueue;
-
-public class OrfLetterPageTask implements Callable> {
-
- private static final Logger LOG = LogManager.getLogger(OrfLetterPageTask.class);
-
- private static final String SHOW_URL_SELECTOR = "article > a";
- private final AbstractCrawler crawler;
-
- /** @param aCrawler The crawler which uses this task. */
- public OrfLetterPageTask(final AbstractCrawler aCrawler) {
- crawler = aCrawler;
- }
-
- @Override
- public Queue call() throws Exception {
- final Queue results = new ConcurrentLinkedQueue<>();
-
- // URLs für Seiten parsen
- final Document document = crawler.getConnection().requestBodyAsHtmlDocument(OrfConstants.URL_SHOW_LETTER_PAGE_A);
- final List overviewLinks = OrfHelper.parseLetterLinks(document);
-
- // Sendungen für die einzelnen Seiten pro Buchstabe ermitteln
- overviewLinks.forEach(
- url -> {
- try {
- final Document subpageDocument = crawler.requestBodyAsHtmlDocument(url);
- results.addAll(parseOverviewPage(subpageDocument));
- } catch (final IOException ex) {
- LOG.fatal("OrfLetterPageTask: error parsing url {}", url, ex);
- } catch (final NullPointerException e) {
- LOG.fatal(e);
- }
- });
-
- return results;
- }
-
- private Queue parseOverviewPage(final Document aDocument) {
- final Queue results = new ConcurrentLinkedQueue<>();
-
- final Elements links = aDocument.select(SHOW_URL_SELECTOR);
- links.forEach(
- element -> {
- if (element.hasAttr(HtmlConsts.ATTRIBUTE_HREF)) {
- final String link = element.attr(HtmlConsts.ATTRIBUTE_HREF);
- final String theme = OrfHelper.parseTheme(element);
-
- results.add(new TopicUrlDTO(theme, link));
- }
- });
-
- return results;
- }
-}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnBreadCrumsUrlDTO.java b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnBreadCrumsUrlDTO.java
new file mode 100644
index 000000000..90a8bd82e
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnBreadCrumsUrlDTO.java
@@ -0,0 +1,61 @@
+package de.mediathekview.mserver.crawler.orfon;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
+
+/** Crawler URL that also carries the bread crumb path under which the URL was found. */
+public class OrfOnBreadCrumsUrlDTO extends CrawlerUrlDTO {
+  private List<String> breadCrums = new ArrayList<>();
+
+  /** Creates a DTO whose path consists of the single entry {@code breadCrum}. */
+  public OrfOnBreadCrumsUrlDTO(String breadCrum, String aUrl) {
+    super(aUrl);
+    setBreadCrums(List.of(breadCrum));
+  }
+  /** Creates a DTO with the given bread crumb path. */
+  public OrfOnBreadCrumsUrlDTO(List<String> breadCrums, String aUrl) {
+    super(aUrl);
+    setBreadCrums(breadCrums);
+  }
+
+  public List<String> getBreadCrums() {
+    return breadCrums;
+  }
+
+  /** Stores a mutable copy (empty for null) so addBreadCrum also works for unmodifiable input. */
+  public void setBreadCrums(List<String> breadCrums) {
+    this.breadCrums = breadCrums == null ? new ArrayList<>() : new ArrayList<>(breadCrums);
+  }
+
+  /** Prepends {@code breadCrums} to the current path. */
+  public void setBreadCrumsPath(List<String> breadCrums) {
+    final List<String> fullPath = new ArrayList<>(breadCrums);
+    fullPath.addAll(getBreadCrums());
+    setBreadCrums(fullPath);
+  }
+
+  /** Appends {@code value} unless it is already part of the path; returns whether it was added. */
+  public boolean addBreadCrum(String value) {
+    if (breadCrums.contains(value)) {
+      return false;
+    }
+    return breadCrums.add(value);
+  }
+
+  /** Equal if the class matches, super.equals holds and both paths are equal lists (as hashCode). */
+  @Override
+  public boolean equals(final Object obj) {
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    return super.equals(obj) && breadCrums.equals(((OrfOnBreadCrumsUrlDTO) obj).breadCrums);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(super.hashCode(), this.breadCrums);
+  }
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnConstants.java b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnConstants.java
new file mode 100644
index 000000000..9404439b5
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnConstants.java
@@ -0,0 +1,23 @@
+package de.mediathekview.mserver.crawler.orfon;
+
+/** Endpoints and request settings of the ORF ON (TVthek) API used by the crawler. */
+public final class OrfOnConstants {
+  // Base URL of the API; every endpoint below is built on top of it.
+  public static final String HOST = "https://api-tvthek.orf.at/api/v4.3";
+  // Schedule endpoint.
+  public static final String SCHEDULE = HOST + "/schedule";
+  // Letter-group (A-Z) endpoint and the page size sent as "limit" query parameter.
+  public static final String AZ = HOST + "/profiles/lettergroup";
+  public static final int PAGE_SIZE = 200;
+  // History and episode endpoints.
+  public static final String HISTORY = HOST + "/history";
+  public static final String EPISODE = HOST + "/episode";
+  // NOTE(review): Basic auth header value committed in plain source - confirm this is acceptable.
+  public static final String AUTH = "Basic b3JmX29uX3Y0MzpqRlJzYk5QRmlQU3h1d25MYllEZkNMVU41WU5aMjhtdA==";
+  private OrfOnConstants() {}
+
+  /** Appends the page-size limit, using '&' instead of '?' when the URL already has a query. */
+  public static String createMaxLimmitUrl(String plainUrl) {
+    return plainUrl + (plainUrl.indexOf('?') >= 0 ? "&" : "?") + "limit=" + PAGE_SIZE;
+  }
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java
new file mode 100644
index 000000000..00894ef43
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java
@@ -0,0 +1,149 @@
+package de.mediathekview.mserver.crawler.orfon;
+
+import de.mediathekview.mlib.daten.Film;
+import de.mediathekview.mlib.daten.Sender;
+import de.mediathekview.mlib.messages.listener.MessageListener;
+import de.mediathekview.mserver.base.config.MServerConfigManager;
+import de.mediathekview.mserver.base.messages.ServerMessages;
+import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
+import de.mediathekview.mserver.crawler.orfon.task.OrfOnAZTask;
+import de.mediathekview.mserver.crawler.orfon.task.OrfOnEpisodeTask;
+import de.mediathekview.mserver.crawler.orfon.task.OrfOnEpisodesTask;
+import de.mediathekview.mserver.crawler.orfon.task.OrfOnHistoryChildrenTask;
+import de.mediathekview.mserver.crawler.orfon.task.OrfOnHistoryTask;
+import de.mediathekview.mserver.crawler.orfon.task.OrfOnHistoryVideoItemTask;
+import de.mediathekview.mserver.crawler.orfon.task.OrfOnScheduleTask;
+import de.mediathekview.mserver.progress.listeners.SenderProgressListener;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Queue;
+import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.ForkJoinTask;
+import java.util.concurrent.RecursiveTask;
+
+/** Crawler for the ORF ON API: gathers episode URLs from schedule, A-Z and history listings. */
+public class OrfOnCrawler extends AbstractCrawler {
+  private static final Logger LOG = LogManager.getLogger(OrfOnCrawler.class);
+  private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");
+
+  public OrfOnCrawler(
+      final ForkJoinPool aForkJoinPool,
+      final Collection aMessageListeners,
+      final Collection aProgressListeners,
+      final MServerConfigManager aRootConfig) {
+    super(aForkJoinPool, aMessageListeners, aProgressListeners, aRootConfig);
+  }
+
+  @Override
+  public Sender getSender() {
+    return Sender.ORF;
+  }
+
+  /**
+   * Collects the episode URLs of all enabled sections and hands them to the episode task.
+   *
+   * @return the episode task, or {@code null} if collecting the URLs failed
+   */
+  @Override
+  protected RecursiveTask> createCrawlerTask() {
+    final Set allVideos = new HashSet<>();
+    try {
+      // Missed broadcasts of the configured number of past days: day > episode > film
+      allVideos.addAll(processDayUrlsToCrawl());
+      publishEpisodeCount(allVideos.size());
+      if (Boolean.TRUE.equals(crawlerConfig.getTopicsSearchEnabled())) {
+        // Shows A-Z: letter > episodes > film
+        allVideos.addAll(processAZUrlsToCrawl());
+        publishEpisodeCount(allVideos.size());
+        // History: top categories > children > video item > episode > film
+        allVideos.addAll(processHistoryUrlToCrawl());
+        publishEpisodeCount(allVideos.size());
+      }
+
+      return new OrfOnEpisodeTask(this, new ConcurrentLinkedQueue<>(allVideos));
+    } catch (final InterruptedException ex) {
+      LOG.fatal("Exception in ORFON crawler.", ex);
+      // Only a real interruption may restore the interrupt flag; setting it for other
+      // failures would make later blocking calls on this thread fail spuriously.
+      Thread.currentThread().interrupt();
+    } catch (final Exception ex) {
+      LOG.fatal("Exception in ORFON crawler.", ex);
+    }
+    return null;
+  }
+
+  /** Publishes the number of episode URLs collected so far as message and as maximum count. */
+  private void publishEpisodeCount(final int count) {
+    printMessage(ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), count);
+    getAndSetMaxCount(count);
+  }
+
+  /** Runs the schedule task for the day URLs and returns the URLs it found. */
+  private Set processDayUrlsToCrawl() throws InterruptedException, ExecutionException {
+    return forkJoinPool.submit(new OrfOnScheduleTask(this, createDayUrlsToCrawl())).get();
+  }
+
+  /** Builds one schedule URL per day, from today back to the configured number of days. */
+  private Queue createDayUrlsToCrawl() {
+    final Queue dayUrlsToCrawl = new ConcurrentLinkedQueue<>();
+    final LocalDateTime now = LocalDateTime.now();
+    for (int i = 0; i <= crawlerConfig.getMaximumDaysForSendungVerpasstSection(); i++) {
+      final String day = now.minusDays(i).format(DAY_PAGE_DATE_FORMATTER);
+      dayUrlsToCrawl.offer(new OrfOnBreadCrumsUrlDTO(day, OrfOnConstants.SCHEDULE + "/" + day));
+    }
+    return dayUrlsToCrawl;
+  }
+
+  /** Runs the A-Z task for all letter URLs, then the episodes task for the topics it found. */
+  private Set processAZUrlsToCrawl() throws InterruptedException, ExecutionException {
+    final Set letterTaskTopics =
+        forkJoinPool.submit(new OrfOnAZTask(this, createAZUrlsToCrawl())).get();
+    return forkJoinPool
+        .submit(new OrfOnEpisodesTask(this, new ConcurrentLinkedQueue<>(letterTaskTopics)))
+        .get();
+  }
+
+  /** Builds the letter-group URLs for 'A' to 'Z' plus the "0" group. */
+  private Queue createAZUrlsToCrawl() {
+    final Queue letterUrlsToCrawl = new ConcurrentLinkedQueue<>();
+    for (char letter = 'A'; letter <= 'Z'; letter++) {
+      letterUrlsToCrawl.offer(createLetterUrl(String.valueOf(letter)));
+    }
+    // A "0" group exists as well.
+    letterUrlsToCrawl.offer(createLetterUrl("0"));
+    return letterUrlsToCrawl;
+  }
+
+  /** Creates the page-size limited URL of one letter group; the group is the bread crumb. */
+  private OrfOnBreadCrumsUrlDTO createLetterUrl(final String letterGroup) {
+    final String url = OrfOnConstants.createMaxLimmitUrl(OrfOnConstants.AZ + "/" + letterGroup);
+    return new OrfOnBreadCrumsUrlDTO(letterGroup, url);
+  }
+
+  /** Walks the history section: top categories > children > video items > episode URLs. */
+  private Set processHistoryUrlToCrawl() throws InterruptedException, ExecutionException {
+    final Set historyChildrenUrls =
+        forkJoinPool.submit(new OrfOnHistoryTask(this, createHistoryUrlToCrawl())).get();
+    LOG.debug("Found {} entries in OrfOnHistoryTask ", historyChildrenUrls.size());
+    final Set historyItemUrls = forkJoinPool.submit(new OrfOnHistoryChildrenTask(this, new ConcurrentLinkedQueue<>(historyChildrenUrls))).get();
+    LOG.debug("Found {} entries in OrfOnHistoryChildrenTask ", historyItemUrls.size());
+    final Set historyEpisodesUrls = forkJoinPool.submit(new OrfOnHistoryVideoItemTask(this, new ConcurrentLinkedQueue<>(historyItemUrls))).get();
+    LOG.debug("Found {} entries in OrfOnHistoryVideoItemTask ", historyEpisodesUrls.size());
+    return historyEpisodesUrls;
+  }
+
+  /** Returns the queue holding the single entry URL of the history section. */
+  private Queue createHistoryUrlToCrawl() {
+    final Queue history = new ConcurrentLinkedQueue<>();
+    history.offer(new OrfOnBreadCrumsUrlDTO("Base", OrfOnConstants.HISTORY));
+    return history;
+  }
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnVideoInfoDTO.java b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnVideoInfoDTO.java
new file mode 100644
index 000000000..442c857f0
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnVideoInfoDTO.java
@@ -0,0 +1,122 @@
+package de.mediathekview.mserver.crawler.orfon;
+
+import java.net.URL;
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+
+import de.mediathekview.mlib.daten.FilmUrl;
+import de.mediathekview.mlib.daten.GeoLocations;
+import de.mediathekview.mlib.daten.Resolution;
+
+
+public class OrfOnVideoInfoDTO {
+ private Optional id;
+ private Optional channel;
+ private Optional title;
+ private Optional titleWithDate;
+ private Optional topic;
+ private Optional topicForArchive;
+ private Optional aired;
+ private Optional duration;
+ private Optional description;
+ private Optional website;
+ private Optional> georestriction;
+ private Optional subtitleSource;
+ private Optional