From 699bf4a75469ecc986c140ca68f89b247c2ce9dc Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Thu, 15 Feb 2024 21:37:57 +0100 Subject: [PATCH] ignore zdf entries --- .../ard/json/ArdTopicsLetterDeserializer.java | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java index 161f66cbf..12870c8cc 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java @@ -10,6 +10,7 @@ import mServer.crawler.sender.base.JsonUtils; import java.lang.reflect.Type; +import java.util.Arrays; import java.util.HashSet; import java.util.Optional; import java.util.Set; @@ -23,9 +24,13 @@ public class ArdTopicsLetterDeserializer implements JsonDeserializer parseTeaser(final JsonObject teaserObject) { id = JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID); } - id.ifPresent( - nonNullId -> - results.add( - new CrawlerUrlDTO( - String.format( - ArdConstants.TOPIC_URL, nonNullId, ArdConstants.TOPIC_PAGE_SIZE)))); + if (isRelevant(teaserObject)) { + id.ifPresent( + nonNullId -> + results.add( + new CrawlerUrlDTO( + String.format( + ArdConstants.TOPIC_URL, nonNullId, ArdConstants.TOPIC_PAGE_SIZE)))); + } return results; } + + private boolean isRelevant(final JsonObject teaserObject) { + if (teaserObject.has(ELEMENT_PUBLICATION_SERVICE)) { + final JsonObject publicationService = + teaserObject.get(ELEMENT_PUBLICATION_SERVICE).getAsJsonObject(); + final Optional attributeAsString = + JsonUtils.getAttributeAsString(publicationService, ATTRIBUTE_NAME); + if (attributeAsString.isPresent()) { + + return !Arrays.stream(IGNORED_SENDER) + .anyMatch(sender -> sender.equalsIgnoreCase(attributeAsString.get())); + } + } + + return true; + } }