Skip to content

Commit

Permalink
ard: new day page url
Browse files Browse the repository at this point in the history
  • Loading branch information
pidoubleyou committed Oct 6, 2024
1 parent 7795f3e commit 9be54fe
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 15 deletions.
3 changes: 1 addition & 2 deletions src/main/java/mServer/crawler/sender/ard/ArdConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ public class ArdConstants {
public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/editorial/experiment-a-z?embedded=false";
public static final String TOPICS_COMPILATION_URL = API_URL + "/page-gateway/widgets/%s/editorials/%s?pageNumber=0&pageSize=%s";
public static final String TOPIC_URL = API_URL + "/page-gateway/widgets/ard/asset/%s?pageSize=%d";
public static final String DAY_PAGE_URL = API_URL + "/page-gateway/compilations/%s/pastbroadcasts?startDateTime=%sT00:00:00.000Z&endDateTime=%sT23:59:59.000Z&pageNumber=0&pageSize=%d";
public static final String DAY_PAGE_URL = "https://programm-api.ard.de/program/api/program?day=%s&channelIds=%s&mode=channel";

public static final int DAY_PAGE_SIZE = 100;
public static final int TOPICS_COMPILATION_PAGE_SIZE = 200;
public static final int TOPIC_PAGE_SIZE = 50;

Expand Down
13 changes: 10 additions & 3 deletions src/main/java/mServer/crawler/sender/ard/ArdCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
public class ArdCrawler extends MediathekCrawler {

public static final String SENDERNAME = Const.ARD;
private static final int MAX_DAYS_PAST = 2;
private static final int MAX_DAYS_PAST_AVAILABLE = 6;
private static final int MAX_DAYS_PAST = 7;
private static final int MAX_DAYS_FUTURE = 7;
private static final int MAX_DAYS_PAST_AVAILABLE = 7;
private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER
= DateTimeFormatter.ofPattern("yyyy-MM-dd");

Expand Down Expand Up @@ -71,6 +72,12 @@ private ConcurrentLinkedQueue<CrawlerUrlDTO> createDayUrlsToCrawl() {
addDayUrls(dayUrlsToCrawl, now.minusDays(i));
}

if (CrawlerTool.loadLongMax()) {
for (int i = 0; i < MAX_DAYS_FUTURE; i++) {
addDayUrls(dayUrlsToCrawl, now.plusDays(i));
}
}

addSpecialDays(dayUrlsToCrawl);

return dayUrlsToCrawl;
Expand All @@ -79,7 +86,7 @@ private ConcurrentLinkedQueue<CrawlerUrlDTO> createDayUrlsToCrawl() {
private void addDayUrls(ConcurrentLinkedQueue<CrawlerUrlDTO> dayUrlsToCrawl, LocalDateTime day) {
final String formattedDay = day.format(DAY_PAGE_DATE_FORMATTER);
for (String client : ArdConstants.CLIENTS) {
final String url = String.format(ArdConstants.DAY_PAGE_URL, client, formattedDay, formattedDay, ArdConstants.DAY_PAGE_SIZE);
final String url = String.format(ArdConstants.DAY_PAGE_URL, formattedDay, client);
dayUrlsToCrawl.offer(new CrawlerUrlDTO(url));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,63 @@

import com.google.gson.*;

import mServer.crawler.sender.ard.ArdConstants;
import mServer.crawler.sender.ard.ArdFilmInfoDto;
import mServer.crawler.sender.base.JsonUtils;

import java.lang.reflect.Type;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;

public class ArdDayPageDeserializer extends ArdTeasersDeserializer
implements JsonDeserializer<Set<ArdFilmInfoDto>> {
public class ArdDayPageDeserializer implements JsonDeserializer<Set<ArdFilmInfoDto>> {

private static final String ELEMENT_TEASERS = "teasers";
private static final String ELEMENT_CHANNELS = "channels";
private static final String ELEMENT_LINKS = "links";
private static final String ELEMENT_TARGET = "target";
private static final String ELEMENT_TIMESLOTS = "timeSlots";
private static final String ATTRIBUTE_URL_ID = "urlId";

@Override
public Set<ArdFilmInfoDto> deserialize(
final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) {
final Set<ArdFilmInfoDto> results = new HashSet<>();

if (!jsonElement.isJsonArray()) {
return results;
final JsonObject jsonObject = jsonElement.getAsJsonObject();
if (jsonObject.has(ELEMENT_CHANNELS)) {
final JsonArray channels = jsonObject.get(ELEMENT_CHANNELS).getAsJsonArray();
results.addAll(parseChannels(channels));
}

final JsonObject firstElement = jsonElement.getAsJsonArray().get(0).getAsJsonObject();
return results;
}

if (firstElement.has(ELEMENT_TEASERS)) {
final JsonArray teasers = firstElement.get(ELEMENT_TEASERS).getAsJsonArray();
results.addAll(parseTeasers(teasers));
/**
 * Collects a film info for every entry of every time slot of the given channels.
 *
 * <p>Malformed parts of the response are skipped instead of aborting the whole day page:
 * a channel that is not a JSON object or has no "timeSlots" array, a time slot that is
 * not an array, and an entry that is not a JSON object are all ignored.
 *
 * @param channels the "channels" array of the day page response
 * @return film infos (each created with a clip count of 1) for all entries for which
 *     {@code toId} yields an id; empty if there are none
 */
private Set<ArdFilmInfoDto> parseChannels(JsonArray channels) {
  final Set<ArdFilmInfoDto> entries = new HashSet<>();
  for (JsonElement channel : channels) {
    if (!channel.isJsonObject()) {
      continue;
    }
    // JsonObject.get returns null for a missing member, so check before converting.
    final JsonElement timeSlots = channel.getAsJsonObject().get(ELEMENT_TIMESLOTS);
    if (timeSlots == null || !timeSlots.isJsonArray()) {
      continue;
    }
    for (JsonElement timeSlot : timeSlots.getAsJsonArray()) {
      if (!timeSlot.isJsonArray()) {
        continue;
      }
      for (JsonElement entry : timeSlot.getAsJsonArray()) {
        if (entry.isJsonObject()) {
          toId(entry.getAsJsonObject()).ifPresent(id -> entries.add(createFilmInfo(id, 1)));
        }
      }
    }
  }
  return entries;
}

return results;
/**
 * Builds the film info for an item id.
 *
 * @param id the item id; also inserted into {@code ArdConstants.ITEM_URL} to form the url
 * @param numberOfClips the clip count passed through to the film info
 * @return a new film info holding the id, the formatted item url and the clip count
 */
private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) {
  return new ArdFilmInfoDto(id, String.format(ArdConstants.ITEM_URL, id), numberOfClips);
}

/**
 * Looks up the "urlId" attribute of a day page entry.
 *
 * <p>If {@code JsonUtils.checkTreePath} confirms the path "links" / "target", the
 * attribute is read from that nested target object; otherwise it is read from the entry
 * itself.
 *
 * @param teaserObject the entry to inspect
 * @return the result of {@code JsonUtils.getAttributeAsString} for "urlId" on the
 *     selected object
 */
private Optional<String> toId(final JsonObject teaserObject) {
  final boolean hasLinkTarget =
      JsonUtils.checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET);
  final JsonObject idSource = hasLinkTarget
      ? teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject()
      : teaserObject;
  return JsonUtils.getAttributeAsString(idSource, ATTRIBUTE_URL_ID);
}
}

0 comments on commit 9be54fe

Please sign in to comment.