Skip to content

Commit

Permalink
Merge pull request #1013 from mediathekview/feature/1005
Browse files Browse the repository at this point in the history
ard: new day page url
  • Loading branch information
pidoubleyou authored Oct 6, 2024
2 parents d85c395 + 40d77ba commit 564a1de
Show file tree
Hide file tree
Showing 6 changed files with 1,251 additions and 561 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ public class ArdConstants {
public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/editorial/experiment-a-z?embedded=false";
public static final String TOPICS_COMPILATION_URL = API_URL + "/page-gateway/widgets/%s/editorials/%s?pageNumber=0&pageSize=%s";
public static final String TOPIC_URL = API_URL + "/page-gateway/widgets/ard/asset/%s?pageSize=%d";
public static final String DAY_PAGE_URL = API_URL + "/page-gateway/compilations/%s/pastbroadcasts?startDateTime=%sT00:00:00.000Z&endDateTime=%sT23:59:59.000Z&pageNumber=0&pageSize=%d";
public static final String DAY_PAGE_URL = "https://programm-api.ard.de/program/api/program?day=%s&channelIds=%s&mode=channel";

public static final int DAY_PAGE_SIZE = 100;
public static final int TOPICS_COMPILATION_PAGE_SIZE = 200;
public static final int TOPIC_PAGE_SIZE = 50;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ private Queue<CrawlerUrlDTO> createDayUrlsToCrawl() {

for (final String client : ArdConstants.CLIENTS) {
final String url =
String.format(ArdConstants.DAY_PAGE_URL, client, day, day, ArdConstants.DAY_PAGE_SIZE);
String.format(ArdConstants.DAY_PAGE_URL, day, client);
dayUrlsToCrawl.offer(new CrawlerUrlDTO(url));
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,33 +1,63 @@
package de.mediathekview.mserver.crawler.ard.json;

import com.google.gson.*;
import de.mediathekview.mserver.base.utils.JsonUtils;
import de.mediathekview.mserver.crawler.ard.ArdConstants;
import de.mediathekview.mserver.crawler.ard.ArdFilmInfoDto;

import java.lang.reflect.Type;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;

public class ArdDayPageDeserializer extends ArdTeasersDeserializer
implements JsonDeserializer<Set<ArdFilmInfoDto>> {
public class ArdDayPageDeserializer implements JsonDeserializer<Set<ArdFilmInfoDto>> {

private static final String ELEMENT_TEASERS = "teasers";
private static final String ELEMENT_CHANNELS = "channels";
private static final String ELEMENT_LINKS = "links";
private static final String ELEMENT_TARGET = "target";
private static final String ELEMENT_TIMESLOTS = "timeSlots";
private static final String ATTRIBUTE_URL_ID = "urlId";

@Override
public Set<ArdFilmInfoDto> deserialize(
final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) {
final Set<ArdFilmInfoDto> results = new HashSet<>();

if (!jsonElement.isJsonArray()) {
return results;
final JsonObject jsonObject = jsonElement.getAsJsonObject();
if (jsonObject.has(ELEMENT_CHANNELS)) {
final JsonArray channels = jsonObject.get(ELEMENT_CHANNELS).getAsJsonArray();
results.addAll(parseChannels(channels));
}

final JsonObject firstElement = jsonElement.getAsJsonArray().get(0).getAsJsonObject();
return results;
}

if (firstElement.has(ELEMENT_TEASERS)) {
final JsonArray teasers = firstElement.get(ELEMENT_TEASERS).getAsJsonArray();
results.addAll(parseTeasers(teasers));
/**
 * Collects one {@link ArdFilmInfoDto} for every time slot entry of the given channels
 * for which {@code toId} yields an id.
 *
 * <p>The structure walked here is: channel object -> {@code timeSlots} array -> array of
 * entries -> entry object. Nodes that do not have the expected shape (a channel without
 * {@code timeSlots}, a time slot that is not an array, an entry that is not an object) are
 * skipped, so a single malformed node does not discard the remaining entries.
 *
 * @param channels the channel elements to scan
 * @return the film infos found; empty if no entry provides an id
 */
private Set<ArdFilmInfoDto> parseChannels(JsonArray channels) {
  final Set<ArdFilmInfoDto> entries = new HashSet<>();
  for (final JsonElement channel : channels) {
    if (!channel.isJsonObject()) {
      continue;
    }
    // JsonObject.get returns null for a missing member, so test before converting.
    final JsonElement timeSlots = channel.getAsJsonObject().get(ELEMENT_TIMESLOTS);
    if (timeSlots == null || !timeSlots.isJsonArray()) {
      continue;
    }
    for (final JsonElement timeSlot : timeSlots.getAsJsonArray()) {
      if (!timeSlot.isJsonArray()) {
        continue;
      }
      for (final JsonElement entry : timeSlot.getAsJsonArray()) {
        if (!entry.isJsonObject()) {
          continue;
        }
        // Every entry is registered with a clip count of 1.
        toId(entry.getAsJsonObject()).ifPresent(id -> entries.add(createFilmInfo(id, 1)));
      }
    }
  }
  return entries;
}

return results;
/**
 * Builds the film info for a single id.
 *
 * @param id the id of the item; also inserted into {@link ArdConstants#ITEM_URL}
 * @param numberOfClips the clip count passed on to the created DTO
 * @return a new DTO made of the id, the formatted item URL and the clip count
 */
private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) {
  return new ArdFilmInfoDto(id, String.format(ArdConstants.ITEM_URL, id), numberOfClips);
}

/**
 * Reads the {@code urlId} attribute belonging to an entry.
 *
 * <p>If the {@code links} / {@code target} path check succeeds, the attribute is read from
 * that nested target object; otherwise it is read from the entry object itself.
 *
 * @param teaserObject the entry to inspect
 * @return whatever {@code JsonUtils.getAttributeAsString} returns for the chosen object
 */
private Optional<String> toId(final JsonObject teaserObject) {
  // NOTE(review): the second argument is passed as null - confirm checkTreePath accepts that.
  final boolean hasLinkTarget =
      JsonUtils.checkTreePath(teaserObject, null, ELEMENT_LINKS, ELEMENT_TARGET);
  if (!hasLinkTarget) {
    return JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_URL_ID);
  }

  final JsonObject links = teaserObject.get(ELEMENT_LINKS).getAsJsonObject();
  final JsonObject target = links.get(ELEMENT_TARGET).getAsJsonObject();
  return JsonUtils.getAttributeAsString(target, ATTRIBUTE_URL_ID);
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package de.mediathekview.mserver.crawler.ard.json;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import de.mediathekview.mserver.crawler.ard.ArdConstants;
import de.mediathekview.mserver.crawler.ard.ArdFilmInfoDto;
import de.mediathekview.mserver.testhelper.JsonFileReader;
Expand All @@ -16,41 +17,63 @@ public class ArdDayPageDeserializerTest {

@Test
public void testDeserialize() {
final JsonArray jsonElement = JsonFileReader.readJsonArray("/ard/ard_day_page11.json");
final JsonElement jsonElement = JsonFileReader.readJson("/ard/ard_day_page.json");

final ArdFilmInfoDto[] expected =
new ArdFilmInfoDto[] {
new ArdFilmInfoDto(
"Y3JpZDovL2hyLW9ubGluZS8xMDE5Nzc",
String.format(ArdConstants.ITEM_URL, "Y3JpZDovL2hyLW9ubGluZS8xMDE5Nzc"),
"Y3JpZDovL3JiYl8xY2RjODJjMy01ZTIyLTQ0MDctODEwZi0yMWMwYTBhY2NjMmNfcHVibGljYXRpb24",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL3JiYl8xY2RjODJjMy01ZTIyLTQ0MDctODEwZi0yMWMwYTBhY2NjMmNfcHVibGljYXRpb24"),
1),
new ArdFilmInfoDto(
"Y3JpZDovL2hyLW9ubGluZS8xMDE5Nzg",
String.format(ArdConstants.ITEM_URL, "Y3JpZDovL2hyLW9ubGluZS8xMDE5Nzg"),
"Y3JpZDovL3JiYl9hN2RkMDNjMC0yMmU5LTRmYzEtYmNiOC1kYTg0Y2RjOWMxMWZfcHVibGljYXRpb24",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL3JiYl9hN2RkMDNjMC0yMmU5LTRmYzEtYmNiOC1kYTg0Y2RjOWMxMWZfcHVibGljYXRpb24"),
1),
new ArdFilmInfoDto(
"Y3JpZDovL2hyLW9ubGluZS8xMDE5ODI",
String.format(ArdConstants.ITEM_URL, "Y3JpZDovL2hyLW9ubGluZS8xMDE5ODI"),
"Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MDQ4MzMtMzg1Mjgw",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MDQ4MzMtMzg1Mjgw"),
1),
new ArdFilmInfoDto(
"Y3JpZDovL2hyLW9ubGluZS8xMDE4MjA",
String.format(ArdConstants.ITEM_URL, "Y3JpZDovL2hyLW9ubGluZS8xMDE4MjA"),
"Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MDQ4MzQtMzg1Mjgx",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MDQ4MzQtMzg1Mjgx"),
1),
new ArdFilmInfoDto(
"Y3JpZDovL2hyLW9ubGluZS8xMDEyMDM",
String.format(ArdConstants.ITEM_URL, "Y3JpZDovL2hyLW9ubGluZS8xMDEyMDM"),
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2Zlcm5zZWhmaWxtZSBpbSBlcnN0ZW4vMjAyNC0wOS0yOF8xNC0wMC1NRVNa",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2Zlcm5zZWhmaWxtZSBpbSBlcnN0ZW4vMjAyNC0wOS0yOF8xNC0wMC1NRVNa"),
1),
new ArdFilmInfoDto(
"Y3JpZDovL2hyLW9ubGluZS8xMDE5OTI",
String.format(ArdConstants.ITEM_URL, "Y3JpZDovL2hyLW9ubGluZS8xMDE5OTI"),
7),
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2Zlcm5zZWhmaWxtZSBpbSBlcnN0ZW4vMjAyNC0wOS0yOF8xNS0zMC1NRVNa",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2Zlcm5zZWhmaWxtZSBpbSBlcnN0ZW4vMjAyNC0wOS0yOF8xNS0zMC1NRVNa"),
1),
new ArdFilmInfoDto(
"Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtMmIwZDg4NDMtMzQ0YS00OTZmLTlhNDYtNGY3ODk5MjE2MmFi",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtMmIwZDg4NDMtMzQ0YS00OTZmLTlhNDYtNGY3ODk5MjE2MmFi"),
1),
new ArdFilmInfoDto(
"Y3JpZDovL2hyLW9ubGluZS8xMDE5NzI",
String.format(ArdConstants.ITEM_URL, "Y3JpZDovL2hyLW9ubGluZS8xMDE5NzI"),
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2RpZS1zdGlsbGVuLW1vZXJkZXIvMjAyNC0wOS0yOF8yMC0xNS1NRVNa",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2RpZS1zdGlsbGVuLW1vZXJkZXIvMjAyNC0wOS0yOF8yMC0xNS1NRVNa"),
1),
new ArdFilmInfoDto(
"Y3JpZDovL2hyLW9ubGluZS8xMDE5NzE",
String.format(ArdConstants.ITEM_URL, "Y3JpZDovL2hyLW9ubGluZS8xMDE5NzE"),
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2hhcnR3aWctc2VlbGVyLzIwMjQtMDktMjhfMjEtNDUtTUVTWg",
String.format(
ArdConstants.ITEM_URL,
"Y3JpZDovL2Rhc2Vyc3RlLmRlL2hhcnR3aWctc2VlbGVyLzIwMjQtMDktMjhfMjEtNDUtTUVTWg"),
1)
};

Expand Down
Loading

0 comments on commit 564a1de

Please sign in to comment.