Skip to content

Commit

Permalink
arte: use recent list instead of categories
Browse files Browse the repository at this point in the history
  • Loading branch information
pidoubleyou committed Feb 11, 2024
1 parent 8f562be commit f957639
Show file tree
Hide file tree
Showing 10 changed files with 1,245 additions and 36 deletions.
Original file line number Diff line number Diff line change
@@ -1,24 +1,19 @@
package mServer.crawler.sender.arte;

import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.google.gson.*;
import de.mediathekview.mlib.tool.Log;

import java.lang.reflect.Type;
import java.util.Optional;

/**
* Deserializes the response of a request for the films of a category.
* Example URL:
* https://www.arte.tv/guide/api/api/zones/de/web/videos_subcategory_CMG/?page=1&limit=100
*/
public class ArteCategoryFilmListDeserializer implements JsonDeserializer<ArteCategoryFilmsDTO> {
public class ArteCategoryFilmListDeserializer extends ArteListBaseDeserializer implements JsonDeserializer<ArteCategoryFilmsDTO> {

private static final String JSON_ELEMENT_CONTENT = "content";
private static final String JSON_ELEMENT_DATA = "data";
private static final String JSON_ELEMENT_PROGRAMID = "programId";
private static final String JSON_ELEMENT_VALUE = "value";
private static final String JSON_ELEMENT_ZONES = "zones";

Expand All @@ -39,30 +34,13 @@ public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, Js
for (JsonElement jsonElement : zoneElement.getAsJsonArray()) {
if(jsonElement.getAsJsonObject().has(JSON_ELEMENT_CONTENT)) {
final JsonObject contentObject = jsonElement.getAsJsonObject().get(JSON_ELEMENT_CONTENT).getAsJsonObject();
if (contentObject.has(JSON_ELEMENT_DATA)) {
for(JsonElement dataElement : contentObject.get(JSON_ELEMENT_DATA).getAsJsonArray()) {
if (!dataElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).isJsonNull()) {
String programId = dataElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).getAsString();
if (programId != null) {
if (programId.startsWith("RC-")) {
try {
long collectionId = Long.parseLong(programId.replace("RC-", ""));
dto.addCollection(String.format("RC-%06d", collectionId));
} catch (NumberFormatException e) {
Log.errorLog(12834939, "Invalid collection id: " + programId);
}
} else {
dto.addProgramId(programId);
}
}
}
}
}
extractProgramIdFromData(contentObject, dto);

Optional<String> url = parsePagination(contentObject);
url.ifPresent(dto::setNextPageUrl);
}
}

dto.setNextPage(false);

return dto;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public class ArteCategoryFilmsDTO {
private final Set<String> programIds = new HashSet<>();
private final Set<String> collectionIds = new HashSet<>();

private boolean hasNextPage;
private String nextPageUrl;

public void addProgramId(String aProgramId) {
programIds.add(aProgramId);
Expand All @@ -25,10 +25,13 @@ public Set<String> getCollectionIds() {
}

public boolean hasNextPage() {
return hasNextPage;
return nextPageUrl != null && !nextPageUrl.isEmpty();
}

public void setNextPage(boolean aNextPage) {
hasNextPage = aNextPage;
public String getNextPageUrl() {
return nextPageUrl;
}
public void setNextPageUrl(String url) {
nextPageUrl = url;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package mServer.crawler.sender.arte;

import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import de.mediathekview.mlib.tool.Log;
import mServer.crawler.sender.base.JsonUtils;
import mServer.crawler.sender.base.UrlUtils;

import java.util.Optional;

/**
 * Base class with parsing helpers shared by the ARTE list deserializers:
 * reading the "next page" link from a pagination object and collecting
 * program / collection ids from a "data" array.
 */
public abstract class ArteListBaseDeserializer {

  private static final String JSON_ELEMENT_DATA = "data";
  private static final String JSON_ELEMENT_PROGRAMID = "programId";
  private static final String JSON_ELEMENT_PAGINATION = "pagination";
  private static final String JSON_ELEMENT_LINKS = "links";
  private static final String JSON_ELEMENT_NEXT = "next";

  private static final String COLLECTION_PREFIX = "RC-";

  /**
   * Reads {@code pagination.links.next} from the given object.
   *
   * @param jsonObject object that may contain a "pagination" member
   * @return the next page url, with "/api/emac/" rewritten to "/api/rproxy/emac/"
   *         and passed through {@code UrlUtils.addDomainIfMissing}; empty if
   *         "pagination" or "links" is missing or not a JSON object, or if no
   *         "next" value is available
   */
  protected Optional<String> parsePagination(JsonObject jsonObject) {
    final JsonElement pagination = jsonObject.get(JSON_ELEMENT_PAGINATION);
    // get() returns null for a missing member; JsonNull is not a JSON object either
    if (pagination == null || !pagination.isJsonObject()) {
      return Optional.empty();
    }

    final JsonElement links = pagination.getAsJsonObject().get(JSON_ELEMENT_LINKS);
    if (links == null || !links.isJsonObject()) {
      return Optional.empty();
    }

    return JsonUtils.getAttributeAsString(links.getAsJsonObject(), JSON_ELEMENT_NEXT)
        .map(next -> UrlUtils.addDomainIfMissing(
            next.replace("/api/emac/", "/api/rproxy/emac/"), "https://www.arte.tv"));
  }

  /**
   * Walks the "data" array of the given object and adds every non-null
   * "programId" to the dto. Ids starting with "RC-" are added as collections,
   * all other ids as programs.
   * Entries that are not JSON objects or that have no "programId" are skipped.
   *
   * @param jsonObjectWithData object that may contain a "data" array
   * @param dto target that receives the program and collection ids
   */
  protected void extractProgramIdFromData(JsonObject jsonObjectWithData, ArteCategoryFilmsDTO dto) {
    final JsonElement data = jsonObjectWithData.get(JSON_ELEMENT_DATA);
    if (data == null || !data.isJsonArray()) {
      return;
    }

    for (JsonElement dataElement : data.getAsJsonArray()) {
      if (!dataElement.isJsonObject()) {
        continue;
      }
      final JsonObject dataObject = dataElement.getAsJsonObject();
      final JsonElement programIdElement = dataObject.get(JSON_ELEMENT_PROGRAMID);
      // a missing member yields null here, so check before calling isJsonNull()
      if (programIdElement == null || programIdElement.isJsonNull()) {
        continue;
      }
      JsonUtils.getAttributeAsString(dataObject, JSON_ELEMENT_PROGRAMID)
          .ifPresent(programId -> addProgramOrCollection(programId, dto));
    }
  }

  /**
   * Adds a single id to the dto, either as a collection ("RC-" prefix, numeric
   * part zero-padded to six digits) or as a plain program id.
   */
  private static void addProgramOrCollection(String programId, ArteCategoryFilmsDTO dto) {
    if (!programId.startsWith(COLLECTION_PREFIX)) {
      dto.addProgramId(programId);
      return;
    }

    try {
      final long collectionId = Long.parseLong(programId.replace(COLLECTION_PREFIX, ""));
      dto.addCollection(String.format("RC-%06d", collectionId));
    } catch (NumberFormatException e) {
      // log the raw id value, not an Optional wrapper
      Log.errorLog(12834939, "Invalid collection id: " + programId);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package mServer.crawler.sender.arte;

import com.google.gson.*;

import java.lang.reflect.Type;
import java.util.Optional;

/**
 * Deserializes a follow-up page of an ARTE list into an {@link ArteCategoryFilmsDTO}.
 * The list content is read either from the root object or, if present, from
 * its "value" member.
 */
public class ArteSubPageDeserializer extends ArteListBaseDeserializer implements JsonDeserializer<ArteCategoryFilmsDTO> {
  private static final String JSON_ELEMENT_VALUE = "value";

  /**
   * @param aJsonElement root element of the page response; must be a JSON object
   * @param aType unused
   * @param aContext unused
   * @return dto filled with the ids found in "data" and, if available, the next page url
   */
  @Override
  public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException {
    final ArteCategoryFilmsDTO result = new ArteCategoryFilmsDTO();

    // unwrap the "value" member when the response carries one
    final JsonObject outer = aJsonElement.getAsJsonObject();
    final JsonObject payload = outer.has(JSON_ELEMENT_VALUE)
        ? outer.get(JSON_ELEMENT_VALUE).getAsJsonObject()
        : outer;

    extractProgramIdFromData(payload, result);
    parsePagination(payload).ifPresent(result::setNextPageUrl);

    return result;
  }
}
40 changes: 39 additions & 1 deletion src/main/java/mServer/crawler/sender/arte/MediathekArte.java
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ public void addToList() {
meldungThreadUndFertig();
} else {
if (CrawlerTool.loadLongMax()) {
addCategories();
addRecentList();
//addCategories();
meldungAddMax(listeThemen.size());

for (int t = 0; t < getMaxThreadLaufen(); ++t) {
Expand Down Expand Up @@ -151,6 +152,13 @@ private void addCategories() {
});
}

/**
 * Adds one "recent" entry per configured sender/language pair to
 * {@code listeThemen}, pointing at the MOST_RECENT page of that language.
 */
private void addRecentList() {
  senderLanguages.forEach((senderName, language) ->
      listeThemen.add(new String[]{
          senderName,
          language,
          "recent",
          String.format("https://www.arte.tv/api/rproxy/emac/v4/%s/web/pages/MOST_RECENT/", language)
      }));
}

private void addTage() {
senderLanguages.forEach((sender, langCode) -> {
// http://www.arte.tv/guide/de/plus7/videos?day=-2&page=1&isLoading=true&sort=newest&country=DE
Expand Down Expand Up @@ -203,6 +211,7 @@ private void addFilmeForTag(String sender, String aUrl) {
*/
class CategoryLoader extends Thread {

private int subPage = 0;

@Override
public void run() {
Expand All @@ -229,6 +238,8 @@ private void loadCategory(String sender, String langCode, String aCategory, Stri
Gson gsonCollectionChild = new GsonBuilder()
.registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteCollectionChildDeserializer())
.create();
Gson gsonNextPage =new GsonBuilder()
.registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteSubPageDeserializer()).create();

ArteCategoryFilmsDTO dto = loadSubCategoryPage(gson, sender, aUrl);
if (dto != null) {
Expand All @@ -238,9 +249,36 @@ private void loadCategory(String sender, String langCode, String aCategory, Stri
ListeFilme loadedFilme = loadPrograms(sender, langCode, dto);
loadedFilme.forEach(film -> addFilm(film));
Log.sysLog(String.format("%s: category %s: %d Filme", sender, aCategory, loadedFilme.size()));
if (dto.hasNextPage()) {
loadNextPage(sender, langCode, aCategory, dto.getNextPageUrl(), gsonCollectionParent, gsonCollectionChild, gsonNextPage);
}
}
}

/**
 * Loads the follow-up page at {@code url} and keeps following the
 * "next page" links for as long as the loaded page has one and
 * {@link #shouldLoadNextPage(String)} allows it.
 * Implemented as a loop so that stack depth does not grow with the number of pages.
 * Stops when a page cannot be loaded ({@code loadSubCategoryPage} returns null).
 *
 * @param sender sender the films are loaded for
 * @param langCode language code passed on to collection and program loading
 * @param aCategory category name, used for log output only
 * @param url url of the first follow-up page to load
 * @param gsonCollectionParent gson instance passed to {@code loadCollections}
 * @param gsonCollectionChild gson instance passed to {@code loadCollections}
 * @param gsonNextPage gson instance used to parse the follow-up pages
 */
private void loadNextPage(String sender, String langCode, String aCategory, String url, Gson gsonCollectionParent, Gson gsonCollectionChild, Gson gsonNextPage) {
  String pageUrl = url;
  boolean morePages = true;

  while (morePages) {
    // NOTE(review): subPage is a field and is not reset here, so the count carries over between calls - confirm intended
    subPage++;
    final ArteCategoryFilmsDTO dto = loadSubCategoryPage(gsonNextPage, sender, pageUrl);
    if (dto == null) {
      return;
    }

    loadCollections(sender, langCode, gsonCollectionParent, gsonCollectionChild, dto);
    Log.sysLog(String.format("%s: category %s: %d programs, %d collections", sender, aCategory, dto.getProgramIds().size(), dto.getCollectionIds().size()));
    // process all programIds
    final ListeFilme loadedFilme = loadPrograms(sender, langCode, dto);
    loadedFilme.forEach(film -> addFilm(film));
    Log.sysLog(String.format("%s: category %s - page %d: %d Filme", sender, aCategory, subPage, loadedFilme.size()));

    morePages = dto.hasNextPage() && shouldLoadNextPage(sender);
    pageUrl = dto.getNextPageUrl();
  }
}

/**
 * Decides whether another follow-up page should be loaded.
 *
 * @param sender sender currently being crawled
 * @return always true for {@code Const.ARTE_DE}; for every other sender only
 *         while fewer than five follow-up pages have been counted in {@code subPage}
 */
private boolean shouldLoadNextPage(String sender) {
  // compare by value; == only checks reference identity and is also null-unsafe the other way round
  if (Const.ARTE_DE.equals(sender)) {
    return true;
  }

  return subPage < 5;
}

private void loadCollections(String sender, String langCode, Gson gsonParent, Gson gsonChild, ArteCategoryFilmsDTO dto) {
dto.getCollectionIds().forEach(collectionId -> {
final String url = String.format(COLLECTION_URL, langCode, collectionId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,21 @@ public class ArteCategoryFilmListDeserializerTest {
private final String jsonFile;
private final String[] expectedProgramIds;
private final boolean expectedHasNextPage;
private final String expectedNextPageUrl;
private final ArteCategoryFilmListDeserializer target;
public ArteCategoryFilmListDeserializerTest(String aJsonFile, String[] aProgramIds, boolean aNextPage) {
public ArteCategoryFilmListDeserializerTest(String aJsonFile, String[] aProgramIds, boolean aNextPage, String nextPageUrl) {
jsonFile = aJsonFile;
expectedProgramIds = aProgramIds;
expectedHasNextPage = aNextPage;
expectedNextPageUrl = nextPageUrl;
this.target = new ArteCategoryFilmListDeserializer();
}

@Parameterized.Parameters
public static Collection<Object[]> data() {
return Arrays.asList(new Object[][]{
{"/arte/arte_category.json", new String[]{"112511-000-A", "047389-000-A", "109066-000-A", "082669-000-A", "003982-000-A", "021109-000-A"}, false},
{"/arte/arte_category.json", new String[]{"112511-000-A", "047389-000-A", "109066-000-A", "082669-000-A", "003982-000-A", "021109-000-A"}, false, null},
{"/arte/arte_video_list1.json", new String[]{"033559-000-A","078154-000-A", "101398-000-A", "109332-000-A", "111063-000-A"}, true, "https://www.arte.tv/api/rproxy/emac/v4/de/web/zones/daeadc71-4306-411a-8590-1c1f484ef5aa/content?abv=B&authorizedCountry=DE&page=2&pageId=MOST_RECENT&zoneIndexInPage=0"}
});
}

Expand All @@ -47,5 +50,6 @@ public void testDeserialize() {
assertThat(actual.hasNextPage(), equalTo(expectedHasNextPage));
Set<String> actualProgramIds = actual.getProgramIds();
assertThat(actualProgramIds, Matchers.containsInAnyOrder(expectedProgramIds));
assertThat(actual.getNextPageUrl(), equalTo(expectedNextPageUrl));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package mServer.crawler.sender.arte;

import com.google.gson.JsonObject;
import mServer.test.JsonFileReader;
import org.hamcrest.Matchers;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.util.Arrays;
import java.util.Collection;
import java.util.Set;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.notNullValue;
import static org.junit.Assert.assertThat;

/**
 * Parameterized test for {@link ArteSubPageDeserializer}: each data row names a
 * JSON resource together with the program ids, next-page flag and next-page url
 * expected after deserialization.
 */
@RunWith(Parameterized.class)
public class ArteSubPageDeserializerTest {

  private final ArteSubPageDeserializer target = new ArteSubPageDeserializer();

  private final String jsonFile;
  private final String[] expectedProgramIds;
  private final boolean expectedHasNextPage;
  private final String expectedNextPageUrl;

  public ArteSubPageDeserializerTest(String aJsonFile, String[] aProgramIds, boolean aNextPage, String nextPageUrl) {
    this.jsonFile = aJsonFile;
    this.expectedProgramIds = aProgramIds;
    this.expectedHasNextPage = aNextPage;
    this.expectedNextPageUrl = nextPageUrl;
  }

  /** Test rows: resource path, expected program ids, expected hasNextPage, expected next page url. */
  @Parameterized.Parameters
  public static Collection<Object[]> data() {
    final Object[] pageWithSuccessor = {
        "/arte/arte_video_list2.json",
        new String[]{"099708-000-A", "098846-000-A", "111648-001-A", "112235-000-A", "113043-139-A"},
        true,
        "https://www.arte.tv/api/rproxy/emac/v4/de/web/zones/82b597d7-a83b-4dd8-bea8-ad71675fdf23/content?abv=A&authorizedCountry=DE&page=3&pageId=MOST_VIEWED&zoneIndexInPage=0"
    };
    final Object[] lastPage = {
        "/arte/arte_video_list_last.json",
        new String[]{"102805-000-A", "104017-000-A", "106273-006-A"},
        false,
        null
    };
    return Arrays.asList(new Object[][]{pageWithSuccessor, lastPage});
  }

  @Test
  public void testDeserialize() {
    final JsonObject input = JsonFileReader.readJson(jsonFile);

    final ArteCategoryFilmsDTO actual = target.deserialize(input, ArteCategoryFilmsDTO.class, null);

    assertThat(actual, notNullValue());
    assertThat(actual.hasNextPage(), equalTo(expectedHasNextPage));

    final Set<String> actualProgramIds = actual.getProgramIds();
    assertThat(actualProgramIds, Matchers.containsInAnyOrder(expectedProgramIds));

    assertThat(actual.getNextPageUrl(), equalTo(expectedNextPageUrl));
  }
}
Loading

0 comments on commit f957639

Please sign in to comment.