Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add date to title for regular topics #944

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
package de.mediathekview.mserver.crawler.dw;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Constants shared by the DW crawler classes.
 *
 * <p>This is a pure constant holder: it cannot be instantiated or subclassed.
 */
public final class DwConstants {

  /** Base URL of the DW API. */
  public static final String URL_BASE = "https://api.dw.com/api";

  /**
   * Path and query string of the media center overview, page index 1.
   *
   * <p>NOTE(review): presumably appended to {@link #URL_BASE} by the caller - confirm.
   */
  public static final String URL_OVERVIEW = "/list/mediacenter/1?pageIndex=1";

  /**
   * Names of the regularly recurring topics.
   *
   * <p>The list is read-only: any attempt to modify it throws {@link
   * UnsupportedOperationException}. It is deliberately built on {@code Arrays.asList} rather than
   * {@code List.of} so that {@code contains(null)} keeps returning {@code false} instead of
   * throwing, which matters for callers that look up a topic that may be unset.
   */
  public static final List<String> REGULAR_TOPICS =
      Collections.unmodifiableList(
          Arrays.asList("Euromaxx", "Shift", "Fokus Europa", "Projekt Zukunft", "Global Us"));

  /** Prevents instantiation. */
  private DwConstants() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import de.mediathekview.mlib.daten.Sender;
import de.mediathekview.mserver.base.utils.JsonUtils;
import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
import de.mediathekview.mserver.crawler.dw.DwConstants;
import de.mediathekview.mserver.crawler.dw.DwVideoDto;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -127,7 +128,10 @@ public Optional<Film> deserialize(
final JsonArray jsonObjectMainContentSources =
jsonObjectMainContent.get(ELEMENT_MAINCONTENT_SOURCES).getAsJsonArray();
getVideos(title.get(), jsonObjectMainContentSources).ifPresent(film::addAllUrls);
//
// Regular topics (e.g. Euromaxx) always have the same title and we do not get the subtitle,
// so the film's date is appended to the title.
if (DwConstants.REGULAR_TOPICS.contains(film.getThema())) {
film.setTitel(film.getTitel() + " " + film.getTime().format(DateTimeFormatter.ofPattern("yyyy-MM-dd")));
}
return Optional.of(film);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import java.util.concurrent.ConcurrentLinkedQueue;

public class DWOverviewTask extends DWTaskBase<CrawlerUrlDTO, CrawlerUrlDTO> {

private static final long serialVersionUID = 4050423702709695861L;
private static final Type OPTIONAL_OVERVIEW_DTO_TYPE_TOKEN =
new TypeToken<Optional<PagedElementListDTO<CrawlerUrlDTO>>>() {}.getType();
private final int subpage;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,20 @@ protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarg
filmDetailDtoOptional = deserializeOptional(aTarget, OPTIONAL_FILM_DETAIL_DTO_TYPE_TOKEN);
} catch (Exception e) {
LOG.error("error processing {} ", aDTO.getUrl(), e);
crawler.incrementAndGetErrorCount();
crawler.updateProgress();
}
if (filmDetailDtoOptional.isEmpty()) {
crawler.incrementAndGetErrorCount();
crawler.updateProgress();
return;
}
this.taskResults.add(filmDetailDtoOptional.get());
crawler.incrementAndGetActualCount();
if (!this.taskResults.add(filmDetailDtoOptional.get())) {
crawler.incrementAndGetErrorCount();
LOG.warn("Entry was rejected because existing {}", filmDetailDtoOptional.get());
} else {
crawler.incrementAndGetActualCount();
}
crawler.updateProgress();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public static Collection<Object[]> data() {
},
{
"/dw/dw_film_detail_five_video_urls.json",
"Energiezukunft? Schwimmende Windkraftanlagen",
"Energiezukunft? Schwimmende Windkraftanlagen 2022-11-12",
"Projekt Zukunft",
"https://p.dw.com/p/4JNwb",
Duration.ofSeconds(385),
Expand Down
Loading