Skip to content

Commit

Permalink
Merge branch 'release/2.5.0'
Browse files Browse the repository at this point in the history
MServer ZDF Crawler verbessert.

closed #31
closed #32
closed #39
closed #48
closed #51
closed #52
closed #54
  • Loading branch information
alex1702 committed Feb 1, 2017
2 parents c24e672 + ddadbf2 commit f676d36
Show file tree
Hide file tree
Showing 45 changed files with 1,659 additions and 679 deletions.
8 changes: 6 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import org.apache.tools.ant.filters.ReplaceTokens
sourceCompatibility = 1.8
targetCompatibility = 1.8
group = 'de.mediathekview'
version = '2.4.0'
version = '2.5.0'

def jarName = 'MServer.jar'
def mainClass = 'mServer.Main'
Expand Down Expand Up @@ -40,7 +40,11 @@ dependencies {
compile 'org.apache.commons:commons-compress:1.12'
compile 'org.apache.commons:commons-lang3:3.5'
compile 'org.tukaani:xz:1.5'
compile 'com.fasterxml.jackson.core:jackson-core:2.7.0'
compile 'com.google.code.gson:gson:2.8.0'
compile 'com.sun.jersey:jersey-client:1.8'

compile group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.7'

}

ext {
Expand Down
4 changes: 3 additions & 1 deletion dist/live-streams.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
"X" : [ "3Sat", "Livestream", "3Sat Livestream", "", "", "", "", "", "http://zdf0910-lh.akamaihd.net/i/dach10_v1@392872/master.m3u8", "http://www.zdf.de/ZDFmediathek/hauptnavigation/live", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ARD", "Livestream", "ARD Livestream", "", "", "", "", "", "http://daserste_live-lh.akamaihd.net/i/daserste_de@91204/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ARD", "Livestream", "ARD Alpha Livestream", "", "", "", "", "", "http://livestreams.br.de/i/bralpha_germany@119899/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ARD", "Livestream", "ARD ONE Livestream", "", "", "", "", "", "http://wdr_einsfestival-lh.akamaihd.net/i/wdr_einsfestival@328300/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ARD", "Livestream", "ARD Tagesschau Livestream", "", "", "", "", "", "http://tagesschau-lh.akamaihd.net/i/tagesschau_1@119231/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ARTE.DE", "Livestream", "ARTE.DE Livestream", "", "", "", "", "", "http://artelive-lh.akamaihd.net/i/artelive_de@393591/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ARTE.FR", "Livestream", "ARTE.FR Livestream", "", "", "", "", "", "http://artelive-lh.akamaihd.net/i/artelive_fr@344805/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "BR", "Livestream", "BR Livestream", "", "", "", "", "", "http://livestreams.br.de/i/bfsnord_germany@119898/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "DW", "Livestream", "DW Livestream", "", "", "", "", "", "http://dwstream72-lh.akamaihd.net/i/dwstream72_live@123556/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "HR", "Livestream", "HR Livestream", "", "", "", "", "", "http://live1_hr-lh.akamaihd.net/i/hr_fernsehen@75910/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "KiKA", "Livestream", "KiKA Livestream", "", "", "", "", "", "http://kika_geo-lh.akamaihd.net/i/livetvkika_de@75114/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "MDR", "Livestream", "MDR Livestream", "", "", "", "", "", "http://mdr_th_hls-lh.akamaihd.net/i/livetvmdrthueringen_de@106903/master.m3u8", "http://www.ardmediathek.de/tv/live?kanal=Alle", "", "", "", "", "", "", "", "", "", "" ],
Expand All @@ -22,7 +25,6 @@
"X" : [ "ZDF", "Livestream", "ZDF Livestream", "", "", "", "", "", "http://zdf1314-lh.akamaihd.net/i/de14_v1@392878/master.m3u8", "http://www.zdf.de/ZDFmediathek/hauptnavigation/live", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ZDF", "Livestream", "ZDF.info Livestream", "", "", "", "", "", "http://zdf1112-lh.akamaihd.net/i/de12_v1@392882/master.m3u8", "http://www.zdf.de/ZDFmediathek/hauptnavigation/live", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ZDF", "Livestream", "ZDF.neo Livestream", "", "", "", "", "", "http://zdf1314-lh.akamaihd.net/i/de13_v1@392877/master.m3u8", "http://www.zdf.de/ZDFmediathek/hauptnavigation/live", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ZDF", "Livestream", "ZDF.kultur Livestream", "", "", "", "", "", "http://zdf1112-lh.akamaihd.net/i/de11_v1@392881/master.m3u8", "http://www.zdf.de/ZDFmediathek/hauptnavigation/live", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "ZDF", "Livestream", "ZDF.heute Livestream", "", "", "", "", "", "http://zdf0102-lh.akamaihd.net/i/none01_v1@392849/master.m3u8", "http://www.zdf.de/ZDFmediathek/hauptnavigation/live", "", "", "", "", "", "", "", "", "", "" ],
"X" : [ "PHOENIX", "Livestream", "PHOENIX Livestream", "", "", "", "", "", "http://zdf0910-lh.akamaihd.net/i/de09_v1@392871/master.m3u8", "http://www.zdf.de/ZDFmediathek/hauptnavigation/live", "", "", "", "", "", "", "", "", "", "" ]
}
2 changes: 1 addition & 1 deletion dist/mserver.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<system-user-agent>MServer</system-user-agent>

<!-- URLs von fertigen Filmlisten die importiert werden -->
<system-filmliste-import-url-1>http://verteiler1.mediathekview.de/f/Filmliste-diff.xz</system-filmliste-import-url-1>
<system-filmliste-import-url-1>http://verteiler1.mediathekview.de/Filmliste-diff.xz</system-filmliste-import-url-1>
<!-- <system-filmliste-import-url-2>http://m2.picn.de/f/Filmliste-akt.xz</system-filmliste-import-url-2> -->

<!-- eine alte Liste importieren, nur bei einem "langen" Suchlauf -->
Expand Down
127 changes: 51 additions & 76 deletions src/main/java/mServer/crawler/AddToFilmlist.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,33 @@
*/
package mServer.crawler;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.stream.Collectors;
import java.util.concurrent.atomic.AtomicInteger;

import mSearch.Config;
import mSearch.daten.DatenFilm;
import mSearch.daten.ListeFilme;
import mSearch.tool.FileSize;
import mSearch.tool.Log;



/**
*
* @author emil
*/
public class AddToFilmlist {

private static final int MIN_SIZE_ADD_OLD = 5;

final int COUNTER_MAX = 20;
int counter = 0;
int treffer = 0;
AtomicInteger treffer = new AtomicInteger(0);
ListeFilme vonListe;
ListeFilme listeEinsortieren;

Expand Down Expand Up @@ -52,102 +62,67 @@ public synchronized int addOldList() {
// in eine vorhandene Liste soll eine andere Filmliste einsortiert werden
// es werden nur Filme die noch nicht vorhanden sind, einsortiert
counter = 0;
treffer = 0;
int size = listeEinsortieren.size();

HashSet<String> hash = new HashSet<>(listeEinsortieren.size() + 1, 1);
treffer = new AtomicInteger(0);

// ==============================================
// nach "Thema-Titel" suchen
vonListe.stream().forEach((f) -> hash.add(f.getIndexAddOld()));
listeEinsortieren.removeIf((f) -> hash.contains(f.getIndexAddOld()));
hash.clear();

Collection<DatenFilm> filteredTopicTitle = new CopyOnWriteArrayList<>();
filteredTopicTitle.addAll(listeEinsortieren.parallelStream()
.filter(film -> (null == vonListe.istInFilmListe(film.arr[DatenFilm.FILM_SENDER], film.arr[DatenFilm.FILM_THEMA], film.arr[DatenFilm.FILM_TITEL])))
.collect(Collectors.toList()));

Log.sysLog("===== Liste einsortieren Hash =====");
Log.sysLog("Liste einsortieren, Anzahl: " + size);
Log.sysLog("Liste einsortieren, entfernt: " + (size - listeEinsortieren.size()));
Log.sysLog("Liste einsortieren, noch einsortieren: " + listeEinsortieren.size());
Log.sysLog("Liste einsortieren, Anzahl: " + listeEinsortieren.size());
Log.sysLog("Liste einsortieren, entfernt: " + (listeEinsortieren.size() - filteredTopicTitle.size()));
Log.sysLog("");
size = listeEinsortieren.size();

// ==============================================
// nach "URL" suchen
vonListe.stream().forEach((f) -> hash.add(DatenFilm.getUrl(f)));
listeEinsortieren.removeIf((f) -> hash.contains(DatenFilm.getUrl(f)));
hash.clear();
Collection<DatenFilm> filteredUrl = new CopyOnWriteArrayList<>();

Collection<String> filmUrls = vonListe.parallelStream()
.map(DatenFilm::getUrl)
.collect(Collectors.toList());

int size = filteredTopicTitle.size();

filteredUrl.addAll(filteredTopicTitle.parallelStream()
.filter(film -> !filmUrls.contains(DatenFilm.getUrl(film)))
.collect(Collectors.toList()));

Log.sysLog("===== Liste einsortieren URL =====");
Log.sysLog("Liste einsortieren, Anzahl: " + size);
Log.sysLog("Liste einsortieren, entfernt: " + (size - listeEinsortieren.size()));
Log.sysLog("Liste einsortieren, noch einsortieren: " + listeEinsortieren.size());
Log.sysLog("Liste einsortieren, entfernt: " + (size - filteredUrl.size()));
Log.sysLog("");
size = listeEinsortieren.size();

// Rest nehmen wir wenn noch online
for (int i = 0; i < COUNTER_MAX; ++i) {
new Thread(new AddOld(listeEinsortieren)).start();
}
int count = 0;
final int COUNT_MAX = 300; // 10 Minuten
stopOld = false;
while (!Config.getStop() && counter > 0) {
try {
System.out.println("s: " + 2 * (count++) + " Liste: " + listeEinsortieren.size() + " Treffer: " + treffer + " Threads: " + counter);
if (count > COUNT_MAX) {
// dann haben wir mehr als 10 Minuten und: Stop
Log.sysLog("===== Liste einsortieren: ABBRUCH =====");
Log.sysLog("COUNT_MAX erreicht [s]: " + COUNT_MAX * 2);
Log.sysLog("");
stopOld = true;
}
wait(2000);
} catch (InterruptedException ignored) {
// Prüfung auf online erst am Ende durchführen, damit jeder Film nur einmalig geprüft wird
Collection<DatenFilm> filteredOnline = new CopyOnWriteArrayList<>();
filteredOnline.addAll(filteredUrl.parallelStream().filter(f ->
!Config.getStop() && FileSize.laengeLong(f.arr[DatenFilm.FILM_URL]) > MIN_SIZE_ADD_OLD
)
.collect(Collectors.toList()));
filteredOnline.parallelStream().forEach(f ->{
if(!Config.getStop())
{
initFilm(f);
}
}
});
vonListe.addAll(filteredOnline);


Log.sysLog("===== Liste einsortieren: Noch online =====");
Log.sysLog("Liste einsortieren, Anzahl: " + size);
Log.sysLog("Liste einsortieren, entfernt: " + (size - treffer));
Log.sysLog("Liste einsortieren, Anzahl: " + filteredOnline.size());
Log.sysLog("Liste einsortieren, entfernt: " + (filteredOnline.size() - treffer.get()));
Log.sysLog("");
Log.sysLog("In Liste einsortiert: " + treffer);
Log.sysLog("In Liste einsortiert: " + treffer.get());
Log.sysLog("");
return treffer;
}
private boolean stopOld = false;

private class AddOld implements Runnable {

private DatenFilm film;
private final ListeFilme listeOld;
private final int MIN_SIZE_ADD_OLD = 5; //REST eh nur Trailer

public AddOld(ListeFilme listeOld) {
this.listeOld = listeOld;
++counter;
}

@Override
public void run() {
while (!stopOld && (film = popOld(listeOld)) != null) {
long size = FileSize.laengeLong(film.arr[DatenFilm.FILM_URL]);
if (size > MIN_SIZE_ADD_OLD) {
addOld(film);
}
}
--counter;
}
}

private synchronized DatenFilm popOld(ListeFilme listeOld) {
if (listeOld.size() > 0) {
return listeOld.remove(0);
}
return null;
return treffer.get();
}

private synchronized boolean addOld(DatenFilm film) {
++treffer;
private void initFilm(DatenFilm film) {
treffer.getAndIncrement();
film.init();
return vonListe.add(film);
}

}
4 changes: 4 additions & 0 deletions src/main/java/mServer/crawler/CrawlerTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ public static String getPathFilmlist_json_akt(boolean aktDate) {
public static void setGeo(DatenFilm film) {
switch (film.arr[DatenFilm.FILM_SENDER]) {
case Const.ARD:
if (film.arr[DatenFilm.FILM_URL].startsWith("http://pd-videos.daserste.de/de/")) {
film.arr[DatenFilm.FILM_GEO] = DatenFilm.GEO_DE;
}
break;
case Const.SWR:
case Const.MDR:
case Const.BR:
Expand Down
1 change: 1 addition & 0 deletions src/main/java/mServer/crawler/FilmeSuchen.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import mSearch.filmeSuchen.ListenerFilmeLaden;
import mSearch.filmeSuchen.ListenerFilmeLadenEvent;
import mSearch.tool.Log;
import mServer.crawler.CrawlerConfig;
import mServer.crawler.sender.*;

/**
Expand Down
1 change: 1 addition & 0 deletions src/main/java/mServer/crawler/GetUrl.java
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ private synchronized MSStringBuilder getUri(String sender, String addr, MSString
}

try {
Log.sysLog("Durchsuche: "+addr);
// conn = url.openConnection(Proxy.NO_PROXY);
conn = (HttpURLConnection) new URL(addr).openConnection();
conn.setRequestProperty("User-Agent", Config.getUserAgent());
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/mServer/crawler/sender/ConfigReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package mServer.crawler.sender;

/**
 * Loads the config details.
 * <p>
 * Currently an empty placeholder: no loading logic has been implemented yet.
 */
public class ConfigReader
{

// TODO: implement loading of the config details.
}
8 changes: 8 additions & 0 deletions src/main/java/mServer/crawler/sender/CrawlerConfig.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package mServer.crawler.sender;


/**
 * Planned POJO for the configuration values needed by the crawler.
 * <p>
 * Currently an empty placeholder without any fields or accessors.
 */
public class CrawlerConfig
{

// TODO: POJO which holds all needed configs.
}
28 changes: 28 additions & 0 deletions src/main/java/mServer/crawler/sender/Film.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package mServer.crawler.sender;

import mServer.crawler.sender.newsearch.Qualities;

import java.net.URI;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * A film discovered by a crawler, together with the download locations
 * known for it, keyed by quality level.
 */
public class Film
{
    /** Download location per quality; at most one URI is kept for each quality. */
    private final Map<Qualities, URI> urls = new ConcurrentHashMap<>();

    /**
     * Creates a film that has no URLs registered yet.
     */
    public Film()
    {
        // The URL map is initialised at its declaration.
    }

    /**
     * Registers the location of this film for the given quality, replacing
     * any location stored earlier for the same quality.
     * <p>
     * The call is silently ignored when either argument is {@code null};
     * the backing {@link ConcurrentHashMap} does not accept null keys or values.
     *
     * @param aQuality the quality the URL belongs to
     * @param aUrl     the location of the film in that quality
     */
    public void addUrl(Qualities aQuality, URI aUrl)
    {
        if (aQuality == null || aUrl == null)
        {
            return;
        }
        urls.put(aQuality, aUrl);
    }
}
Loading

0 comments on commit f676d36

Please sign in to comment.