Skip to content

Commit e28231d

Browse files
committed
new arte crawler
1 parent 48cab27 commit e28231d

38 files changed

+1212
-1704
lines changed

src/main/java/de/mediathekview/mlib/Const.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@ public class Const {
4444
public static final String ARD = "ARD";
4545
public static final String ARD_ALPHA = "ARD-alpha";
4646
public static final String ARTE_DE = "ARTE.DE";
47+
public static final String ARTE_EN = "ARTE.EN";
48+
public static final String ARTE_ES = "ARTE.ES";
4749
public static final String ARTE_FR = "ARTE.FR";
50+
public static final String ARTE_IT = "ARTE.IT";
51+
public static final String ARTE_PL = "ARTE.PL";
4852
public static final String BR = "BR";
4953
public static final String DW = "DW";
5054
public static final String HR = "HR";

src/main/java/mServer/crawler/FilmeSuchen.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import de.mediathekview.mlib.tool.Log;
2828
import mServer.crawler.sender.*;
2929
import mServer.crawler.sender.ard.ArdCrawler;
30-
import mServer.crawler.sender.arte.MediathekArte;
30+
import mServer.crawler.sender.arte.*;
3131
import mServer.crawler.sender.dreisat.DreiSatCrawler;
3232
import mServer.crawler.sender.dw.DwCrawler;
3333
import mServer.crawler.sender.kika.KikaApiCrawler;
@@ -88,7 +88,12 @@ public FilmeSuchen() {
8888
mediathekListe.add(new ZdfCrawler(this, 0));
8989
}
9090
if (crawlerList.contains("ARTE")) {
91-
mediathekListe.add(new MediathekArte(this, 0));
91+
mediathekListe.add(new ArteCrawler(this, 0));
92+
mediathekListe.add(new ArteCrawler_FR(this, 0));
93+
mediathekListe.add(new ArteCrawler_EN(this, 1));
94+
mediathekListe.add(new ArteCrawler_ES(this, 1));
95+
mediathekListe.add(new ArteCrawler_PL(this, 1));
96+
mediathekListe.add(new ArteCrawler_IT(this, 1));
9297
}
9398
if (crawlerList.contains("DW")) {
9499
mediathekListe.add(new DwCrawler(this, 0));
@@ -97,7 +102,7 @@ public FilmeSuchen() {
97102
mediathekListe.add(new KikaApiCrawler(this, 0));
98103
}
99104
if (crawlerList.contains("3SAT")) {
100-
mediathekListe.add(new DreiSatCrawler(this, 1));
105+
mediathekListe.add(new DreiSatCrawler(this, 0));
101106
}
102107
if (crawlerList.contains("SR")) {
103108
mediathekListe.add(new SrCrawler(this, 1));
@@ -112,7 +117,7 @@ public FilmeSuchen() {
112117
mediathekListe.add(new OrfOnCrawler(this, 1));
113118
}
114119
if (crawlerList.contains("PHONIX")) {
115-
mediathekListe.add(new PhoenixCrawler(this, 1));
120+
mediathekListe.add(new PhoenixCrawler(this, 0));
116121
}
117122

118123
}

src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java

Lines changed: 0 additions & 46 deletions
This file was deleted.

src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmsDTO.java

Lines changed: 0 additions & 37 deletions
This file was deleted.

src/main/java/mServer/crawler/sender/arte/ArteCollectionChildDeserializer.java

Lines changed: 0 additions & 42 deletions
This file was deleted.

src/main/java/mServer/crawler/sender/arte/ArteCollectionParentDeserializer.java

Lines changed: 0 additions & 41 deletions
This file was deleted.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package mServer.crawler.sender.arte;
2+
3+
/**
 * Constants shared by the ARTE crawler classes: URL templates and the
 * authorization header value.
 *
 * <p>This class only holds static constants and cannot be instantiated.
 */
public class ArteConstants {

  private ArteConstants() {
    // constants holder, no instances
  }

  /**
   * Template for the video overview, sorted by creation date descending.
   * Placeholders: page number, language code.
   */
  public static final String VIDEOS_URL =
      "https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=-creationDate&language=%s";

  /**
   * Template for the video overview, sorted by creation date ascending.
   * Placeholders: page number, language code.
   */
  public static final String VIDEOS_URL_ALT =
      "https://api.arte.tv/api/opa/v3/videos?limit=100&page=%s&sort=creationDate&language=%s";

  /** Template for the stream details of one video. Placeholders: PROGRAMID/KIND/LANG. */
  public static final String VIDEO_URL =
      "https://www.arte.tv/hbbtvv2/services/web/index.php/OPA/v3/streams/%s/%s/%s";

  /**
   * Value sent in the {@code Authorization} request header.
   * NOTE(review): the credential is embedded in source; presumably a shared
   * public client token - confirm it is safe to publish.
   */
  public static final String API_TOKEN =
      "Bearer Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA";
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package mServer.crawler.sender.arte;
2+
3+
import com.google.gson.JsonElement;
4+
import de.mediathekview.mlib.Config;
5+
import de.mediathekview.mlib.Const;
6+
import de.mediathekview.mlib.daten.DatenFilm;
7+
import de.mediathekview.mlib.tool.Log;
8+
import mServer.crawler.CrawlerTool;
9+
import mServer.crawler.FilmeSuchen;
10+
import mServer.crawler.sender.MediathekCrawler;
11+
import mServer.crawler.sender.arte.json.ArteVideoInfoDto;
12+
import mServer.crawler.sender.arte.tasks.ArteDtoVideo2FilmTask;
13+
import mServer.crawler.sender.arte.tasks.ArteVideoInfoTask;
14+
import mServer.crawler.sender.arte.tasks.ArteVideoLinkTask;
15+
import mServer.crawler.sender.base.JsonUtils;
16+
import mServer.crawler.sender.base.JsoupConnection;
17+
import mServer.crawler.sender.base.TopicUrlDTO;
18+
import org.apache.logging.log4j.LogManager;
19+
import org.apache.logging.log4j.Logger;
20+
21+
import java.io.IOException;
22+
import java.util.Map;
23+
import java.util.Optional;
24+
import java.util.Set;
25+
import java.util.concurrent.ConcurrentLinkedQueue;
26+
import java.util.concurrent.RecursiveTask;
27+
28+
public class ArteCrawler extends MediathekCrawler {
29+
private static final Logger LOG = LogManager.getLogger(ArteCrawler.class);
30+
private final JsoupConnection jsoupConnection;
31+
32+
public ArteCrawler(FilmeSuchen ssearch, int startPrio) {
33+
this(ssearch, startPrio, Const.ARTE_DE);
34+
}
35+
36+
protected ArteCrawler(FilmeSuchen ssearch, int startPrio, String sender) {
37+
super(ssearch, sender,/* threads */ 1, /* urlWarten */ 200, startPrio);
38+
this.jsoupConnection = new JsoupConnection(60, 4);
39+
}
40+
41+
protected ArteLanguage getLanguage() {
42+
return ArteLanguage.DE;
43+
}
44+
45+
@Override
46+
protected RecursiveTask<Set<DatenFilm>> createCrawlerTask() {
47+
48+
try {
49+
final ArteVideoInfoTask aArteRestVideoInfoTask;
50+
// DO NOT overload - maximumUrlsPerTask used to reduce threads to 4
51+
aArteRestVideoInfoTask = new ArteVideoInfoTask(this, createVideosQueue());
52+
final ConcurrentLinkedQueue<ArteVideoInfoDto> videos = new ConcurrentLinkedQueue<>();
53+
videos.addAll(aArteRestVideoInfoTask.fork().join());
54+
//
55+
Log.sysLog(getSendername() + " Anzahl video info: " + videos.size());
56+
//
57+
final ConcurrentLinkedQueue<ArteVideoInfoDto> videosWithLink = new ConcurrentLinkedQueue<>();
58+
final ArteVideoLinkTask aArteRestVideosTask = new ArteVideoLinkTask(this, videos);
59+
videosWithLink.addAll(aArteRestVideosTask.fork().join());
60+
//
61+
Log.sysLog(getSendername() + " Anzahl video links: " + videosWithLink.size());
62+
//
63+
return new ArteDtoVideo2FilmTask(this, new ConcurrentLinkedQueue<>(videosWithLink), getSendername());
64+
65+
} catch (final Exception ex) {
66+
LOG.fatal("Exception in {} crawler.", getSendername(), ex);
67+
}
68+
return null;
69+
}
70+
71+
private ConcurrentLinkedQueue<TopicUrlDTO> createVideosQueue() {
72+
int maxPages = getMaxPagesForOverview();
73+
final ConcurrentLinkedQueue<TopicUrlDTO> root = new ConcurrentLinkedQueue<>();
74+
String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, getLanguage().toString().toLowerCase());
75+
root.add(new TopicUrlDTO("all videos1", rootUrl));
76+
if (maxPages >= 100) {
77+
String rootUrl2 = String.format(ArteConstants.VIDEOS_URL_ALT, 1, getLanguage().toString().toLowerCase());
78+
root.add(new TopicUrlDTO("all videos2", rootUrl2));
79+
}
80+
return root;
81+
}
82+
83+
private int getMaxPagesForOverview() {
84+
final int naturalLimit = Math.min(100, getMaximumSubpages());
85+
String rootUrl = String.format(ArteConstants.VIDEOS_URL, 1, getLanguage().toString().toLowerCase());
86+
String[] path = {"meta", "videos", "pages"};
87+
try {
88+
final Map<String, String> headers = Map.of(
89+
"Accept", "application/json",
90+
"Content-Type", "application/json",
91+
"Authorization", ArteConstants.API_TOKEN
92+
);
93+
JsonElement element = jsoupConnection.requestBodyAsJsonElement(rootUrl, headers);
94+
Optional<Integer> pages = JsonUtils.getElementValueAsInteger(element, path);
95+
if (pages.isPresent()) {
96+
return Math.min(pages.get(), naturalLimit);
97+
}
98+
} catch (IOException e) {
99+
LOG.error("getMaxPagesForOverview", e);
100+
}
101+
return naturalLimit;
102+
}
103+
104+
private int getMaximumSubpages() {
105+
if (CrawlerTool.loadLongMax()) {
106+
return 10;
107+
} else {
108+
return 1;
109+
}
110+
}
111+
}
112+
113+
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package mServer.crawler.sender.arte;
2+
3+
import de.mediathekview.mlib.Const;
4+
import mServer.crawler.FilmeSuchen;
5+
6+
public class ArteCrawler_EN extends ArteCrawler {
7+
8+
public ArteCrawler_EN(FilmeSuchen ssearch, int startPrio) {
9+
super(ssearch, startPrio, Const.ARTE_EN);
10+
}
11+
12+
@Override
13+
protected ArteLanguage getLanguage() {
14+
return ArteLanguage.EN;
15+
}
16+
17+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package mServer.crawler.sender.arte;
2+
3+
import de.mediathekview.mlib.Const;
4+
import mServer.crawler.FilmeSuchen;
5+
6+
public class ArteCrawler_ES extends ArteCrawler {
7+
8+
public ArteCrawler_ES(FilmeSuchen ssearch, int startPrio) {
9+
super(ssearch, startPrio, Const.ARTE_ES);
10+
}
11+
12+
@Override
13+
protected ArteLanguage getLanguage() {
14+
return ArteLanguage.ES;
15+
}
16+
17+
}

0 commit comments

Comments
 (0)