1+ package mServer .crawler .sender .arte ;
2+
3+ import com .google .gson .JsonElement ;
4+ import de .mediathekview .mlib .Config ;
5+ import de .mediathekview .mlib .Const ;
6+ import de .mediathekview .mlib .daten .DatenFilm ;
7+ import de .mediathekview .mlib .tool .Log ;
8+ import mServer .crawler .CrawlerTool ;
9+ import mServer .crawler .FilmeSuchen ;
10+ import mServer .crawler .sender .MediathekCrawler ;
11+ import mServer .crawler .sender .arte .json .ArteVideoInfoDto ;
12+ import mServer .crawler .sender .arte .tasks .ArteDtoVideo2FilmTask ;
13+ import mServer .crawler .sender .arte .tasks .ArteVideoInfoTask ;
14+ import mServer .crawler .sender .arte .tasks .ArteVideoLinkTask ;
15+ import mServer .crawler .sender .base .JsonUtils ;
16+ import mServer .crawler .sender .base .JsoupConnection ;
17+ import mServer .crawler .sender .base .TopicUrlDTO ;
18+ import org .apache .logging .log4j .LogManager ;
19+ import org .apache .logging .log4j .Logger ;
20+
21+ import java .io .IOException ;
22+ import java .util .Map ;
23+ import java .util .Optional ;
24+ import java .util .Set ;
25+ import java .util .concurrent .ConcurrentLinkedQueue ;
26+ import java .util .concurrent .RecursiveTask ;
27+
28+ public class ArteCrawler extends MediathekCrawler {
29+ private static final Logger LOG = LogManager .getLogger (ArteCrawler .class );
30+ private final JsoupConnection jsoupConnection ;
31+
32+ public ArteCrawler (FilmeSuchen ssearch , int startPrio ) {
33+ this (ssearch , startPrio , Const .ARTE_DE );
34+ }
35+
36+ protected ArteCrawler (FilmeSuchen ssearch , int startPrio , String sender ) {
37+ super (ssearch , sender ,/* threads */ 1 , /* urlWarten */ 200 , startPrio );
38+ this .jsoupConnection = new JsoupConnection (60 , 4 );
39+ }
40+
41+ protected ArteLanguage getLanguage () {
42+ return ArteLanguage .DE ;
43+ }
44+
45+ @ Override
46+ protected RecursiveTask <Set <DatenFilm >> createCrawlerTask () {
47+
48+ try {
49+ final ArteVideoInfoTask aArteRestVideoInfoTask ;
50+ // DO NOT overload - maximumUrlsPerTask used to reduce threads to 4
51+ aArteRestVideoInfoTask = new ArteVideoInfoTask (this , createVideosQueue ());
52+ final ConcurrentLinkedQueue <ArteVideoInfoDto > videos = new ConcurrentLinkedQueue <>();
53+ videos .addAll (aArteRestVideoInfoTask .fork ().join ());
54+ //
55+ Log .sysLog (getSendername () + " Anzahl video info: " + videos .size ());
56+ //
57+ final ConcurrentLinkedQueue <ArteVideoInfoDto > videosWithLink = new ConcurrentLinkedQueue <>();
58+ final ArteVideoLinkTask aArteRestVideosTask = new ArteVideoLinkTask (this , videos );
59+ videosWithLink .addAll (aArteRestVideosTask .fork ().join ());
60+ //
61+ Log .sysLog (getSendername () + " Anzahl video links: " + videosWithLink .size ());
62+ //
63+ return new ArteDtoVideo2FilmTask (this , new ConcurrentLinkedQueue <>(videosWithLink ), getSendername ());
64+
65+ } catch (final Exception ex ) {
66+ LOG .fatal ("Exception in {} crawler." , getSendername (), ex );
67+ }
68+ return null ;
69+ }
70+
71+ private ConcurrentLinkedQueue <TopicUrlDTO > createVideosQueue () {
72+ int maxPages = getMaxPagesForOverview ();
73+ final ConcurrentLinkedQueue <TopicUrlDTO > root = new ConcurrentLinkedQueue <>();
74+ String rootUrl = String .format (ArteConstants .VIDEOS_URL , 1 , getLanguage ().toString ().toLowerCase ());
75+ root .add (new TopicUrlDTO ("all videos1" , rootUrl ));
76+ if (maxPages >= 100 ) {
77+ String rootUrl2 = String .format (ArteConstants .VIDEOS_URL_ALT , 1 , getLanguage ().toString ().toLowerCase ());
78+ root .add (new TopicUrlDTO ("all videos2" , rootUrl2 ));
79+ }
80+ return root ;
81+ }
82+
83+ private int getMaxPagesForOverview () {
84+ final int naturalLimit = Math .min (100 , getMaximumSubpages ());
85+ String rootUrl = String .format (ArteConstants .VIDEOS_URL , 1 , getLanguage ().toString ().toLowerCase ());
86+ String [] path = {"meta" , "videos" , "pages" };
87+ try {
88+ final Map <String , String > headers = Map .of (
89+ "Accept" , "application/json" ,
90+ "Content-Type" , "application/json" ,
91+ "Authorization" , ArteConstants .API_TOKEN
92+ );
93+ JsonElement element = jsoupConnection .requestBodyAsJsonElement (rootUrl , headers );
94+ Optional <Integer > pages = JsonUtils .getElementValueAsInteger (element , path );
95+ if (pages .isPresent ()) {
96+ return Math .min (pages .get (), naturalLimit );
97+ }
98+ } catch (IOException e ) {
99+ LOG .error ("getMaxPagesForOverview" , e );
100+ }
101+ return naturalLimit ;
102+ }
103+
104+ private int getMaximumSubpages () {
105+ if (CrawlerTool .loadLongMax ()) {
106+ return 10 ;
107+ } else {
108+ return 1 ;
109+ }
110+ }
111+ }
112+
113+
0 commit comments