From 9b306bee138dcdec049db011a406a6c01eba639b Mon Sep 17 00:00:00 2001 From: Johannes Wong Date: Sat, 4 Mar 2023 17:33:19 -0800 Subject: [PATCH 1/2] Updated bilibili extractor to support new site code parsing when using cookies --- src/you_get/extractors/bilibili.py | 62 +++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 6335e6dd87..4ac82a26f9 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -338,21 +338,47 @@ def prepare(self, **kwargs): # bangumi elif sort == 'bangumi': + ep_id = "" + avid = "" + cid = "" + initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME - initial_state = json.loads(initial_state_text) + if (initial_state_text): + initial_state = json.loads(initial_state_text) + + # warn if this bangumi has more than 1 video + epn = len(initial_state['epList']) + if epn > 1 and not kwargs.get('playlist'): + log.w('This bangumi currently has %s videos. (use --playlist to download all videos.)' % epn) - # warn if this bangumi has more than 1 video - epn = len(initial_state['epList']) - if epn > 1 and not kwargs.get('playlist'): - log.w('This bangumi currently has %s videos. (use --playlist to download all videos.)' % epn) + # set video title + self.title = initial_state['h1Title'] - # set video title - self.title = initial_state['h1Title'] + # construct playinfos + ep_id = initial_state['epInfo']['id'] + avid = initial_state['epInfo']['aid'] + cid = initial_state['epInfo']['cid'] + else: + initial_state_text = match1(html_content, r'\"episodes\":(.*?)\,\"user_status') + pinitial_state = json.loads(initial_state_text) + + epn = len(pinitial_state) + if epn > 1 and not kwargs.get('playlist'): + log.w('This bangumi currently has %s videos. (use --playlist to download all videos.)' % epn) + + initial_state = {} + for dic in pinitial_state: + if dic['link'] == self.url.rstrip('/'): + initial_state = dic + break + + self.title = initial_state['share_copy'] + + # construct playinfos + ep_id = initial_state['id'] + avid = initial_state['aid'] + cid = initial_state['cid'] - # construct playinfos - ep_id = initial_state['epInfo']['id'] - avid = initial_state['epInfo']['aid'] - cid = initial_state['epInfo']['cid'] playinfos = [] api_url = self.bilibili_bangumi_api(avid, cid, ep_id) api_content = get_content(api_url, headers=self.bilibili_headers(referer=self.url)) @@ -716,10 +742,18 @@ def download_playlist_by_url(self, url, **kwargs): self.download(**kwargs) elif sort == 'bangumi': + episodes = [] initial_state_text = match1(html_content, r'__INITIAL_STATE__=(.*?);\(function\(\)') # FIXME - initial_state = json.loads(initial_state_text) - epn, i = len(initial_state['epList']), 0 - for ep in initial_state['epList']: + if (initial_state_text): + # initial_state = json.loads(initial_state_text) + episodes = json.loads(initial_state_text)['epList'] + else: + initial_state_text = match1(html_content, r'\"episodes\":(.*?)\,\"user_status') + # initial_state = json.loads(initial_state_text) + episodes = json.loads(initial_state_text) + + epn, i = len(episodes), 0 + for ep in episodes: i += 1; log.w('Extracting %s of %s videos ...' % (i, epn)) ep_id = ep['id'] epurl = 'https://www.bilibili.com/bangumi/play/ep%s/' % ep_id From e443642b81479c5c5a34d8b17b9cb9ba683b63ca Mon Sep 17 00:00:00 2001 From: Johannes Wong Date: Sun, 12 Mar 2023 09:46:57 -0700 Subject: [PATCH 2/2] Forgot to handle query parameters when parsing new bilibili site code --- src/you_get/extractors/bilibili.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/you_get/extractors/bilibili.py b/src/you_get/extractors/bilibili.py index 4ac82a26f9..2d748b70b6 100644 --- a/src/you_get/extractors/bilibili.py +++ b/src/you_get/extractors/bilibili.py @@ -368,7 +368,15 @@ def prepare(self, **kwargs): initial_state = {} for dic in pinitial_state: - if dic['link'] == self.url.rstrip('/'): + query_start = self.url.find('?') + url = "" + + if query_start > 0: + url = self.url[0:query_start].rstrip('/') + else: + url = self.url.rstrip('/') + + if dic['link'] == url: initial_state = dic break