Skip to content

Commit 1ec50dd

Browse files
authored
Merge branch 'master' into pull-requests/co-uk-fixes
2 parents c987702 + 8f0a91a commit 1ec50dd

File tree

1 file changed

+47
-16
lines changed

1 file changed

+47
-16
lines changed

Contents/Code/__init__.py

Lines changed: 47 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,13 @@
1414

1515
def json_decode(output):
1616
try:
17-
return json.loads(output)
17+
return json.loads(output,encoding="utf-8")
1818
except:
1919
return None
2020

2121

2222
# URLs
23-
VERSION_NO = '1.2017.11.10.1'
23+
VERSION_NO = '1.2017.12.21.1'
2424

2525
REQUEST_DELAY = 0 # Delay used when requesting HTML, may be good to have to prevent being banned from the site
2626

@@ -31,10 +31,10 @@ def json_decode(output):
3131
THREAD_MAX = 20
3232

3333
intl_sites={
34-
'en' : { 'url': 'www.audible.com' , 'rel_date' : u'Release Date' , 'nar_by' : u'Narrated By' , 'nar_by2': u'Narrated by'},
35-
'fr' : { 'url': 'www.audible.fr' , 'rel_date' : u'Date de publication' , 'nar_by' : u'Narrateur(s)' , 'nar_by2': u'Lu par'},
36-
'de' : { 'url': 'www.audible.de' , 'rel_date' : u'Erscheinungsdatum' , 'nar_by' : u'Gesprochen von', 'rel_date2': u'Veröffentlicht'},
37-
'it' : { 'url': 'www.audible.it' , 'rel_date' : u'Data di Pubblicazione', 'nar_by' : u'Narratore' },
34+
'en' : { 'url': 'www.audible.com' , 'urltitle' : u'title=' , 'rel_date' : u'Release date' , 'nar_by' : u'Narrated By' , 'nar_by2': u'Narrated by'},
35+
'fr' : { 'url': 'www.audible.fr' , 'urltitle' : u'searchTitle=' , 'rel_date' : u'Date de publication' , 'nar_by' : u'Narrateur(s)' , 'nar_by2': u'Lu par'},
36+
'de' : { 'url': 'www.audible.de' , 'urltitle' : u'searchTitle=' , 'rel_date' : u'Erscheinungsdatum' , 'nar_by' : u'Gesprochen von', 'rel_date2': u'Veröffentlicht'},
37+
'it' : { 'url': 'www.audible.it' , 'urltitle' : u'searchTitle=' , 'rel_date' : u'Data di Pubblicazione', 'nar_by' : u'Narratore' },
3838
#'jp' : { 'url': 'www.audible.co.jp', 'rel_date' : u'N/A', 'nar_by' : u'ナレーター' }, # untested
3939
}
4040

@@ -57,6 +57,7 @@ def SetupUrls(sitetype, base, lang='en'):
5757
Log('Pulling language from sites array')
5858
lang=sites_langs[base]['lang']
5959
if lang in intl_sites :
60+
urlsearchtitle=intl_sites[lang]['urltitle']
6061
ctx['REL_DATE']=intl_sites[lang]['rel_date']
6162
ctx['NAR_BY' ]=intl_sites[lang]['nar_by']
6263
if 'rel_date2' in intl_sites[lang]:
@@ -68,7 +69,7 @@ def SetupUrls(sitetype, base, lang='en'):
6869
else:
6970
ctx['NAR_BY_INFO' ]=ctx['NAR_BY' ]
7071
else:
71-
ctx['REL_DATE' ]='Release Date'
72+
ctx['REL_DATE' ]='Release date'
7273
ctx['REL_DATE_INFO']=ctx['REL_DATE']
7374
ctx['NAR_BY' ]='Narrated By'
7475
ctx['NAR_BY_INFO' ]='Narrated by'
@@ -86,6 +87,7 @@ def SetupUrls(sitetype, base, lang='en'):
8687
base='www.audible.com'
8788
if lang in intl_sites :
8889
base=intl_sites[lang]['url']
90+
urlsearchtitle=intl_sites[lang]['urltitle']
8991
ctx['REL_DATE']=intl_sites[lang]['rel_date']
9092
ctx['NAR_BY' ]=intl_sites[lang]['nar_by']
9193
if 'rel_date2' in intl_sites[lang]:
@@ -97,18 +99,19 @@ def SetupUrls(sitetype, base, lang='en'):
9799
else:
98100
ctx['NAR_BY_INFO' ]=ctx['NAR_BY' ]
99101
else:
100-
ctx['REL_DATE' ]='Release Date'
102+
ctx['REL_DATE' ]='Release date'
101103
ctx['REL_DATE_INFO']=ctx['REL_DATE']
102104
ctx['NAR_BY' ]='Narrated By'
103105
ctx['NAR_BY_INFO' ]='Narrated by'
104106

105107

106108
AUD_BASE_URL='https://' + str(base) + '/'
109+
AUD_TITLE_URL=urlsearchtitle
107110
ctx['AUD_BOOK_INFO' ]=AUD_BASE_URL + 'pd/%s?ipRedirectOverride=true'
108111
ctx['AUD_ARTIST_SEARCH_URL' ]=AUD_BASE_URL + 'search?searchAuthor=%s&ipRedirectOverride=true'
109-
ctx['AUD_ALBUM_SEARCH_URL' ]=AUD_BASE_URL + 'search?searchTitle=%s&x=41&ipRedirectOverride=true'
112+
ctx['AUD_ALBUM_SEARCH_URL' ]=AUD_BASE_URL + 'search?' + AUD_TITLE_URL + '%s&x=41&ipRedirectOverride=true'
110113
ctx['AUD_KEYWORD_SEARCH_URL']=AUD_BASE_URL + 'search?filterby=field-keywords&advsearchKeywords=%s&x=41&ipRedirectOverride=true'
111-
ctx['AUD_SEARCH_URL' ]=AUD_BASE_URL + 'search?searchTitle={0}&searchAuthor={1}&x=41&ipRedirectOverride=true'
114+
ctx['AUD_SEARCH_URL' ]=AUD_BASE_URL + 'search?' + AUD_TITLE_URL + '{0}&searchAuthor={1}&x=41&ipRedirectOverride=true'
112115
return ctx
113116

114117

@@ -169,6 +172,7 @@ def doSearch(self, url, ctx):
169172
html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY)
170173

171174
found = []
175+
172176
for r in html.xpath('//div[a/img[@class="yborder"]]'):
173177
date = self.getDateFromString(self.getStringContentFromXPath(r, 'text()[1]'))
174178
title = self.getStringContentFromXPath(r, 'a[2]')
@@ -264,7 +268,22 @@ def findDateInTitle(self, title):
264268
def doSearch(self, url, ctx):
265269
html = HTML.ElementFromURL(url, sleep=REQUEST_DELAY)
266270
found = []
267-
271+
self.Log('-----------------------------------------just before new xpath line--------------------')
272+
for r in html.xpath('//ul//li[contains(@class,"productListItem")]'):
273+
datetext = self.getStringContentFromXPath(r, 'div/div/div/div/div/div/span/ul/li[contains (@class,"releaseDateLabel")]/span'.decode('utf-8'))
274+
datetext=re.sub(r'[^0-9\-]', '',datetext)
275+
date=self.getDateFromString(datetext)
276+
title = self.getStringContentFromXPath(r, 'div/div/div/div/div/div/span/ul//a[1]')
277+
murl = self.getAnchorUrlFromXPath(r, 'div/div/div/div/div/div/span/ul/li/h3//a[1]')
278+
thumb = self.getImageUrlFromXPath(r, 'div/div/div/div/div/div/div[contains(@class,"responsive-product-square")]/div/a/img')
279+
author = self.getStringContentFromXPath(r, 'div/div/div/div/div/div/span/ul/li[contains (@class,"authorLabel")]/span/a[1]')
280+
narrator = self.getStringContentFromXPath(r, 'div/div/div/div/div/div/span/ul/li[contains (@class,"narratorLabel")]/span//a[1]'.format(ctx['NAR_BY']).decode('utf-8'))
281+
self.Log('---------------------------------------XPATH SEARCH HIT-----------------------------------------------')
282+
283+
found.append({'url': murl, 'title': title, 'date': date, 'thumb': thumb, 'author': author, 'narrator': narrator})
284+
285+
self.Log('-----------------------------------------just after new xpath line--------------------')
286+
268287
for r in html.xpath('//div[contains (@class, "adbl-search-result")]'):
269288
date = self.getDateFromString(self.getStringContentFromXPath(r, 'div/div/ul/li[contains (., "{0}")]/span[2]//text()'.format(ctx['REL_DATE']).decode('utf-8')))
270289
title = self.getStringContentFromXPath(r, 'div/div/div/div/a[1]')
@@ -351,7 +370,7 @@ def search(self, results, media, lang, manual):
351370
self.Log('Found %s result(s) for query "%s"', len(found), normalizedName)
352371
i = 1
353372
for f in found:
354-
self.Log(' %s. (title) %s (url)[%s] (date)(%s) (thumb){%s}', i, f['title'], f['url'], str(f['date']), f['thumb'])
373+
self.Log(' %s. (title) %s (author) %s (url)[%s] (date)(%s) (thumb){%s}', i, f['title'], f['author'], f['url'], str(f['date']), f['thumb'])
355374
i += 1
356375

357376
self.Log('-----------------------------------------------------------------------')
@@ -363,11 +382,17 @@ def search(self, results, media, lang, manual):
363382
self.Log('URL For Breakdown: %s', url)
364383

365384
# Get the id
385+
# for itemId in url.split('/') :
366386
for itemId in url.split('/') :
367387
if re.match(r'B0[0-9A-Z]{8,8}', itemId):
368388
break
369389
itemId=None
370390

391+
#New Search results contain question marks after the ID
392+
for itemId in itemId.split('?') :
393+
if re.match(r'B0[0-9A-Z]{8,8}', itemId):
394+
break
395+
371396
if len(itemId) == 0:
372397
Log('No Match: %s', url)
373398
continue
@@ -446,10 +471,10 @@ def update(self, metadata, media, lang, force=False):
446471
pass
447472

448473
date=None
474+
rating=None
449475
series=''
450476
genre1=None
451477
genre2=None
452-
rating=0
453478

454479
for r in html.xpath('//div[contains (@id, "adbl_page_content")]'):
455480
date = self.getDateFromString(self.getStringContentFromXPath(r, '//li[contains (., "{0}")]/span[2]//text()'.format(ctx['REL_DATE_INFO']).decode('utf-8')))
@@ -466,10 +491,16 @@ def update(self, metadata, media, lang, force=False):
466491
self.Log('---------------------------------------XPATH SEARCH HIT-----------------------------------------------')
467492

468493
if date is None :
494+
#for r in html.xpath('//div[contains (@class,"slot bottomSlot")]/script[contains (@type, "application/ld+json")]'):
469495
for r in html.xpath('//script[contains (@type, "application/ld+json")]'):
470496
page_content = r.text_content()
471-
page_content = page_content.replace('\n', '') # Remove and new lines. JSON doesn't like them.
472-
page_content = re.sub(r'\\(?![bfnrtv\'\"\\])', '', page_content) # Remove any backslashes that aren't escaping a character JSON needs escaped
497+
page_content = page_content.replace('\n', '')
498+
#page_content = page_content.replace('\'', '\\\'')
499+
#page_content = re.sub(r'\\(?![bfnrtv\'\"\\])', '', page_content)
500+
# Escape (double) any backslash that does not begin an escape sequence the JSON decoder accepts, so the decoder does not reject the payload
501+
remove_inv_json_esc=re.compile(r'([^\\])(\\(?![bfnrt\'\"\\/]|u[A-Fa-f0-9]{4}))')
502+
page_content=remove_inv_json_esc.sub(r'\1\\\2', page_content)
503+
self.Log(page_content)
473504
json_data=json_decode(page_content)
474505
for json_data in json_data:
475506
if 'datePublished' in json_data:
@@ -499,7 +530,7 @@ def update(self, metadata, media, lang, force=False):
499530
#for key in json_data:
500531
# Log('{0}:{1}'.format(key, json_data[key]))
501532
genre1=json_data['itemListElement'][1]['item']['name']
502-
try: # Not all books have two genre tags.
533+
try:
503534
genre2=json_data['itemListElement'][2]['item']['name']
504535
except:
505536
continue

0 commit comments

Comments
 (0)