Skip to content

Commit 0f5db94

Browse files
authored
Cleanup erroneous backslashes that kill JSON
Finally figured out a regex that removes any backslash that isn't followed by a character that JSON requires to be escaped. This should keep a stray backslash from breaking the JSON parsing.
1 parent 83abf05 commit 0f5db94

File tree

1 file changed

+3
-4
lines changed

1 file changed

+3
-4
lines changed

Contents/Code/__init__.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def json_decode(output):
2020

2121

2222
# URLs
23-
VERSION_NO = '1.2017.11.08.2'
23+
VERSION_NO = '1.2017.11.09.5'
2424

2525
REQUEST_DELAY = 0 # Delay used when requesting HTML, may be good to have to prevent being banned from the site
2626

@@ -468,9 +468,8 @@ def update(self, metadata, media, lang, force=False):
468468
if date is None :
469469
for r in html.xpath('//script[contains (@type, "application/ld+json")]'):
470470
page_content = r.text_content()
471-
page_content = page_content.replace('\n', '')
472-
page_content = page_content.replace('\)', ')') #remove an escape from a one-off book
473-
#page_content = page_content.replace('\\', '\\\\')
471+
page_content = page_content.replace('\n', '') # Remove any newlines. JSON doesn't like them.
472+
page_content = re.sub(r'\\(?![bfnrtv\'\"\\])', '', page_content) # Remove any backslashes that aren't escaping a character JSON needs escaped
474473
json_data=json_decode(page_content)
475474
for json_data in json_data:
476475
if 'datePublished' in json_data:

0 commit comments

Comments
 (0)