
Commit e98773f

Merge pull request #11320 from pymedusa/release/release-1.0.16
Release/release 1.0.16
2 parents d45f98c + 8c6f701 commit e98773f

File tree: 141 files changed (+4413, -2693 lines)

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
@@ -1,3 +1,11 @@
+## 1.0.16 (27-05-2023)
+
+#### Improvements
+- Raise warning when TVDB returns malformed data
+- Update many JavaScript and Python dependencies
+
+-----
+
 ## 1.0.15 (21-05-2023)
 
 #### Fixes

ext/bs4/__init__.py

Lines changed: 43 additions & 16 deletions
@@ -15,7 +15,7 @@
 """
 
 __author__ = "Leonard Richardson ([email protected])"
-__version__ = "4.11.2"
+__version__ = "4.12.2"
 __copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
@@ -38,11 +38,13 @@
     builder_registry,
     ParserRejectedMarkup,
     XMLParsedAsHTMLWarning,
+    HTMLParserTreeBuilder
 )
 from .dammit import UnicodeDammit
 from .element import (
     CData,
     Comment,
+    CSS,
     DEFAULT_OUTPUT_ENCODING,
     Declaration,
     Doctype,
@@ -116,7 +118,7 @@ class BeautifulSoup(Tag):
     ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
 
     NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
-
+
     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  element_classes=None, **kwargs):
@@ -348,25 +350,49 @@ def deprecated_argument(old_name, new_name):
         self.markup = None
         self.builder.soup = None
 
-    def __copy__(self):
-        """Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
-        copy = type(self)(
-            self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
-        )
+    def _clone(self):
+        """Create a new BeautifulSoup object with the same TreeBuilder,
+        but not associated with any markup.
 
-        # Although we encoded the tree to UTF-8, that may not have
-        # been the encoding of the original markup. Set the copy's
-        # .original_encoding to reflect the original object's
-        # .original_encoding.
-        copy.original_encoding = self.original_encoding
-        return copy
+        This is the first step of the deepcopy process.
+        """
+        clone = type(self)("", None, self.builder)
 
+        # Keep track of the encoding of the original document,
+        # since we won't be parsing it again.
+        clone.original_encoding = self.original_encoding
+        return clone
+
     def __getstate__(self):
         # Frequently a tree builder can't be pickled.
         d = dict(self.__dict__)
         if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
-            d['builder'] = None
+            d['builder'] = type(self.builder)
+        # Store the contents as a Unicode string.
+        d['contents'] = []
+        d['markup'] = self.decode()
+
+        # If _most_recent_element is present, it's a Tag object left
+        # over from initial parse. It might not be picklable and we
+        # don't need it.
+        if '_most_recent_element' in d:
+            del d['_most_recent_element']
         return d
+
+    def __setstate__(self, state):
+        # If necessary, restore the TreeBuilder by looking it up.
+        self.__dict__ = state
+        if isinstance(self.builder, type):
+            self.builder = self.builder()
+        elif not self.builder:
+            # We don't know which builder was used to build this
+            # parse tree, so use a default we know is always available.
+            self.builder = HTMLParserTreeBuilder()
+        self.builder.soup = self
+        self.reset()
+        self._feed()
+        return state
+
 
     @classmethod
     def _decode_markup(cls, markup):
@@ -468,6 +494,7 @@ def reset(self):
         self.open_tag_counter = Counter()
         self.preserve_whitespace_tag_stack = []
         self.string_container_stack = []
+        self._most_recent_element = None
         self.pushTag(self)
 
     def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@@ -749,7 +776,7 @@ def handle_data(self, data):
 
     def decode(self, pretty_print=False,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", iterator=None):
         """Returns a string or Unicode representation of the parse tree
         as an HTML or XML document.
 
@@ -776,7 +803,7 @@ def decode(self, pretty_print=False,
         else:
             indent_level = 0
         return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding, formatter)
+            indent_level, eventual_encoding, formatter, iterator)
 
 # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
 _s = BeautifulSoup
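
Note: the copy and pickle changes above track upstream Beautiful Soup 4.12 — __getstate__ now stores the decoded markup (and the builder's class when the builder itself can't be pickled), and __setstate__ restores the builder and re-parses. The sketch below is not part of this commit; it is a minimal, hedged illustration of what that protocol enables, assuming a vendored bs4 4.12.x with the bundled html.parser builder and made-up sample markup.

import copy
import pickle

from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>Some <b>bold</b> text</p>", "html.parser")

# Pickling: __getstate__ drops the parsed tree and keeps the markup as a
# string; __setstate__ re-parses it, so the round trip rebuilds the tree.
restored = pickle.loads(pickle.dumps(soup))
print(restored.b.string)        # bold

# Deep-copying: per the _clone() docstring, the copy starts from a bare
# soup sharing the TreeBuilder, then the element tree is copied into it.
duplicate = copy.deepcopy(soup)
print(duplicate.p.get_text())   # Some bold text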

ext/bs4/builder/_htmlparser.py

Lines changed: 24 additions & 1 deletion
@@ -24,6 +24,7 @@
 
 from bs4.builder import (
     DetectsXMLParsedAsHTML,
+    ParserRejectedMarkup,
     HTML,
     HTMLTreeBuilder,
     STRICT,
@@ -70,6 +71,22 @@ def __init__(self, *args, **kwargs):
 
         self._initialize_xml_detector()
 
+    def error(self, message):
+        # NOTE: This method is required so long as Python 3.9 is
+        # supported. The corresponding code is removed from HTMLParser
+        # in 3.5, but not removed from ParserBase until 3.10.
+        # https://github.com/python/cpython/issues/76025
+        #
+        # The original implementation turned the error into a warning,
+        # but in every case I discovered, this made HTMLParser
+        # immediately crash with an error message that was less
+        # helpful than the warning. The new implementation makes it
+        # more clear that html.parser just can't parse this
+        # markup. The 3.10 implementation does the same, though it
+        # raises AssertionError rather than calling a method. (We
+        # catch this error and wrap it in a ParserRejectedMarkup.)
+        raise ParserRejectedMarkup(message)
+
     def handle_startendtag(self, name, attrs):
         """Handle an incoming empty-element tag.
 
@@ -359,6 +376,12 @@ def feed(self, markup):
         args, kwargs = self.parser_args
         parser = BeautifulSoupHTMLParser(*args, **kwargs)
         parser.soup = self.soup
-        parser.feed(markup)
+        try:
+            parser.feed(markup)
+        except AssertionError as e:
+            # html.parser raises AssertionError in rare cases to
+            # indicate a fatal problem with the markup, especially
+            # when there's an error in the doctype declaration.
+            raise ParserRejectedMarkup(e)
         parser.close()
         parser.already_closed_empty_element = []
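
Note: for callers of the vendored bs4, the net effect of the two hunks above is that markup html.parser cannot handle surfaces as ParserRejectedMarkup rather than a bare AssertionError or a warning followed by a crash. The sketch below is not part of this commit; the malformed marked section is only a guess at input html.parser may reject, and the try/except shows the exception type to be prepared for either way.

from bs4 import BeautifulSoup
from bs4.builder import ParserRejectedMarkup

# Input that html.parser is likely to reject (unknown marked-section
# keyword); whether it actually does depends on html.parser internals.
bad_markup = "<![bogus[ this marked section has an unknown keyword ]]>"

try:
    soup = BeautifulSoup(bad_markup, "html.parser")
    print("parsed:", soup)
except ParserRejectedMarkup as exc:
    print("html.parser rejected the markup:", exc)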
