From da0329ce8d0094e17481d49d877d90423e8a1033 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 26 Jul 2025 21:12:17 +0500 Subject: [PATCH 1/4] Add Python 3.14 RC1 to CI. --- .github/workflows/tests-macos.yml | 2 +- .github/workflows/tests-ubuntu.yml | 2 +- .github/workflows/tests-windows.yml | 2 +- pyproject.toml | 1 + tox.ini | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests-macos.yml b/.github/workflows/tests-macos.yml index 851a40e..7b1bcb7 100644 --- a/.github/workflows/tests-macos.yml +++ b/.github/workflows/tests-macos.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.1"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/tests-ubuntu.yml b/.github/workflows/tests-ubuntu.yml index 75a06bd..95b0839 100644 --- a/.github/workflows/tests-ubuntu.yml +++ b/.github/workflows/tests-ubuntu.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.10"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.1", "pypy3.10"] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/tests-windows.yml b/.github/workflows/tests-windows.yml index e56da2e..11e5917 100644 --- a/.github/workflows/tests-windows.yml +++ b/.github/workflows/tests-windows.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.1"] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index 782657e..0dc257c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] diff --git a/tox.ini b/tox.ini index 026741a..a0a9413 100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,7 @@ deps = commands = pytest --cov=cssselect \ --cov-report=term-missing --cov-report=html --cov-report=xml \ - --verbose {posargs: cssselect tests docs} + {posargs: cssselect tests docs} [testenv:pylint] deps = From 6c9d2191048e19fa7a2bb9346647d2096075e523 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 26 Jul 2025 21:22:11 +0500 Subject: [PATCH 2/4] Bump ruff. --- .pre-commit-config.yaml | 4 +- cssselect/parser.py | 98 ++++++++++++++++++++--------------------- docs/conf.py | 2 +- pyproject.toml | 15 ++++++- tests/test_cssselect.py | 51 ++++++++++++--------- 5 files changed, 96 insertions(+), 74 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c92c4d..a8eebd9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.11.2 + rev: v0.12.5 hooks: - - id: ruff + - id: ruff-check args: [ --fix ] - id: ruff-format diff --git a/cssselect/parser.py b/cssselect/parser.py index e970a1b..5bca712 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -441,9 +441,9 @@ class Hash: Represents selector#id """ - def __init__(self, selector: Tree, id: str) -> None: + def __init__(self, selector: Tree, id_: str) -> None: self.selector = selector - self.id = id + self.id = id_ def __repr__(self) -> str: return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]" @@ -660,13 +660,13 @@ def parse_simple_selector( argument, argument_pseudo_element = parse_simple_selector( stream, inside_negation=True ) - next = stream.next() + next_ = stream.next() if argument_pseudo_element: raise SelectorSyntaxError( - f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next.pos}" + f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next_.pos}" ) - if next != ("DELIM", ")"): - raise SelectorSyntaxError(f"Expected ')', got {next}") + if next_ != ("DELIM", ")"): + raise SelectorSyntaxError(f"Expected ')', got {next_}") result = Negation(result, argument) elif ident.lower() == "has": combinator, arguments = parse_relative_selector(stream) @@ -687,46 +687,46 @@ def parse_simple_selector( return result, pseudo_element -def parse_arguments(stream: TokenStream) -> list[Token]: +def parse_arguments(stream: TokenStream) -> list[Token]: # noqa: RET503 arguments: list[Token] = [] - while 1: # noqa: RET503 + while 1: stream.skip_whitespace() - next = stream.next() - if next.type in ("IDENT", "STRING", "NUMBER") or next in [ + next_ = stream.next() + if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [ ("DELIM", "+"), ("DELIM", "-"), ]: - arguments.append(next) - elif next == ("DELIM", ")"): + arguments.append(next_) + elif next_ == ("DELIM", ")"): return arguments else: - raise SelectorSyntaxError(f"Expected an argument, got {next}") + raise SelectorSyntaxError(f"Expected an argument, got {next_}") -def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: +def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]: # noqa: RET503 stream.skip_whitespace() subselector = "" - next = stream.next() + next_ = stream.next() - if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: - combinator = next + if next_ in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: + combinator = next_ stream.skip_whitespace() - next = stream.next() + next_ = stream.next() else: combinator = Token("DELIM", " ", pos=0) - while 1: # noqa: RET503 - if next.type in ("IDENT", "STRING", "NUMBER") or next in [ + while 1: + if next_.type in ("IDENT", "STRING", "NUMBER") or next_ in [ ("DELIM", "."), ("DELIM", "*"), ]: - subselector += cast("str", next.value) - elif next == ("DELIM", ")"): + subselector += cast("str", next_.value) + elif next_ == ("DELIM", ")"): result = parse(subselector) return combinator, result[0] else: - raise SelectorSyntaxError(f"Expected an argument, got {next}") - next = stream.next() + raise SelectorSyntaxError(f"Expected an argument, got {next_}") + next_ = stream.next() def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: @@ -738,16 +738,16 @@ def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]: f"Got pseudo-element ::{pseudo_element} inside function" ) stream.skip_whitespace() - next = stream.next() - if next in (("EOF", None), ("DELIM", ",")): + next_ = stream.next() + if next_ in (("EOF", None), ("DELIM", ",")): stream.next() stream.skip_whitespace() arguments.append(result) - elif next == ("DELIM", ")"): + elif next_ == ("DELIM", ")"): arguments.append(result) break else: - raise SelectorSyntaxError(f"Expected an argument, got {next}") + raise SelectorSyntaxError(f"Expected an argument, got {next_}") return arguments @@ -772,26 +772,26 @@ def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib: namespace = op = None if op is None: stream.skip_whitespace() - next = stream.next() - if next == ("DELIM", "]"): + next_ = stream.next() + if next_ == ("DELIM", "]"): return Attrib(selector, namespace, cast("str", attrib), "exists", None) - if next == ("DELIM", "="): + if next_ == ("DELIM", "="): op = "=" - elif next.is_delim("^", "$", "*", "~", "|", "!") and ( + elif next_.is_delim("^", "$", "*", "~", "|", "!") and ( stream.peek() == ("DELIM", "=") ): - op = cast("str", next.value) + "=" + op = cast("str", next_.value) + "=" stream.next() else: - raise SelectorSyntaxError(f"Operator expected, got {next}") + raise SelectorSyntaxError(f"Operator expected, got {next_}") stream.skip_whitespace() value = stream.next() if value.type not in ("IDENT", "STRING"): raise SelectorSyntaxError(f"Expected string or ident, got {value}") stream.skip_whitespace() - next = stream.next() - if next != ("DELIM", "]"): - raise SelectorSyntaxError(f"Expected ']', got {next}") + next_ = stream.next() + if next_ != ("DELIM", "]"): + raise SelectorSyntaxError(f"Expected ']', got {next_}") return Attrib(selector, namespace, cast("str", attrib), op, value) @@ -1015,9 +1015,9 @@ def next(self) -> Token: assert self.peeked is not None self.used.append(self.peeked) return self.peeked - next = self.next_token() - self.used.append(next) - return next + next_ = self.next_token() + self.used.append(next_) + return next_ def peek(self) -> Token: if not self._peeking: @@ -1027,18 +1027,18 @@ def peek(self) -> Token: return self.peeked def next_ident(self) -> str: - next = self.next() - if next.type != "IDENT": - raise SelectorSyntaxError(f"Expected ident, got {next}") - return cast("str", next.value) + next_ = self.next() + if next_.type != "IDENT": + raise SelectorSyntaxError(f"Expected ident, got {next_}") + return cast("str", next_.value) def next_ident_or_star(self) -> str | None: - next = self.next() - if next.type == "IDENT": - return next.value - if next == ("DELIM", "*"): + next_ = self.next() + if next_.type == "IDENT": + return next_.value + if next_ == ("DELIM", "*"): return None - raise SelectorSyntaxError(f"Expected ident or '*', got {next}") + raise SelectorSyntaxError(f"Expected ident or '*', got {next_}") def skip_whitespace(self) -> None: peek = self.peek() diff --git a/docs/conf.py b/docs/conf.py index ceeb2d2..5713d17 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ # General information about the project. project = "cssselect" -copyright = "2012-2017, Simon Sapin, Scrapy developers" +project_copyright = "2012-2017, Simon Sapin, Scrapy developers" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/pyproject.toml b/pyproject.toml index 0dc257c..8506c66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,10 +105,16 @@ testpaths = ["tests"] [tool.ruff.lint] extend-select = [ + # flake8-builtins + "A", + # flake8-async + "ASYNC", # flake8-bugbear "B", # flake8-comprehensions "C4", + # flake8-commas + "COM", # pydocstyle "D", # flake8-future-annotations @@ -131,6 +137,8 @@ extend-select = [ "PIE", # pylint "PL", + # flake8-pytest-style + "PT", # flake8-use-pathlib "PTH", # flake8-pyi @@ -161,6 +169,8 @@ extend-select = [ "YTT", ] ignore = [ + # Trailing comma missing + "COM812", # Missing docstring in public module "D100", # Missing docstring in public class @@ -213,9 +223,10 @@ ignore = [ "RUF012", # Use of `assert` detected "S101", - # Using lxml to parse untrusted data is known to be vulnerable to XML attacks - "S320", ] +[tool.ruff.lint.isort] +split-on-trailing-comma = false + [tool.ruff.lint.pydocstyle] convention = "pep257" diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index 2b89b6f..dc67bb7 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -23,6 +23,7 @@ import unittest from typing import TYPE_CHECKING +import pytest from lxml import etree, html from cssselect import ( @@ -268,12 +269,8 @@ def test_pseudo_repr(css: str) -> str: (selector,) = parse("e::foo") assert selector.pseudo_element == "foo" assert tr.selector_to_xpath(selector, prefix="") == "e" - self.assertRaises( - ExpressionError, - tr.selector_to_xpath, - selector, - translate_pseudo_elements=True, - ) + with pytest.raises(ExpressionError): + tr.selector_to_xpath(selector, translate_pseudo_elements=True) # Special test for the unicode symbols and ':scope' element if check # Errors if use repr() instead of __repr__() @@ -567,19 +564,32 @@ def xpath(css: str) -> str: assert xpath(r"[h\a0 ref]") == ("*[attribute::*[name() = 'h ref']]") # h\xa0ref assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]") - self.assertRaises(ExpressionError, xpath, ":fİrst-child") - self.assertRaises(ExpressionError, xpath, ":first-of-type") - self.assertRaises(ExpressionError, xpath, ":only-of-type") - self.assertRaises(ExpressionError, xpath, ":last-of-type") - self.assertRaises(ExpressionError, xpath, ":nth-of-type(1)") - self.assertRaises(ExpressionError, xpath, ":nth-last-of-type(1)") - self.assertRaises(ExpressionError, xpath, ":nth-child(n-)") - self.assertRaises(ExpressionError, xpath, ":after") - self.assertRaises(ExpressionError, xpath, ":lorem-ipsum") - self.assertRaises(ExpressionError, xpath, ":lorem(ipsum)") - self.assertRaises(ExpressionError, xpath, "::lorem-ipsum") - self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4) - self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo") + with pytest.raises(ExpressionError): + xpath(":fİrst-child") + with pytest.raises(ExpressionError): + xpath(":first-of-type") + with pytest.raises(ExpressionError): + xpath(":only-of-type") + with pytest.raises(ExpressionError): + xpath(":last-of-type") + with pytest.raises(ExpressionError): + xpath(":nth-of-type(1)") + with pytest.raises(ExpressionError): + xpath(":nth-last-of-type(1)") + with pytest.raises(ExpressionError): + xpath(":nth-child(n-)") + with pytest.raises(ExpressionError): + xpath(":after") + with pytest.raises(ExpressionError): + xpath(":lorem-ipsum") + with pytest.raises(ExpressionError): + xpath(":lorem(ipsum)") + with pytest.raises(ExpressionError): + xpath("::lorem-ipsum") + with pytest.raises(TypeError): + GenericTranslator().css_to_xpath(4) # type: ignore[arg-type] + with pytest.raises(TypeError): + GenericTranslator().selector_to_xpath("foo") # type: ignore[arg-type] def test_unicode(self) -> None: css = ".a\xc1b" @@ -967,7 +977,8 @@ def pcss(main: str, *selectors: str, **kwargs: bool) -> list[str]: assert pcss("span:only-child") == ["foobar-span"] assert pcss("li div:only-child") == ["li-div"] assert pcss("div *:only-child") == ["li-div", "foobar-span"] - self.assertRaises(ExpressionError, pcss, "p *:only-of-type") + with pytest.raises(ExpressionError): + pcss("p *:only-of-type") assert pcss("p:only-of-type") == ["paragraph"] assert pcss("a:empty", "a:EMpty") == ["name-anchor"] assert pcss("li:empty") == ["third-li", "fourth-li", "fifth-li", "sixth-li"] From 0ee48e6317fa5a39ce1b399b3517d3121e742276 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 26 Jul 2025 21:29:50 +0500 Subject: [PATCH 3/4] Bump mypy and pylint. --- cssselect/xpath.py | 8 ++++---- tox.ini | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cssselect/xpath.py b/cssselect/xpath.py index bc47dea..96eac3f 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -825,7 +825,7 @@ def __init__(self, xhtml: bool = False) -> None: self.lower_case_element_names = True self.lower_case_attribute_names = True - def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] + def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr: # FIXME: is this really all the elements? return xpath.add_condition( "(@selected and name(.) = 'option') or " @@ -850,7 +850,7 @@ def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr f"'-'), {arg})]" ) - def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] + def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: return xpath.add_condition( "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')" ) @@ -858,7 +858,7 @@ def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[over # Links are never visited, the implementation for :visited is the same # as in GenericTranslator - def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] + def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ @@ -888,7 +888,7 @@ def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[ # FIXME: in the second half, add "and is not a descendant of that # fieldset element's first legend element child, if any." - def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # type: ignore[override] + def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr: # http://www.w3.org/TR/html5/section-index.html#attributes-1 return xpath.add_condition( """ diff --git a/tox.ini b/tox.ini index a0a9413..01794d6 100644 --- a/tox.ini +++ b/tox.ini @@ -16,7 +16,7 @@ commands = [testenv:pylint] deps = {[testenv]deps} - pylint==3.3.5 + pylint==3.3.7 commands = pylint {posargs: cssselect tests docs} @@ -30,8 +30,8 @@ commands = [testenv:typing] deps = {[testenv]deps} - mypy==1.15.0 - types-lxml==2025.3.4 + mypy==1.17.0 + types-lxml==2025.3.30 commands = mypy --strict {posargs: cssselect tests} From 52aabe2e7f974c8eda16190c62450cd4c32efb1e Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 26 Jul 2025 21:30:29 +0500 Subject: [PATCH 4/4] Add more linters. --- .git-blame-ignore-revs | 2 +- .github/workflows/checks.yml | 2 +- .pre-commit-config.yaml | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 9d2c8f6..bb4f6e1 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,2 @@ # applying pre-commit hooks to the project -e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb \ No newline at end of file +e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 666aaba..1607756 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -35,7 +35,7 @@ jobs: pip install -U pip pip install -U tox tox - + pre-commit: runs-on: ubuntu-latest steps: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a8eebd9..119b328 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,3 +5,18 @@ repos: - id: ruff-check args: [ --fix ] - id: ruff-format +- repo: https://github.com/adamchainz/blacken-docs + rev: 1.19.1 + hooks: + - id: blacken-docs + additional_dependencies: + - black==25.1.0 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v1.0.0 + hooks: + - id: sphinx-lint