extra/DEVELOPER_DOCS/Listeners.md (1 addition, 1 deletion)
@@ -194,7 +194,7 @@ model = chain(
)
```

-but the standalone `Tok2VecTransformer` has an additional `split_trf_batch` chained inbetween the model
+but the standalone `Tok2VecTransformer` has an additional `split_trf_batch` chained in between the model
and `trfs2arrays`:

```
...
```
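For context on the chaining described in this hunk: Thinc's `chain` combinator composes layers so that each output feeds the next, which is how `split_trf_batch` can be slotted between the model and `trfs2arrays`. A minimal runnable sketch with stand-in `Linear`/`Relu` layers, not the actual transformer pipeline:

```
import numpy
from thinc.api import Linear, Relu, chain

# chain() composes layers left to right; the middle layer's dimensions
# are inferred during initialization.
model = chain(Linear(nO=4, nI=8), Relu(), Linear(nO=2, nI=4))
model.initialize(X=numpy.zeros((1, 8), dtype="f"))
Y = model.predict(numpy.zeros((3, 8), dtype="f"))
print(Y.shape)  # (3, 2)
```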
extra/DEVELOPER_DOCS/Satellite Packages.md (2 additions, 2 deletions)
@@ -6,7 +6,7 @@ This is a list of all the active repos relevant to spaCy besides the main one, w

These packages are always pulled in when you install spaCy. Most of them are direct dependencies, but some are transitive dependencies through other packages.

-- [spacy-legacy](https://github.com/explosion/spacy-legacy): When an architecture in spaCy changes enough to get a new version, the old version is frozen and moved to spacy-legacy. This allows us to keep the core library slim while also preserving backwards compatability.
+- [spacy-legacy](https://github.com/explosion/spacy-legacy): When an architecture in spaCy changes enough to get a new version, the old version is frozen and moved to spacy-legacy. This allows us to keep the core library slim while also preserving backwards compatibility.
- [thinc](https://github.com/explosion/thinc): Thinc is the machine learning library that powers trainable components in spaCy. It wraps backends like Numpy, PyTorch, and Tensorflow to provide a functional interface for specifying architectures.
- [catalogue](https://github.com/explosion/catalogue): Small library for adding function registries, like those used for model architectures in spaCy.
- [confection](https://github.com/explosion/confection): This library contains the functionality for config parsing that was formerly contained directly in Thinc.
@@ -67,7 +67,7 @@ These repos are used to support the spaCy docs or otherwise present information

These repos are used for organizing data around spaCy, but are not something an end user would need to install as part of using the library.

-- [spacy-models](https://github.com/explosion/spacy-models): This repo contains metadata (but not training data) for all the spaCy models. This includes information about where their training data came from, version compatability, and performance information. It also includes tests for the model packages, and the built models are hosted as releases of this repo.
+- [spacy-models](https://github.com/explosion/spacy-models): This repo contains metadata (but not training data) for all the spaCy models. This includes information about where their training data came from, version compatibility, and performance information. It also includes tests for the model packages, and the built models are hosted as releases of this repo.
- [wheelwright](https://github.com/explosion/wheelwright): A tool for automating our PyPI builds and releases.
- [ec2buildwheel](https://github.com/explosion/ec2buildwheel): A small project that allows you to build Python packages in the manner of cibuildwheel, but on any EC2 image. Used by wheelwright.

extra/DEVELOPER_DOCS/StringStore-Vocab.md (1 addition, 1 deletion)
@@ -145,7 +145,7 @@ These are things stored in the vocab:
- `get_noun_chunks`: a syntax iterator
- lex attribute getters: functions like `is_punct`, set in language defaults
- `cfg`: **not** the pipeline config, this is mostly unused
-- `_unused_object`: Formerly an unused object, kept around until v4 for compatability
+- `_unused_object`: Formerly an unused object, kept around until v4 for compatibility

Some of these, like the Morphology and Vectors, are complex enough that they
need their own explanations. Here we'll just look at Vocab-specific items.
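As a quick illustration of the lex attribute getters mentioned in this hunk (a usage sketch, assuming a plain spaCy install):

```
import spacy

# Lexemes fetched from the vocab get attributes like is_punct computed
# by the getter functions set in the language defaults.
nlp = spacy.blank("en")
lex = nlp.vocab["!"]
print(lex.is_punct)  # True
```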
extra/example_data/textcat_example_data/CC_BY-SA-3.0.txt (2 additions, 2 deletions)
@@ -34,7 +34,7 @@ CONDITIONS.
Collection will not be considered an Adaptation for the purpose of
this License. For the avoidance of doubt, where the Work is a musical
work, performance or phonogram, the synchronization of the Work in
-timed-relation with a moving image ("synching") will be considered an
+timed-relation with a moving image ("syncing") will be considered an
Adaptation for the purpose of this License.
b. "Collection" means a collection of literary or artistic works, such as
encyclopedias and anthologies, or performances, phonograms or
@@ -264,7 +264,7 @@ subject to and limited by the following restrictions:
UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING, LICENSOR
OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY
KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE,
-INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY,
+INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF
LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS,
WHETHER OR NOT DISCOVERABLE. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION
spacy/cli/_util.py (1 addition, 1 deletion)
@@ -99,7 +99,7 @@ def parse_config_overrides(
    RETURNS (Dict[str, Any]): The parsed dict, keyed by nested config setting.
    """
    env_string = os.environ.get(env_var, "") if env_var else ""
-    env_overrides = _parse_overrides(split_arg_string(env_string))
+    env_overrides = _parse_overrides(split_arg_string(env_string))  # type: ignore[operator]
    cli_overrides = _parse_overrides(args, is_cli=True)
    if cli_overrides:
        keys = [k for k in cli_overrides if k not in env_overrides]
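The environment string handled above is split into CLI-style tokens before the overrides are parsed. A rough sketch of that splitting step, using `shlex.split` as a stand-in for the `split_arg_string` helper touched in this hunk:

```
import shlex

# Hypothetical override string; dotted keys address nested config sections.
env_string = "--training.batch_size 128 --paths.train ./train.spacy"
args = shlex.split(env_string)
print(args)  # ['--training.batch_size', '128', '--paths.train', './train.spacy']
```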
spacy/cli/info.py (1 addition, 1 deletion)
@@ -84,7 +84,7 @@ def info(


def info_spacy() -> Dict[str, Any]:
-    """Generate info about the current spaCy intallation.
+    """Generate info about the current spaCy installation.

    RETURNS (dict): The spaCy info.
    """
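`info_spacy` backs the public `spacy.info()` helper; a minimal usage sketch (assuming spaCy is installed in the current environment):

```
import spacy

# Returns a dict describing the current installation, including versions
# and installed pipelines.
details = spacy.info()
print(details["spacy_version"])
```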
spacy/glossary.py (1 addition, 1 deletion)
@@ -354,7 +354,7 @@ def explain(term):
    # https://github.com/ltgoslo/norne
    "EVT": "Festivals, cultural events, sports events, weather phenomena, wars, etc.",
    "PROD": "Product, i.e. artificially produced entities including speeches, radio shows, programming languages, contracts, laws and ideas",
-    "DRV": "Words (and phrases?) that are dervied from a name, but not a name in themselves, e.g. 'Oslo-mannen' ('the man from Oslo')",
+    "DRV": "Words (and phrases?) that are derived from a name, but not a name in themselves, e.g. 'Oslo-mannen' ('the man from Oslo')",
    "GPE_LOC": "Geo-political entity, with a locative sense, e.g. 'John lives in Spain'",
    "GPE_ORG": "Geo-political entity, with an organisation sense, e.g. 'Spain declined to meet with Belgium'",
}
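These glossary entries are what the public `spacy.explain` helper looks up; for example (assuming a spaCy install):

```
import spacy

# explain() returns the glossary description for a label, or None if the
# label is unknown.
print(spacy.explain("DRV"))
```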
spacy/lang/ht/__init__.py (5 additions, 2 deletions)
@@ -5,11 +5,11 @@
from ...language import BaseDefaults, Language
from .lemmatizer import HaitianCreoleLemmatizer
from .lex_attrs import LEX_ATTRS
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .tag_map import TAG_MAP
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS


class HaitianCreoleDefaults(BaseDefaults):
@@ -22,10 +22,12 @@ class HaitianCreoleDefaults(BaseDefaults):
    stop_words = STOP_WORDS
    tag_map = TAG_MAP

+
class HaitianCreole(Language):
    lang = "ht"
    Defaults = HaitianCreoleDefaults

+
@HaitianCreole.factory(
"lemmatizer",
assigns=["token.lemma"],
Expand All @@ -49,4 +51,5 @@ def make_lemmatizer(
nlp.vocab, model, name, mode=mode, overwrite=overwrite, scorer=scorer
)


__all__ = ["HaitianCreole"]
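Once the language class is registered, it can be used like any other blank pipeline. A usage sketch, assuming a spaCy build that ships this `ht` subpackage:

```
import spacy

# spacy.blank("ht") instantiates HaitianCreole with the defaults above.
nlp = spacy.blank("ht")
doc = nlp("Mwen renmen Ayiti.")
print([t.text for t in doc])
```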
spacy/lang/ht/lemmatizer.py (1 addition, 1 deletion)
@@ -1,8 +1,8 @@
from typing import List, Tuple

+from ...lookups import Lookups
from ...pipeline import Lemmatizer
from ...tokens import Token
-from ...lookups import Lookups


class HaitianCreoleLemmatizer(Lemmatizer):
spacy/lang/ht/lex_attrs.py (3 additions, 0 deletions)
@@ -49,6 +49,7 @@
"P": "Pa",
}


def like_num(text):
    text = text.strip().lower()
    if text.startswith(("+", "-", "±", "~")):
@@ -69,9 +70,11 @@ def like_num(text):
        return True
    return False

+
def norm_custom(text):
    return NORM_MAP.get(text, text.lower())

+
LEX_ATTRS = {
    LIKE_NUM: like_num,
    NORM: norm_custom,
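A small self-contained sketch of how the `norm_custom` getter behaves (the map entry is copied from the hunk above; the inputs are made up):

```
NORM_MAP = {"P": "Pa"}

def norm_custom(text):
    # Fall back to lowercasing when the text has no entry in the map.
    return NORM_MAP.get(text, text.lower())

print(norm_custom("P"))     # "Pa" via the map
print(norm_custom("Mwen"))  # "mwen" via lowercasing
```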
spacy/lang/ht/punctuation.py (40 additions, 25 deletions)
@@ -4,10 +4,10 @@
    ALPHA_UPPER,
    CONCAT_QUOTES,
    HYPHENS,
-    LIST_PUNCT,
-    LIST_QUOTES,
    LIST_ELLIPSES,
    LIST_ICONS,
+    LIST_PUNCT,
+    LIST_QUOTES,
    merge_chars,
)

@@ -16,28 +16,43 @@
_prefixes_elision = "m n l y t k w"
_prefixes_elision += " " + _prefixes_elision.upper()

-TOKENIZER_PREFIXES = LIST_PUNCT + LIST_QUOTES + [
-    r"(?:({pe})[{el}])(?=[{a}])".format(
-        a=ALPHA, el=ELISION, pe=merge_chars(_prefixes_elision)
-    )
-]
+TOKENIZER_PREFIXES = (
+    LIST_PUNCT
+    + LIST_QUOTES
+    + [
+        r"(?:({pe})[{el}])(?=[{a}])".format(
+            a=ALPHA, el=ELISION, pe=merge_chars(_prefixes_elision)
+        )
+    ]
+)

-TOKENIZER_SUFFIXES = LIST_PUNCT + LIST_QUOTES + LIST_ELLIPSES + [
-    r"(?<=[0-9])%",  # numbers like 10%
-    r"(?<=[0-9])(?:{h})".format(h=HYPHENS),  # hyphens after numbers
-    r"(?<=[{a}])['’]".format(a=ALPHA),  # apostrophes after letters
-    r"(?<=[{a}])['’][mwlnytk](?=\s|$)".format(a=ALPHA),  # contractions
-    r"(?<=[{a}0-9])\)",  # right parenthesis after letter/number
-    r"(?<=[{a}])\.(?=\s|$)".format(a=ALPHA),  # period after letter if space or end of string
-    r"(?<=\))[\.\?!]",  # punctuation immediately after right parenthesis
-]
+TOKENIZER_SUFFIXES = (
+    LIST_PUNCT
+    + LIST_QUOTES
+    + LIST_ELLIPSES
+    + [
+        r"(?<=[0-9])%",  # numbers like 10%
+        r"(?<=[0-9])(?:{h})".format(h=HYPHENS),  # hyphens after numbers
+        r"(?<=[{a}])['’]".format(a=ALPHA),  # apostrophes after letters
+        r"(?<=[{a}])['’][mwlnytk](?=\s|$)".format(a=ALPHA),  # contractions
+        r"(?<=[{a}0-9])\)",  # right parenthesis after letter/number
+        r"(?<=[{a}])\.(?=\s|$)".format(
+            a=ALPHA
+        ),  # period after letter if space or end of string
+        r"(?<=\))[\.\?!]",  # punctuation immediately after right parenthesis
+    ]
+)

-TOKENIZER_INFIXES = LIST_ELLIPSES + LIST_ICONS + [
-    r"(?<=[0-9])[+\-\*^](?=[0-9-])",
-    r"(?<=[{al}{q}])\.(?=[{au}{q}])".format(
-        al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES
-    ),
-    r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
-    r"(?<=[{a}0-9])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
-    r"(?<=[{a}][{el}])(?=[{a}])".format(a=ALPHA, el=ELISION),
-]
+TOKENIZER_INFIXES = (
+    LIST_ELLIPSES
+    + LIST_ICONS
+    + [
+        r"(?<=[0-9])[+\-\*^](?=[0-9-])",
+        r"(?<=[{al}{q}])\.(?=[{au}{q}])".format(
+            al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES
+        ),
+        r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
+        r"(?<=[{a}0-9])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
+        r"(?<=[{a}][{el}])(?=[{a}])".format(a=ALPHA, el=ELISION),
+    ]
+)
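For a sense of how suffix rules like these take effect: spaCy joins them into a single end-anchored regex (the composition below mirrors `spacy.util.compile_suffix_regex`, though the exact joining scheme is an assumption, not the library source):

```
import re

# Two of the suffix rules from the hunk above, combined and anchored.
suffixes = [r"(?<=[0-9])%", r"(?<=\))[\.\?!]"]
suffix_re = re.compile("|".join(s + "$" for s in suffixes))
print(bool(suffix_re.search("10%")))    # True: "%" splits off after a digit
print(bool(suffix_re.search("(sa)?")))  # True: "?" right after ")"
```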
spacy/lang/ht/stop_words.py (1 addition, 2 deletions)
@@ -39,8 +39,7 @@

men mèsi oswa osinon

"""
.split()
""".split()
)

# Add common contractions, with and without apostrophe variants
Expand Down
spacy/lang/ht/tag_map.py (19 additions, 1 deletion)
@@ -1,4 +1,22 @@
-from spacy.symbols import NOUN, VERB, AUX, ADJ, ADV, PRON, DET, ADP, SCONJ, CCONJ, PART, INTJ, NUM, PROPN, PUNCT, SYM, X
+from spacy.symbols import (
+    ADJ,
+    ADP,
+    ADV,
+    AUX,
+    CCONJ,
+    DET,
+    INTJ,
+    NOUN,
+    NUM,
+    PART,
+    PRON,
+    PROPN,
+    PUNCT,
+    SCONJ,
+    SYM,
+    VERB,
+    X,
+)

TAG_MAP = {
"NOUN": {"pos": NOUN},