beetbox · snejus · May 18, 2025 · May 8, 2025 · May 17, 2025
@@ -37,6 +37,7 @@
 
 import beets
 from beets import logging
+from beets.util.id_extractors import extract_release_id
 
 if sys.version_info >= (3, 10):
     from typing import ParamSpec
@@ -56,7 +57,6 @@
     from beets.importer import ImportSession, ImportTask
     from beets.library import Album, Item, Library
     from beets.ui import Subcommand
-    from beets.util.id_extractors import RegexDict
 
     # TYPE_CHECKING guard is needed for any derived type
     # which uses an import from `beets.library` and `beets.imported`
@@ -778,11 +778,6 @@ def __init__(self):
         super().__init__()
         self.config.add({"source_weight": 0.5})
 
-    @property
-    @abc.abstractmethod
-    def id_regex(self) -> RegexDict:
-        raise NotImplementedError
-
     @property
     @abc.abstractmethod
     def data_source(self) -> str:
@@ -872,24 +867,9 @@ def get_artist(
 
         return artist_string, artist_id
 
-    @staticmethod
-    def _get_id(url_type: str, id_: str, id_regex: RegexDict) -> str | None:
-        """Parse an ID from its URL if necessary.
-
-        :param url_type: Type of URL. Either 'album' or 'track'.
-        :param id_: Album/track ID or URL.
-        :param id_regex: A dictionary containing a regular expression
-            extracting an ID from an URL (if it's not an ID already) in
-            'pattern' and the number of the match group in 'match_group'.
-        :return: Album/track ID.
-        """
-        log.debug("Extracting {} ID from '{}'", url_type, id_)
-        match = re.search(id_regex["pattern"].format(url_type), str(id_))
-        if match:
-            id_ = match.group(id_regex["match_group"])
-            if id_:
-                return id_
-        return None
+    def _get_id(self, id_string: str) -> str | None:
+        """Extract this source's ID from an ID or URL string.
+
+        The lower-cased ``data_source`` name is passed to
+        ``extract_release_id`` as the source key, and that function's result
+        is returned unchanged.
+        """
+        source = self.data_source.lower()
+        return extract_release_id(source, id_string)
 
     def candidates(
         self,

@@ -14,63 +14,35 @@
 
 """Helpers around the extraction of album/track ID's from metadata sources."""
 
-import re
-from typing import TypedDict
-
-
-class RegexDict(TypedDict):
-    """A dictionary containing a regex pattern and the number of the
-    match group.
-    """
-
-    pattern: str
-    match_group: int
-
-
-# Spotify IDs consist of 22 alphanumeric characters
-# (zero-left-padded base62 representation of randomly generated UUID4)
-spotify_id_regex: RegexDict = {
-    "pattern": r"(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})",
-    "match_group": 2,
-}
-
-deezer_id_regex: RegexDict = {
-    "pattern": r"(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)",
-    "match_group": 4,
-}
-
-beatport_id_regex: RegexDict = {
-    "pattern": r"(^|beatport\.com/release/.+/)(\d+)$",
-    "match_group": 2,
-}
-
-# A note on Bandcamp: There is no such thing as a Bandcamp album or artist ID,
-# the URL can be used as the identifier. The Bandcamp metadata source plugin
-# works that way - https://github.com/snejus/beetcamp. Bandcamp album
-# URLs usually look like: https://nameofartist.bandcamp.com/album/nameofalbum
+from __future__ import annotations
 
+import re
 
-def extract_discogs_id_regex(album_id):
-    """Returns the Discogs_id or None."""
-    # Discogs-IDs are simple integers. In order to avoid confusion with
-    # other metadata plugins, we only look for very specific formats of the
-    # input string:
+# Compiled pattern for each lower-case metadata source name. The patterns are
+# applied with ``search`` and the first capture group is taken as the ID.
+PATTERN_BY_SOURCE = {
+    # Spotify IDs consist of 22 alphanumeric characters
+    # (zero-left-padded base62 representation of randomly generated UUID4)
+    "spotify": re.compile(r"(?:^|open\.spotify\.com/[^/]+/)([0-9A-Za-z]{22})"),
+    "deezer": re.compile(r"(?:^|deezer\.com/)(?:[a-z]*/)?(?:[^/]+/)?(\d+)"),
+    # Trailing numeric ID: either the whole string, or the last segment of a
+    # beatport.com/release/<...>/ URL.
+    "beatport": re.compile(r"(?:^|beatport\.com/release/.+/)(\d+)$"),
+    # UUID-shaped ID: 8-4-4-4-12 groups of word characters. Note that ``\w``
+    # accepts more than hexadecimal digits.
+    "musicbrainz": re.compile(r"(\w{8}(?:-\w{4}){3}-\w{12})"),
+    # Discogs: the formats the pattern below is meant to recognise:
     # - plain integer, optionally wrapped in brackets and prefixed by an
     #   'r', as this is how discogs displays the release ID on its webpage.
     # - legacy url format: discogs.com/<name of release>/release/<id>
     # - legacy url short format: discogs.com/release/<id>
     # - current url format: discogs.com/release/<id>-<name of release>
     # See #291, #4080 and #4085 for the discussions leading up to these
     # patterns.
-    # Regex has been tested here https://regex101.com/r/TOu7kw/1
+    "discogs": re.compile(
+        r"(?:^|\[?r|discogs\.com/(?:[^/]+/)?release/)(\d+)\b"
+    ),
+    # There is no such thing as a Bandcamp album or artist ID, the URL can be
+    # used as the identifier. The Bandcamp metadata source plugin works that way
+    # - https://github.com/snejus/beetcamp. Bandcamp album URLs usually look
+    # like: https://nameofartist.bandcamp.com/album/nameofalbum
+    "bandcamp": re.compile(r"(.+)"),
+    # Everything after the last slash, or the whole string if it has none.
+    "tidal": re.compile(r"([^/]+)$"),
+}
 
-    for pattern in [
-        r"^\[?r?(?P<id>\d+)\]?$",
-        r"discogs\.com/release/(?P<id>\d+)-?",
-        r"discogs\.com/[^/]+/release/(?P<id>\d+)",
-    ]:
-        match = re.search(pattern, album_id)
-        if match:
-            return int(match.group("id"))
 
+def extract_release_id(source: str, id_: str) -> str | None:
+    """Extract an ID for the given metadata source from an ID or URL string.
+
+    :param source: Key into ``PATTERN_BY_SOURCE``, e.g. ``"discogs"``. It is
+        lower-cased before the lookup; an unknown source yields ``None``
+        rather than raising ``KeyError``.
+    :param id_: ID or URL to search. It is converted with ``str`` first, so
+        non-string IDs such as integers are accepted too.
+    :return: The first capture group of the source's pattern, or ``None`` when
+        the source is unknown or the pattern does not match.
+    """
+    pattern = PATTERN_BY_SOURCE.get(source.lower())
+    # Compare against None explicitly instead of relying on the truthiness of
+    # a compiled pattern object.
+    if pattern is not None and (m := pattern.search(str(id_))):
+        return m[1]
     return None
@@ -30,7 +30,6 @@
 import beets.ui
 from beets.autotag.hooks import AlbumInfo, TrackInfo
 from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
-from beets.util.id_extractors import beatport_id_regex
 
 AUTH_ERRORS = (TokenRequestDenied, TokenMissing, VerifierMissing)
 USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
@@ -282,7 +281,6 @@ def __init__(self, data):
 
 class BeatportPlugin(BeetsPlugin):
     data_source = "Beatport"
-    id_regex = beatport_id_regex
 
     def __init__(self):
         super().__init__()
@@ -394,8 +392,7 @@ def album_for_id(self, release_id):
         """
         self._log.debug("Searching for release {0}", release_id)
 
-        release_id = self._get_id("album", release_id, self.id_regex)
-        if release_id is None:
+        if not (release_id := self._get_id(release_id)):
             self._log.debug("Not a valid Beatport release ID.")
             return None
 

@@ -14,6 +14,8 @@
 
 """Adds Deezer release and track search support to the autotagger"""
 
+from __future__ import annotations
+
 import collections
 import time
 
@@ -25,10 +27,9 @@
 from beets.dbcore import types
 from beets.library import DateType
 from beets.plugins import BeetsPlugin, MetadataSourcePlugin
-from beets.util.id_extractors import deezer_id_regex
 
 
 class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):
    data_source = "Deezer"

    item_types = {
@@ -43,8 +44,6 @@
     album_url = "https://api.deezer.com/album/"
     track_url = "https://api.deezer.com/track/"
 
-    id_regex = deezer_id_regex
-
     def __init__(self):
         super().__init__()
 
@@ -75,21 +74,15 @@
             return None
         return data
 
-    def album_for_id(self, album_id):
-        """Fetch an album by its Deezer ID or URL and return an
-        AlbumInfo object or None if the album is not found.
-
-        :param album_id: Deezer ID or URL for the album.
-        :type album_id: str
-        :return: AlbumInfo object for album.
-        :rtype: beets.autotag.hooks.AlbumInfo or None
-        """
-        deezer_id = self._get_id("album", album_id, self.id_regex)
-        if deezer_id is None:
+    def album_for_id(self, album_id: str) -> AlbumInfo | None:
+        """Fetch an album by its Deezer ID or URL."""
+        if not (deezer_id := self._get_id(album_id)):
             return None
-        album_data = self.fetch_data(self.album_url + deezer_id)
-        if album_data is None:
+
+        album_url = f"{self.album_url}{deezer_id}"
+        if not (album_data := self.fetch_data(album_url)):
             return None
+
         contributors = album_data.get("contributors")
         if contributors is not None:
             artist, artist_id = self.get_artist(contributors)
@@ -132,7 +125,7 @@
             tracks_data.extend(tracks_obj["data"])
 
         tracks = []
-        medium_totals = collections.defaultdict(int)
+        medium_totals: dict[int | None, int] = collections.defaultdict(int)
         for i, track_data in enumerate(tracks_data, start=1):
             track = self._get_track(track_data)
             track.index = i
@@ -150,13 +143,15 @@
             artist_id=artist_id,
             tracks=tracks,
             albumtype=album_data["record_type"],
-            va=len(album_data["contributors"]) == 1
-            and artist.lower() == "various artists",
+            va=(
+                len(album_data["contributors"]) == 1
+                and (artist or "").lower() == "various artists"
+            ),
             year=year,
             month=month,
             day=day,
             label=album_data["label"],
-            mediums=max(medium_totals.keys()),
+            mediums=max(filter(None, medium_totals.keys())),
             data_source=self.data_source,
             data_url=album_data["link"],
             cover_art_url=album_data.get("cover_xl"),
@@ -204,12 +199,11 @@
         :rtype: beets.autotag.hooks.TrackInfo or None
         """
         if track_data is None:
-            deezer_id = self._get_id("track", track_id, self.id_regex)
-            if deezer_id is None:
-                return None
-            track_data = self.fetch_data(self.track_url + deezer_id)
-            if track_data is None:
+            if not (deezer_id := self._get_id(track_id)) or not (
+                track_data := self.fetch_data(f"{self.track_url}{deezer_id}")
+            ):
                 return None
+
         track = self._get_track(track_data)
 
         # Get album's tracks to set `track.index` (position on the entire

@@ -38,7 +38,7 @@
 from beets import config
 from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist
 from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
-from beets.util.id_extractors import extract_discogs_id_regex
+from beets.util.id_extractors import extract_release_id
 
 USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
 API_KEY = "rAzVUQYRaoFjeBjyWuWZ"
@@ -266,7 +266,7 @@ def album_for_id(self, album_id):
         """
         self._log.debug("Searching for release {0}", album_id)
 
-        discogs_id = extract_discogs_id_regex(album_id)
+        discogs_id = extract_release_id("discogs", album_id)
 
         if not discogs_id:
             return None
@@ -401,7 +401,7 @@ def get_album_info(self, result):
         else:
             genre = base_genre
 
-        discogs_albumid = extract_discogs_id_regex(result.data.get("uri"))
+        discogs_albumid = extract_release_id("discogs", result.data.get("uri"))
 
         # Extract information for the optional AlbumInfo fields that are
         # contained on nested discogs fields.

@@ -16,7 +16,6 @@
 
 from __future__ import annotations
 
-import re
 import traceback
 from collections import Counter
 from itertools import product
@@ -28,13 +27,8 @@
 import beets
 import beets.autotag.hooks
 from beets import config, plugins, util
-from beets.plugins import BeetsPlugin, MetadataSourcePlugin
-from beets.util.id_extractors import (
-    beatport_id_regex,
-    deezer_id_regex,
-    extract_discogs_id_regex,
-    spotify_id_regex,
-)
+from beets.plugins import BeetsPlugin
+from beets.util.id_extractors import extract_release_id
 
 if TYPE_CHECKING:
     from collections.abc import Iterator, Sequence
@@ -302,17 +296,6 @@ def _set_date_str(
                 setattr(info, key, date_num)
 
 
-def _parse_id(s: str) -> str | None:
-    """Search for a MusicBrainz ID in the given string and return it. If
-    no ID can be found, return None.
-    """
-    # Find the first thing that looks like a UUID/MBID.
-    match = re.search("[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}", s)
-    if match is not None:
-        return match.group() if match else None
-    return None
-
-
 def _is_translation(r):
     _trans_key = "transl-tracklisting"
     return r["type"] == _trans_key and r["direction"] == "backward"
@@ -753,24 +736,10 @@ def album_info(self, release: JSONDict) -> beets.autotag.hooks.AlbumInfo:
                         source.capitalize(),
                     )
 
-            if "discogs" in urls:
-                info.discogs_albumid = extract_discogs_id_regex(urls["discogs"])
-            if "bandcamp" in urls:
-                info.bandcamp_album_id = urls["bandcamp"]
-            if "spotify" in urls:
-                info.spotify_album_id = MetadataSourcePlugin._get_id(
-                    "album", urls["spotify"], spotify_id_regex
+            for source, url in urls.items():
+                setattr(
+                    info, f"{source}_album_id", extract_release_id(source, url)
                 )
-            if "deezer" in urls:
-                info.deezer_album_id = MetadataSourcePlugin._get_id(
-                    "album", urls["deezer"], deezer_id_regex
-                )
-            if "beatport" in urls:
-                info.beatport_album_id = MetadataSourcePlugin._get_id(
-                    "album", urls["beatport"], beatport_id_regex
-                )
-            if "tidal" in urls:
-                info.tidal_album_id = urls["tidal"].split("/")[-1]
 
         extra_albumdatas = plugins.send("mb_album_extract", data=release)
         for extra_albumdata in extra_albumdatas:
@@ -869,10 +838,10 @@ def album_for_id(
         MusicBrainzAPIError.
         """
         self._log.debug("Requesting MusicBrainz release {}", album_id)
-        albumid = _parse_id(album_id)
-        if not albumid:
+        if not (albumid := extract_release_id("musicbrainz", album_id)):
             self._log.debug("Invalid MBID ({0}).", album_id)
             return None
+
         try:
             res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES)
 
@@ -906,10 +875,10 @@ def track_for_id(
         """Fetches a track by its MusicBrainz ID. Returns a TrackInfo object
         or None if no track is found. May raise a MusicBrainzAPIError.
         """
-        trackid = _parse_id(track_id)
-        if not trackid:
+        if not (trackid := extract_release_id("musicbrainz", track_id)):
             self._log.debug("Invalid MBID ({0}).", track_id)
             return None
+
         try:
             res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES)
         except musicbrainzngs.ResponseError: