Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 4 additions & 24 deletions beets/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

import beets
from beets import logging
from beets.util.id_extractors import extract_release_id

if sys.version_info >= (3, 10):
from typing import ParamSpec
Expand All @@ -56,7 +57,6 @@
from beets.importer import ImportSession, ImportTask
from beets.library import Album, Item, Library
from beets.ui import Subcommand
from beets.util.id_extractors import RegexDict

# TYPE_CHECKING guard is needed for any derived type
# which uses an import from `beets.library` and `beets.importer`
Expand Down Expand Up @@ -778,11 +778,6 @@ def __init__(self):
super().__init__()
self.config.add({"source_weight": 0.5})

@property
@abc.abstractmethod
def id_regex(self) -> RegexDict:
raise NotImplementedError

@property
@abc.abstractmethod
def data_source(self) -> str:
Expand Down Expand Up @@ -872,24 +867,9 @@ def get_artist(

return artist_string, artist_id

@staticmethod
def _get_id(url_type: str, id_: str, id_regex: RegexDict) -> str | None:
"""Parse an ID from its URL if necessary.

:param url_type: Type of URL. Either 'album' or 'track'.
:param id_: Album/track ID or URL.
:param id_regex: A dictionary containing a regular expression
extracting an ID from an URL (if it's not an ID already) in
'pattern' and the number of the match group in 'match_group'.
:return: Album/track ID.
"""
log.debug("Extracting {} ID from '{}'", url_type, id_)
match = re.search(id_regex["pattern"].format(url_type), str(id_))
if match:
id_ = match.group(id_regex["match_group"])
if id_:
return id_
return None
def _get_id(self, id_string: str) -> str | None:
    """Return the release ID contained in ``id_string``.

    The lower-cased ``data_source`` of this plugin selects which pattern
    ``extract_release_id`` applies to the string.
    """
    source = self.data_source.lower()
    return extract_release_id(source, id_string)

def candidates(
self,
Expand Down
68 changes: 20 additions & 48 deletions beets/util/id_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,63 +14,35 @@

"""Helpers around the extraction of album/track IDs from metadata sources."""

import re
from typing import TypedDict


class RegexDict(TypedDict):
"""A dictionary containing a regex pattern and the number of the
match group.
"""

pattern: str
match_group: int


# Spotify IDs consist of 22 alphanumeric characters
# (zero-left-padded base62 representation of randomly generated UUID4)
spotify_id_regex: RegexDict = {
"pattern": r"(^|open\.spotify\.com/{}/)([0-9A-Za-z]{{22}})",
"match_group": 2,
}

deezer_id_regex: RegexDict = {
"pattern": r"(^|deezer\.com/)([a-z]*/)?({}/)?(\d+)",
"match_group": 4,
}

beatport_id_regex: RegexDict = {
"pattern": r"(^|beatport\.com/release/.+/)(\d+)$",
"match_group": 2,
}

# A note on Bandcamp: There is no such thing as a Bandcamp album or artist ID,
# the URL can be used as the identifier. The Bandcamp metadata source plugin
# works that way - https://github.com/snejus/beetcamp. Bandcamp album
# URLs usually look like: https://nameofartist.bandcamp.com/album/nameofalbum
from __future__ import annotations

import re

def extract_discogs_id_regex(album_id):
"""Returns the Discogs_id or None."""
# Discogs-IDs are simple integers. In order to avoid confusion with
# other metadata plugins, we only look for very specific formats of the
# input string:
# Compiled regular expressions keyed by lower-case metadata source name.
# Every pattern exposes the release ID as capture group 1 and is applied with
# ``search`` (see ``extract_release_id``), so it may match a bare ID or an ID
# embedded in a URL.
PATTERN_BY_SOURCE: dict[str, re.Pattern[str]] = {
    # 22 alphanumeric characters, either at the start of the string or right
    # after "open.spotify.com/<segment>/".
    "spotify": re.compile(r"(?:^|open\.spotify\.com/[^/]+/)([0-9A-Za-z]{22})"),
    # Digits at the start of the string or after "deezer.com/", allowing an
    # optional lower-case path segment and one further optional segment
    # before the number.
    "deezer": re.compile(r"(?:^|deezer\.com/)(?:[a-z]*/)?(?:[^/]+/)?(\d+)"),
    # Digits at the very end of the string, either bare or after
    # "beatport.com/release/<anything>/".
    "beatport": re.compile(r"(?:^|beatport\.com/release/.+/)(\d+)$"),
    # UUID-shaped token (8-4-4-4-12 word characters) anywhere in the string.
    "musicbrainz": re.compile(r"(\w{8}(?:-\w{4}){3}-\w{12})"),
    # Discogs release IDs are integers; the accepted input formats are:
    # - plain integer, optionally wrapped in brackets and prefixed by an
    #   'r', as this is how discogs displays the release ID on its webpage.
    # - legacy url format: discogs.com/<name of release>/release/<id>
    # - legacy url short format: discogs.com/release/<id>
    # - current url format: discogs.com/release/<id>-<name of release>
    # See #291, #4080 and #4085 for the discussions leading up to these
    # patterns.
    # Regex has been tested here https://regex101.com/r/TOu7kw/1
    "discogs": re.compile(
        r"(?:^|\[?r|discogs\.com/(?:[^/]+/)?release/)(\d+)\b"
    ),
    # There is no such thing as a Bandcamp album or artist ID, the URL can be
    # used as the identifier. The Bandcamp metadata source plugin works that way
    # - https://github.com/snejus/beetcamp. Bandcamp album URLs usually look
    # like: https://nameofartist.bandcamp.com/album/nameofalbum
    # Hence the whole input string is captured.
    "bandcamp": re.compile(r"(.+)"),
    # Everything after the last "/" in the string.
    "tidal": re.compile(r"([^/]+)$"),
}

for pattern in [
r"^\[?r?(?P<id>\d+)\]?$",
r"discogs\.com/release/(?P<id>\d+)-?",
r"discogs\.com/[^/]+/release/(?P<id>\d+)",
]:
match = re.search(pattern, album_id)
if match:
return int(match.group("id"))

def extract_release_id(source: str, id_: str) -> str | None:
    """Extract a release ID from an ID or URL string of the given source.

    :param source: Name of the metadata source. It is matched
        case-insensitively against the keys of ``PATTERN_BY_SOURCE``.
    :param id_: Release ID, or a URL that contains it.
    :return: The first capture group of the source's pattern, ``None`` when
        the pattern does not match, or ``id_`` unchanged when no pattern is
        registered for ``source``.
    """
    pattern = PATTERN_BY_SOURCE.get(source.lower())
    if pattern is None:
        # Callers may pass the name of any metadata source (for example a
        # plugin's own ``data_source``). Without a registered pattern there
        # is nothing to parse, so hand the value back untouched instead of
        # failing with a bare KeyError.
        return id_

    # ``str()`` keeps ``search`` from raising on non-string input.
    match = pattern.search(str(id_))
    return match[1] if match else None
5 changes: 1 addition & 4 deletions beetsplug/beatport.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import beets.ui
from beets.autotag.hooks import AlbumInfo, TrackInfo
from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
from beets.util.id_extractors import beatport_id_regex

AUTH_ERRORS = (TokenRequestDenied, TokenMissing, VerifierMissing)
USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
Expand Down Expand Up @@ -282,7 +281,6 @@ def __init__(self, data):

class BeatportPlugin(BeetsPlugin):
data_source = "Beatport"
id_regex = beatport_id_regex

def __init__(self):
super().__init__()
Expand Down Expand Up @@ -394,8 +392,7 @@ def album_for_id(self, release_id):
"""
self._log.debug("Searching for release {0}", release_id)

release_id = self._get_id("album", release_id, self.id_regex)
if release_id is None:
if not (release_id := self._get_id(release_id)):
self._log.debug("Not a valid Beatport release ID.")
return None

Expand Down
44 changes: 19 additions & 25 deletions beetsplug/deezer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

"""Adds Deezer release and track search support to the autotagger"""

from __future__ import annotations

import collections
import time

Expand All @@ -25,10 +27,9 @@
from beets.dbcore import types
from beets.library import DateType
from beets.plugins import BeetsPlugin, MetadataSourcePlugin
from beets.util.id_extractors import deezer_id_regex


class DeezerPlugin(MetadataSourcePlugin, BeetsPlugin):

Check failure on line 32 in beetsplug/deezer.py

View workflow job for this annotation

GitHub Actions / Check types with mypy

Missing type parameters for generic type "MetadataSourcePlugin"
data_source = "Deezer"

item_types = {
Expand All @@ -43,8 +44,6 @@
album_url = "https://api.deezer.com/album/"
track_url = "https://api.deezer.com/track/"

id_regex = deezer_id_regex

def __init__(self):
super().__init__()

Expand Down Expand Up @@ -75,21 +74,15 @@
return None
return data

def album_for_id(self, album_id):
"""Fetch an album by its Deezer ID or URL and return an
AlbumInfo object or None if the album is not found.

:param album_id: Deezer ID or URL for the album.
:type album_id: str
:return: AlbumInfo object for album.
:rtype: beets.autotag.hooks.AlbumInfo or None
"""
deezer_id = self._get_id("album", album_id, self.id_regex)
if deezer_id is None:
def album_for_id(self, album_id: str) -> AlbumInfo | None:
"""Fetch an album by its Deezer ID or URL."""
if not (deezer_id := self._get_id(album_id)):
return None
album_data = self.fetch_data(self.album_url + deezer_id)
if album_data is None:

album_url = f"{self.album_url}{deezer_id}"
if not (album_data := self.fetch_data(album_url)):
return None

contributors = album_data.get("contributors")
if contributors is not None:
artist, artist_id = self.get_artist(contributors)
Expand Down Expand Up @@ -132,7 +125,7 @@
tracks_data.extend(tracks_obj["data"])

tracks = []
medium_totals = collections.defaultdict(int)
medium_totals: dict[int | None, int] = collections.defaultdict(int)
for i, track_data in enumerate(tracks_data, start=1):
track = self._get_track(track_data)
track.index = i
Expand All @@ -150,13 +143,15 @@
artist_id=artist_id,
tracks=tracks,
albumtype=album_data["record_type"],
va=len(album_data["contributors"]) == 1
and artist.lower() == "various artists",
va=(
len(album_data["contributors"]) == 1
and (artist or "").lower() == "various artists"
),
year=year,
month=month,
day=day,
label=album_data["label"],
mediums=max(medium_totals.keys()),
mediums=max(filter(None, medium_totals.keys())),
data_source=self.data_source,
data_url=album_data["link"],
cover_art_url=album_data.get("cover_xl"),
Expand Down Expand Up @@ -204,12 +199,11 @@
:rtype: beets.autotag.hooks.TrackInfo or None
"""
if track_data is None:
deezer_id = self._get_id("track", track_id, self.id_regex)
if deezer_id is None:
return None
track_data = self.fetch_data(self.track_url + deezer_id)
if track_data is None:
if not (deezer_id := self._get_id(track_id)) or not (
track_data := self.fetch_data(f"{self.track_url}{deezer_id}")
):
return None

track = self._get_track(track_data)

# Get album's tracks to set `track.index` (position on the entire
Expand Down
6 changes: 3 additions & 3 deletions beetsplug/discogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from beets import config
from beets.autotag.hooks import AlbumInfo, TrackInfo, string_dist
from beets.plugins import BeetsPlugin, MetadataSourcePlugin, get_distance
from beets.util.id_extractors import extract_discogs_id_regex
from beets.util.id_extractors import extract_release_id

USER_AGENT = f"beets/{beets.__version__} +https://beets.io/"
API_KEY = "rAzVUQYRaoFjeBjyWuWZ"
Expand Down Expand Up @@ -266,7 +266,7 @@ def album_for_id(self, album_id):
"""
self._log.debug("Searching for release {0}", album_id)

discogs_id = extract_discogs_id_regex(album_id)
discogs_id = extract_release_id("discogs", album_id)

if not discogs_id:
return None
Expand Down Expand Up @@ -401,7 +401,7 @@ def get_album_info(self, result):
else:
genre = base_genre

discogs_albumid = extract_discogs_id_regex(result.data.get("uri"))
discogs_albumid = extract_release_id("discogs", result.data.get("uri"))

# Extract information for the optional AlbumInfo fields that are
# contained on nested discogs fields.
Expand Down
49 changes: 9 additions & 40 deletions beetsplug/musicbrainz.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

from __future__ import annotations

import re
import traceback
from collections import Counter
from itertools import product
Expand All @@ -28,13 +27,8 @@
import beets
import beets.autotag.hooks
from beets import config, plugins, util
from beets.plugins import BeetsPlugin, MetadataSourcePlugin
from beets.util.id_extractors import (
beatport_id_regex,
deezer_id_regex,
extract_discogs_id_regex,
spotify_id_regex,
)
from beets.plugins import BeetsPlugin
from beets.util.id_extractors import extract_release_id

if TYPE_CHECKING:
from collections.abc import Iterator, Sequence
Expand Down Expand Up @@ -302,17 +296,6 @@ def _set_date_str(
setattr(info, key, date_num)


def _parse_id(s: str) -> str | None:
"""Search for a MusicBrainz ID in the given string and return it. If
no ID can be found, return None.
"""
# Find the first thing that looks like a UUID/MBID.
match = re.search("[a-f0-9]{8}(-[a-f0-9]{4}){3}-[a-f0-9]{12}", s)
if match is not None:
return match.group() if match else None
return None


def _is_translation(r):
_trans_key = "transl-tracklisting"
return r["type"] == _trans_key and r["direction"] == "backward"
Expand Down Expand Up @@ -753,24 +736,10 @@ def album_info(self, release: JSONDict) -> beets.autotag.hooks.AlbumInfo:
source.capitalize(),
)

if "discogs" in urls:
info.discogs_albumid = extract_discogs_id_regex(urls["discogs"])
if "bandcamp" in urls:
info.bandcamp_album_id = urls["bandcamp"]
if "spotify" in urls:
info.spotify_album_id = MetadataSourcePlugin._get_id(
"album", urls["spotify"], spotify_id_regex
for source, url in urls.items():
setattr(
info, f"{source}_album_id", extract_release_id(source, url)
)
if "deezer" in urls:
info.deezer_album_id = MetadataSourcePlugin._get_id(
"album", urls["deezer"], deezer_id_regex
)
if "beatport" in urls:
info.beatport_album_id = MetadataSourcePlugin._get_id(
"album", urls["beatport"], beatport_id_regex
)
if "tidal" in urls:
info.tidal_album_id = urls["tidal"].split("/")[-1]

extra_albumdatas = plugins.send("mb_album_extract", data=release)
for extra_albumdata in extra_albumdatas:
Expand Down Expand Up @@ -869,10 +838,10 @@ def album_for_id(
MusicBrainzAPIError.
"""
self._log.debug("Requesting MusicBrainz release {}", album_id)
albumid = _parse_id(album_id)
if not albumid:
if not (albumid := extract_release_id("musicbrainz", album_id)):
self._log.debug("Invalid MBID ({0}).", album_id)
return None

try:
res = musicbrainzngs.get_release_by_id(albumid, RELEASE_INCLUDES)

Expand Down Expand Up @@ -906,10 +875,10 @@ def track_for_id(
"""Fetches a track by its MusicBrainz ID. Returns a TrackInfo object
or None if no track is found. May raise a MusicBrainzAPIError.
"""
trackid = _parse_id(track_id)
if not trackid:
if not (trackid := extract_release_id("musicbrainz", track_id)):
self._log.debug("Invalid MBID ({0}).", track_id)
return None

try:
res = musicbrainzngs.get_recording_by_id(trackid, TRACK_INCLUDES)
except musicbrainzngs.ResponseError:
Expand Down
Loading
Loading