Skip to content

Commit

Permalink
Parse soundcloud album
Browse files Browse the repository at this point in the history
  • Loading branch information
snejus committed May 16, 2024
1 parent 10e051d commit 04218cf
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 66 deletions.
41 changes: 27 additions & 14 deletions beetsplug/bandcamp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@
import json
import logging
import re
from contextlib import suppress
from functools import lru_cache, partial
from itertools import chain
from operator import itemgetter, truth
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Sequence, Union

import requests
Expand Down Expand Up @@ -198,6 +200,28 @@ def candidates(self, items, artist, album, va_likely, extra_tags=None):
"""Return a sequence of AlbumInfo objects that match the
album whose items are provided or are being searched.
"""
from pprint import pprint

url = items[0].comments
parent_dir = Path(items[0].path.decode()).parent
with suppress(StopIteration):
playlist_info_path = next(parent_dir.glob("Playlist_*"))
with open(playlist_info_path) as f:
playlist_info = json.load(f)

playlist_info["tracks"] = []
for track_info_path in set(parent_dir.glob("*.info.json")) - {
playlist_info_path
}:
with open(track_info_path) as f:
track_data = {**json.load(f), "path": str(track_info_path)}
playlist_info["tracks"].append(track_data)

pprint(playlist_info)

# if url.startswith("https://"):
# yield from self.get_album_info(url)

label = ""
if items and album == items[0].album and artist == items[0].albumartist:
label = items[0].label
Expand Down Expand Up @@ -260,16 +284,6 @@ def track_for_id(self, track_id: str) -> Optional[TrackInfo]:
self._info("Not a bandcamp URL, skipping")
return None

def handle(self, guru: Metaguru, attr: str, _id: str) -> Any:
    """Read attribute ``attr`` from ``guru``, reporting failures instead of raising.

    Returns the attribute value, or ``None`` when reading it raised.
    ``KeyError``, ``ValueError``, ``AttributeError`` and ``IndexError`` are
    reported through ``self._info``; any other exception is reported through
    ``self._exc`` together with the issue tracker URL.
    """
    result = None
    try:
        result = getattr(guru, attr)
    except (KeyError, ValueError, AttributeError, IndexError):
        self._info("Failed obtaining {}", _id)
    except Exception:  # pylint: disable=broad-except
        self._exc(
            "Unexpected error obtaining {}, please report at {}",
            _id,
            "https://github.com/snejus/beetcamp/issues/new",
        )
    return result

def get_album_info(self, url: str) -> Optional[List[AlbumInfo]]:
"""Return an AlbumInfo object for a bandcamp album page.
If track url is given by mistake, find and fetch the album url instead.
Expand All @@ -296,8 +310,8 @@ def _get_soundcloud_data(self, url: str) -> AlbumInfo | TrackInfo | None:
sc_data_key = "sound"
method = get_soundcloud_track

self._info("Fetching data from soundcloud url {} as {}", url, _type)
data = re.search(r"\[\{[^<]+[^;<)]", self._get(url))
self._info("Fetching data from soundcloud url {}", url)
data = re.search(r"\[.*hydratable.*\]", self._get(url))
if not data:
return None

Expand All @@ -311,8 +325,7 @@ def get_track_info(self, url: str) -> Optional[TrackInfo]:
if track:
return track

guru = self.guru(url, "singleton")
return self.handle(guru, "singleton", url) if guru else None
return self.guru(url, "singleton")

def _search(self, data: JSONDict) -> Iterable[JSONDict]:
"""Return a list of track/album URLs of type search_type matching the query."""
Expand Down
47 changes: 43 additions & 4 deletions beetsplug/bandcamp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any, Dict, Iterable, List, NamedTuple, Pattern

from beets.autotag.hooks import AlbumInfo
from beets.ui import log
from ordered_set import OrderedSet as ordset

from .genres_lookup import GENRES
Expand Down Expand Up @@ -262,6 +263,10 @@ def valid_for_mode(kw: str) -> bool:
return valid_mb_genre(kw) or valid_mb_genre(list(words)[-1])

unique_genres: ordset[str] = ordset()
keywords = set(keywords)
for kw in list(keywords):
keywords.add(kw.replace(" ", "-"))
keywords.add(kw.replace("-", " "))
# expand badly delimited keywords
split_kw = partial(re.split, r"[.] | #| - ")
for kw in it.chain.from_iterable(map(split_kw, keywords)):
Expand All @@ -280,13 +285,17 @@ def duplicate(genre: str) -> bool:
others = others.union(x.replace(" ", "").replace("-", "") for x in others) # type: ignore[attr-defined] # noqa
return any(genre in x for x in others)

return it.filterfalse(duplicate, unique_genres)
return list(it.filterfalse(duplicate, unique_genres))

@staticmethod
def unpack_props(obj: Any) -> Any:
    """Recursively add all 'additionalProperty'-ies to their parent dictionary.

    Dictionaries have their ``additionalProperty`` entry removed and each of
    its ``{"name": ..., "value": ...}`` items stored under ``name``; the
    dictionary passed in is modified in place before a rebuilt copy is
    returned. Lists are processed item by item. Any other value is returned
    unchanged.
    """

    def unpack(node: Any) -> Any:
        if isinstance(node, dict):
            # The key may be missing or explicitly null: both mean "no props".
            for prop in node.pop("additionalProperty", None) or []:
                node[prop["name"]] = prop["value"]
            return {key: unpack(value) for key, value in node.items()}
        if isinstance(node, list):
            return [unpack(item) for item in node]
        return node

    return unpack(obj)

@staticmethod
Expand Down Expand Up @@ -363,3 +372,33 @@ def get_medium_total(medium: int) -> int:
else:
medium_index += 1
return album

@staticmethod
def parse_additional_fields(
    meta: str, field_patterns: Dict[str, Any]
) -> Dict[str, Any]:
    """Extract extra fields from ``meta`` using the configured patterns.

    Args:
        meta: Text that the patterns are searched in.
        field_patterns: Maps a field name to a dictionary with a required
            ``pattern`` key (regex source; compiled with ``re.VERBOSE`` when
            it spans several lines) and optionally ``replace`` (a template
            given to ``Match.expand`` of the first match) or ``replace_expr``
            (a Python expression evaluated with ``matches`` and ``match``
            in scope).

    Returns:
        A dictionary of field name to parsed value. Fields whose pattern does
        not match, or whose parsing raises, are left out; failures are logged.
        String values have carriage returns removed and are stripped.
    """
    additional_fields: Dict[str, Any] = {}
    for field, pattern_item in field_patterns.items():
        try:
            pat = pattern_item["pattern"]
            # Multi-line patterns are treated as verbose (commented) regexes.
            flags = re.VERBOSE if len(pat.splitlines()) > 1 else 0
            matches = list(re.finditer(pat, meta, flags))
            if not matches:
                continue

            first = matches[0]
            if "replace" in pattern_item:
                value = first.expand(pattern_item["replace"])
                log.info(str(value))
            elif "replace_expr" in pattern_item:
                # NOTE(security): the expression is executed with eval, so it
                # must only ever come from trusted configuration.
                value = eval(
                    pattern_item["replace_expr"],
                    {"matches": matches, "match": first},
                )
            else:
                value = first.group()

            if isinstance(value, str):
                value = value.replace("\r", "").strip()
            additional_fields[field] = value
        except Exception:
            log.error("Failed parsing {}", field, exc_info=True)
    return additional_fields
40 changes: 32 additions & 8 deletions beetsplug/bandcamp/metaguru.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Module for parsing bandcamp metadata."""

import itertools as it
import json
import operator as op
import re
import sys
from collections import Counter
from datetime import date, datetime
from functools import partial
from functools import partial, singledispatch
from typing import Any, Dict, Iterable, List, Optional, Set
from unicodedata import normalize

Expand All @@ -15,9 +16,9 @@
from beets.autotag.hooks import AlbumInfo, TrackInfo
from pycountry import countries, subdivisions

from .album import AlbumName
from .helpers import PATTERNS, Helpers, MediaInfo
from .tracks import Track, Tracks
from .album import AlbumName

if sys.version_info.minor > 7:
from functools import cached_property # pylint: disable=ungrouped-imports
Expand Down Expand Up @@ -73,7 +74,7 @@ def from_html(cls, html: str, config: Optional[JSONDict] = None) -> "Metaguru":
except AttributeError as exc:
raise AttributeError("Could not find release metadata JSON") from exc
else:
return cls(json.loads(meta), config)
return cls(cls.unpack_props(json.loads(meta)), config)

@cached_property
def excluded_fields(self) -> Set[str]:
Expand Down Expand Up @@ -322,11 +323,11 @@ def is_comp(self) -> bool:
def first_one(artist: str) -> str:
return PATTERNS["split_artists"].split(artist.replace(" & ", ", "))[0]

truly_unique = set(map(first_one, self.tracks.artists))
return (
artist_count = len(set(map(first_one, self.tracks.artists)))
return artist_count > 1 and (
self._album_name.mentions_compilation
or self._search_albumtype("compilation")
or (len(truly_unique) > 3 and len(self.tracks) > 4)
or (artist_count > 3 and len(self.tracks) > 4)
)

@cached_property
Expand All @@ -348,7 +349,7 @@ def albumtype(self) -> str:
return "album"

@cached_property
def albumtypes(self) -> str:
def albumtypes(self) -> list[str]:
albumtypes = {self.albumtype}
if self.is_comp:
if self.albumtype == "ep":
Expand All @@ -365,7 +366,7 @@ def albumtypes(self) -> str:
if len(self.tracks.remixers) == len(self.tracks):
albumtypes.add("remix")

return "; ".join(sorted(albumtypes))
return sorted(albumtypes)

@cached_property
def va(self) -> bool:
Expand Down Expand Up @@ -416,16 +417,39 @@ def get_fields(self, fields: Iterable[str], src: object = None) -> JSONDict:
return {field: getattr(self, field)}
return dict(zip(fields, iter(op.attrgetter(*fields)(src or self))))

@cached_property
def parseable_meta(self) -> str:
    """Render ``self.meta`` as text with one ``<path>: <value>`` line per leaf.

    Dictionary keys extend the path with ``.<key>`` and list positions with
    ``[<index>]``. Carriage returns are removed from each line, and every
    line is terminated with ``\\r\\n``.
    """

    def flatten(node: Any, path: str = "") -> str:
        if isinstance(node, dict):
            return "".join(
                [flatten(child, f"{path}.{name}") for name, child in node.items()]
            )
        if isinstance(node, list):
            return "".join(
                [flatten(child, f"{path}[{idx}]") for idx, child in enumerate(node)]
            )
        line = f"{path}: {node}"
        return line.replace("\r", "") + "\r\n"

    return flatten(self.meta)

@property
def _common_album(self) -> JSONDict:
    """Build the album-level data dictionary.

    It holds the album name, the label/catalognum/albumtype/country fields
    (plus genre, style, comments and albumtypes when ``NEW_BEETS`` is set),
    the year/month/day of the release date when there is one, and any fields
    parsed with the ``field_patterns`` config option when it is present.
    """
    album_data: JSONDict = {"album": self.album_name}

    field_names = ["label", "catalognum", "albumtype", "country"]
    if NEW_BEETS:
        field_names += ["genre", "style", "comments", "albumtypes"]
    album_data.update(self.get_fields(field_names))

    release_date = self.release_date
    if release_date:
        date_parts = self.get_fields(["year", "month", "day"], release_date)
        album_data.update(date_parts)

    if "field_patterns" in self.config:
        extra_fields = self.parse_additional_fields(
            self.parseable_meta, self.config["field_patterns"]
        )
        album_data.update(extra_fields)

    return album_data

Expand Down
Loading

0 comments on commit 04218cf

Please sign in to comment.