Skip to content

Commit

Permalink
Split release track names parsing into TrackNames class
Browse files Browse the repository at this point in the history
  • Loading branch information
snejus committed Mar 18, 2024
1 parent 5f04de1 commit 92451c1
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 229 deletions.
10 changes: 5 additions & 5 deletions beetsplug/bandcamp/helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
"""Module with a Helpers class that contains various static, independent functions."""

from __future__ import annotations

import itertools as it
import operator as op
import re
Expand Down Expand Up @@ -108,7 +106,9 @@ class MediaInfo(NamedTuple):
r"CD ?\d+",
]

_remix_pat = r"(?P<remix>((?P<remixer>[^])]+) )?\b((re)?mix|edit|bootleg)\b[^])]*)"
REMIX = re.compile(
r"(?P<remix>((?P<remixer>[^])]+) )?\b((re)?mix|edit|bootleg)\b[^])]*)", re.I
)
CAMELCASE = re.compile(r"(?<=[a-z])(?=[A-Z])")


Expand All @@ -133,8 +133,8 @@ def split_artist_title(m: re.Match[str]) -> str:
(re.compile(r"(- )?\( *"), "("), # hi - ( bye) -> hi (bye)
(re.compile(r" \)+|(\)+$)"), ")"), # hi (bye )) -> hi (bye)
(re.compile(r"- Reworked"), "(Reworked)"), # bye - Reworked -> bye (Reworked) # noqa
(re.compile(rf"(\({_remix_pat})$", re.I), r"\1)"), # bye - (Some Mix -> bye - (Some Mix) # noqa
(re.compile(rf"- *({_remix_pat})$", re.I), r"(\1)"), # bye - Some Mix -> bye (Some Mix) # noqa
(re.compile(rf"(\({REMIX.pattern})$", re.I), r"\1)"), # bye - (Some Mix -> bye - (Some Mix) # noqa
(re.compile(rf"- *({REMIX.pattern})$", re.I), r"(\1)"), # bye - Some Mix -> bye (Some Mix) # noqa
(re.compile(r'(^|- )[“"]([^”"]+)[”"]( \(|$)'), r"\1\2\3"), # "bye" -> bye; hi - "bye" -> hi - bye # noqa
(re.compile(r"\((the )?(remixes)\)", re.I), r"\2"), # Album (Remixes) -> Album Remixes # noqa
(re.compile(r"examine-.+CD\d+_([^_-]+)[_-](.*)"), split_artist_title), # See https://examine-archive.bandcamp.com/album/va-examine-archive-international-sampler-xmn01 # noqa
Expand Down
5 changes: 3 additions & 2 deletions beetsplug/bandcamp/metaguru.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

from .album import AlbumName
from .helpers import PATTERNS, Helpers, MediaInfo
from .tracks import Track, Tracks
from .track import Track
from .tracks import Tracks

NEW_BEETS = int(beets_version.split(".")[1]) > 4

Expand Down Expand Up @@ -184,7 +185,7 @@ def mediums(self) -> int:
@cached_property
def general_catalognum(self) -> str:
"""Find catalog number in the media-agnostic release metadata and cache it."""
return self._tracks.single_catalognum or self.parse_catalognum(
return self._tracks.catalognum or self.parse_catalognum(
album=self.meta["name"],
description=self.comments,
label=self.label if not self._singleton else "",
Expand Down
93 changes: 46 additions & 47 deletions beetsplug/bandcamp/track.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from functools import cached_property
from typing import List, Optional, Tuple

from .helpers import CATNUM_PAT, PATTERNS, Helpers, JSONDict, _remix_pat
from .helpers import CATNUM_PAT, PATTERNS, REMIX, Helpers, JSONDict

digiwords = r"""
# must contain at least one of
Expand Down Expand Up @@ -33,7 +33,7 @@

@dataclass
class Remix:
PATTERN = re.compile(rf" *[\[(] *{_remix_pat}[])]", re.I)
PATTERN = re.compile(rf" *[\[(] *{REMIX.pattern}[])]", re.I)

delimited: str
remixer: str
Expand Down Expand Up @@ -66,22 +66,6 @@ class Track:
digi_only: bool = False
track_alt: Optional[str] = None

@classmethod
def from_json(cls, json: JSONDict, label: str) -> "Track":
    """Create a track from its JSON entry.

    The artist is read from ``json["inAlbum"]["byArtist"]["name"]``; when
    that path is missing or holds a falsy value, the top-level ``byArtist``
    name is used instead (an empty string if that is absent as well).
    The collected fields are handed to ``parse_name`` together with the
    track name and the label, and its result initialises the instance.
    """
    try:
        album_artist = json["inAlbum"]["byArtist"]["name"]
    except KeyError:
        album_artist = ""
    if not album_artist:
        # fall back to the artist attached directly to the track
        album_artist = json.get("byArtist", {}).get("name", "")

    fields = {
        "json_item": json,
        "json_artist": album_artist,
        "track_id": json["@id"],
        "index": json.get("position"),
        "catalognum": json.get("catalognum"),
    }
    parsed = cls.parse_name(fields, json["name"], label)
    return cls(**parsed)

@staticmethod
def clean_digi_name(name: str) -> Tuple[str, bool]:
"""Clean the track title from digi-only artifacts.
Expand Down Expand Up @@ -111,43 +95,59 @@ def find_featuring(data: JSONDict) -> JSONDict:
break
return data

@staticmethod
def parse_name(data: JSONDict, name: str, label: str) -> JSONDict:
# remove label from the end of the track name
# see https://gutterfunkuk.bandcamp.com/album/gutterfunk-all-subject-to-vibes-various-artists-lp # noqa
if name.endswith(label):
name = name.replace(label, "").strip(" -")

json_artist, artist_digi_only = Track.clean_digi_name(data["json_artist"])
name, name_digi_only = Track.clean_digi_name(name)
data["digi_only"] = name_digi_only or artist_digi_only

data["json_artist"] = Helpers.clean_name(json_artist) if json_artist else ""
@classmethod
def parse_name(cls, name: str, artist: str, index: Optional[int]) -> JSONDict:
result: JSONDict = {}
artist, artist_digi_only = cls.clean_digi_name(artist)
name, name_digi_only = cls.clean_digi_name(name)
result["digi_only"] = name_digi_only or artist_digi_only

if artist:
artist = Helpers.clean_name(artist)
name = Helpers.clean_name(name).strip().lstrip("-")

# find the track_alt and remove it from the name
m = PATTERNS["track_alt"].search(name)
if m:
data["track_alt"] = m.group(1).replace(".", "").upper()
result["track_alt"] = m.group(1).replace(".", "").upper()
name = name.replace(m.group(), "")

if not data.get("catalognum"):
# check whether track name contains the catalog number within parens
# or square brackets
# see https://objection999x.bandcamp.com/album/eruption-va-obj012
m = CATNUM_PAT["delimited"].search(name)
if m:
data["catalognum"] = m.group(1)
name = name.replace(m.group(), "").strip()
name = re.sub(rf"^0*{data.get('index', 0)}(?!\W\d)\W+", "", name)
# check whether track name contains the catalog number within parens
# or square brackets
# see https://objection999x.bandcamp.com/album/eruption-va-obj012
m = CATNUM_PAT["delimited"].search(name)
if m:
result["catalognum"] = m.group(1)
name = name.replace(m.group(), "").strip()

# Remove leading index
if index:
name = re.sub(rf"^0*{index}(?!\W\d)\W+", "", name)

# find the remixer and remove it from the name
remix = Remix.from_name(name)
if remix:
data.update(remix=remix)
result["remix"] = remix
name = name.replace(remix.delimited, "").rstrip()

data["name"] = name
data = Track.find_featuring(data)
return data
result["name"] = name
return Track.find_featuring({**result, "json_artist": artist})

@classmethod
def make(cls, json: JSONDict, name: str) -> "Track":
    """Create a track from its JSON entry and the given track name.

    The artist is read from ``json["inAlbum"]["byArtist"]["name"]``; when
    any key on that path is missing, the top-level ``byArtist`` name is
    used instead (an empty string if that is absent as well).
    Fields returned by ``parse_name`` are merged on top of the base fields.
    """
    try:
        artist = json["inAlbum"]["byArtist"]["name"]
    except KeyError:
        # fall back to the artist attached directly to the track
        artist = json.get("byArtist", {}).get("name", "")

    position = json.get("position")
    fields = {"json_item": json, "track_id": json["@id"], "index": position}
    # parsed values take precedence over the base fields, as in a dict merge
    fields.update(cls.parse_name(name, artist, position))
    return cls(**fields)

@cached_property
def duration(self) -> Optional[int]:
Expand Down Expand Up @@ -177,6 +177,7 @@ def full_name(self) -> str:
@cached_property
def title_without_remix(self) -> str:
"""Split the track name, deduce the title and return it.
The extra complexity here is to ensure that it does not cut off a title
that ends with ' - -', like in '(DJ) NICK JERSEY - 202memo - - -'.
"""
Expand All @@ -199,9 +200,7 @@ def title(self) -> str:

@cached_property
def artist(self) -> str:
"""Take the name, remove the title, ensure it does not duplicate any remixers
and return the resulting artist.
"""
"""Return name without the title and the remixer."""
title_start_idx = self.full_name.rfind(self.title_without_remix)
artist = Remix.PATTERN.sub("", self.full_name[:title_start_idx].strip(", -"))
if self.remix:
Expand Down
141 changes: 141 additions & 0 deletions beetsplug/bandcamp/track_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""Module for parsing track names."""

import operator as op
import re
from collections import Counter
from contextlib import suppress
from dataclasses import dataclass
from functools import reduce
from typing import Iterator, List, Optional, Tuple

from ordered_set import OrderedSet

from .helpers import CATNUM_PAT, REMIX


@dataclass
class TrackNames:
    """Responsible for parsing track names in the entire release context.

    Instances are normally created through :meth:`make`, which runs every
    clean-up step below on the raw track names of a release. Iterating over
    an instance yields the cleaned-up names.
    """

    # Title [Some Album EP]
    ALBUM_IN_TITLE = re.compile(r"[- ]*\[([^\]]+ [EL]P)\]+", re.I)
    # a single non-word character surrounded by spaces, e.g. ' | '
    DELIMITER_PAT = re.compile(r" ([^\w&()+/[\] ]) ")
    # Artist - "Title" or Artist "Title"
    TITLE_IN_QUOTES = re.compile(r'^(.+[^ -])[ -]+"([^"]+)"$')

    # track names exactly as they were given to `make`
    original: List[str]
    # track names after all clean-up steps
    names: List[str]
    # album name found within the track names, if any
    album: Optional[str] = None
    # catalogue number found within the track names, if any
    catalognum: Optional[str] = None

    def __iter__(self) -> Iterator[str]:
        """Iterate over the cleaned-up track names."""
        return iter(self.names)

    @classmethod
    def split_quoted_titles(cls, names: List[str]) -> List[str]:
        """Convert `Artist "Title"` names to `Artist - Title`.

        The conversion only takes place when the release has more than one
        track and *every* name follows this format; otherwise the names are
        returned unchanged.
        """
        if len(names) > 1:
            matches = list(filter(None, map(cls.TITLE_IN_QUOTES.match, names)))
            if len(matches) == len(names):
                return [m.expand(r"\1 - \2") for m in matches]

        return names

    @classmethod
    def find_common_track_delimiter(cls, names: List[str]) -> str:
        """Return the track parts delimiter that is in effect in the current release.

        In some (rare) situations track parts are delimited by a pipe character
        or some UTF-8 equivalent of a dash.

        This checks every track for the first character (see the regex for
        exclusions) that splits it. The character that splits the most and
        more than half of the tracks is the character we need.

        If no such character is found, or if there are no tracks at all,
        return a dash '-'. A single track always uses its own delimiter.
        """
        if not names:
            # Counter.most_common(1) is empty here and cannot be popped
            return "-"

        def get_delim(string: str) -> str:
            m = cls.DELIMITER_PAT.search(string)
            return m.group(1) if m else "-"

        delim, count = Counter(map(get_delim, names)).most_common(1).pop()
        return delim if (len(names) == 1 or count > len(names) / 2) else "-"

    @classmethod
    def normalize_delimiter(cls, names: List[str]) -> List[str]:
        """Ensure the same delimiter splits artist and title in all names."""
        delim = cls.find_common_track_delimiter(names)
        return [n.replace(f" {delim} ", " - ") for n in names]

    @staticmethod
    def remove_label(names: List[str], label: str) -> List[str]:
        """Remove label name from the end of track names.

        Only the trailing occurrence is removed, together with any spaces
        and dashes that preceded it. Names are left untouched when the label
        is empty or when the name consists of the label alone, since removing
        it would leave nothing behind.

        See https://gutterfunkuk.bandcamp.com/album/gutterfunk-all-subject-to-vibes-various-artists-lp
        """
        if not label:
            # every string ends with "", so there is nothing to remove
            return list(names)

        cleaned = []
        for name in names:
            if name.endswith(label):
                # cut the suffix only: the label may legitimately appear
                # earlier in the name, for example as a part of the artist
                without_label = name[: -len(label)].strip(" -")
                cleaned.append(without_label or name)
            else:
                cleaned.append(name)

        return cleaned

    @staticmethod
    def eject_common_catalognum(names: List[str]) -> Tuple[Optional[str], List[str]]:
        """Return catalognum found in every track title.

        1. Split each track name into words
        2. Find the list of words that are common to all tracks
        3. Check the *first* and the *last* word for the catalog number
           - If found, return it and remove it from every track name

        Return ``None`` and the unchanged names when nothing is found or when
        there are no names to check.
        """
        catalognum = None
        if not names:
            # reduce() cannot fold an empty sequence without an initial value
            return catalognum, names

        names_tokens = map(str.split, names)
        common_words = reduce(op.and_, [OrderedSet(x) for x in names_tokens])
        if common_words:
            matches = (CATNUM_PAT["anywhere"].search(common_words[i]) for i in [0, -1])
            with suppress(StopIteration):
                catalognum, word = next((m.group(1), m.string) for m in matches if m)
                names = [n.replace(word, "").strip() for n in names]

        return catalognum, names

    @staticmethod
    def parenthesize_remixes(names: List[str]) -> List[str]:
        """Reformat broken remix titles for an album with a single root title.

        1. Check whether this release has a single root title
        2. Find remixes that do not have parens around them
        3. Add parens
        """
        if not names:
            # reduce() cannot fold an empty sequence without an initial value
            return names

        names_tokens = map(str.split, names)
        common_words = reduce(op.and_, [OrderedSet(x) for x in names_tokens])
        joined = " ".join(common_words)
        if joined in names:  # it is one of the track names (root title)
            remix_parts = [n.replace(joined, "").lstrip() for n in names]
            return [
                (n.replace(rp, f"({rp})") if REMIX.fullmatch(rp) else n)
                for n, rp in zip(names, remix_parts)
            ]

        return names

    @classmethod
    def eject_album_name(cls, names: List[str]) -> Tuple[Optional[str], List[str]]:
        """Return the album name found in the track names and the cleaned names.

        The album is expected in square brackets and must end with EP or LP,
        like 'Title [Some Album EP]'. It is only ejected when the names point
        to exactly one album; otherwise ``None`` and the unchanged names are
        returned.
        """
        matches = list(map(cls.ALBUM_IN_TITLE.search, names))
        albums = {m.group(1).replace('"', "") for m in matches if m}
        if len(albums) != 1:
            return None, names

        return albums.pop(), [
            (n.replace(m.group(), "") if m else n) for m, n in zip(matches, names)
        ]

    @classmethod
    def make(cls, original: List[str], label: str) -> "TrackNames":
        """Clean up the given track names and collect release-wide metadata.

        The steps are applied in this order: quoted titles are split, the
        delimiter is normalized, the label is removed, remixes are put in
        parentheses, and finally the common catalogue number and the album
        name are ejected from the names.
        """
        names = cls.parenthesize_remixes(
            cls.remove_label(
                cls.normalize_delimiter(cls.split_quoted_titles(original)), label
            )
        )
        catalognum, names = cls.eject_common_catalognum(names)
        album, names = cls.eject_album_name(names)
        return cls(original, names, album=album, catalognum=catalognum)
Loading

0 comments on commit 92451c1

Please sign in to comment.