From d36f14ad536c5663aafe43a9d997dd38484b85c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0ar=C5=ABnas=20Nejus?= Date: Sun, 17 Mar 2024 03:35:45 +0000 Subject: [PATCH] internal: slightly clean up Album logic --- beetsplug/bandcamp/album.py | 68 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/beetsplug/bandcamp/album.py b/beetsplug/bandcamp/album.py index 6b610a54..6de95c9a 100644 --- a/beetsplug/bandcamp/album.py +++ b/beetsplug/bandcamp/album.py @@ -18,8 +18,8 @@ class AlbumName: REMIX_IN_TITLE = re.compile(r"([(+]|incl)[^-]*?mix(?!ed)[^-)]*(\)|$)", re.I) CLEAN_EPLP = re.compile(r"(?:[([]|Double ){0,2}(\b[EL]P\b)\S?", re.I) EPLP_ALBUM = re.compile(r"\b((?:(?!VA|Various|-)[^ ]+ )+)([EL]P[^([-]*)") - IN_QUOTES = re.compile(r"((['\"])([^'\"]+)\2( VA\d+)*)( |$)") - WITHOUT_QUOTES = re.compile(r"^['\"](.+)['\"]$") + QUOTED_ALBUM = re.compile(r"(['\"])([^'\"]+)\1( VA\d+)*( |$)") + ALBUM_IN_DESC = re.compile(r"(?:Title: ?|Album(?::|/Single) )([^\n]+)") CLEAN_VA_EXCLUDE = re.compile(r"\w various artists \w", re.I) CLEAN_VA = re.compile( r""" @@ -29,6 +29,7 @@ class AlbumName: """, re.IGNORECASE + re.MULTILINE + re.VERBOSE, ) + COMPILATION_IN_TITLE = re.compile(r"compilation|best of|anniversary", re.I) original: str description: str = "" @@ -37,58 +38,56 @@ class AlbumName: remove_artists = True @cached_property - def in_description(self) -> str: - """Check description for the album name header and return whatever follows it - if found. - """ - m = re.search(r"(Title: ?|Album(:|/Single) )([^\n]+)", self.description) - if m: + def from_description(self) -> Optional[str]: + """Try finding album name in the release description.""" + if m := self.ALBUM_IN_DESC.search(self.description): self.remove_artists = False - return m.group(3).strip() - return "" + return m.group(1).strip() + + return None @cached_property def mentions_compilation(self) -> bool: - return bool(re.search(r"compilation|best of|anniversary", self.original, re.I)) + return bool(self.COMPILATION_IN_TITLE.search(self.original)) @cached_property - def parsed(self) -> str: + def from_title(self) -> Optional[str]: """Try to guess album name from the original title. - Return the first match from below: + Return the first match from below, defaulting to None: 1. If 'EP' or 'LP' is in the original name, album name is what precedes it. 2. If quotes are used in the title, they probably contain the album name. - 3. Original title """ - album = self.original - m = self.EPLP_ALBUM.search(album) - if m: - album = " ".join(i.strip(" '") for i in m.groups()) - else: - m = self.IN_QUOTES.search(album) - if m: - album = m.group(1) + if m := self.EPLP_ALBUM.search(self.original): + return " ".join(i.strip(" '") for i in m.groups()) - return self.WITHOUT_QUOTES.sub(r"\1", album) + if m := self.QUOTED_ALBUM.search(self.original): + return m.expand(r"\2\3") + + return None @cached_property - def album_sources(self) -> List[str]: + def album_names(self) -> List[str]: priority_list = [ self.from_track_titles, - self.in_description, - self.parsed, + self.from_description, + self.from_title, self.original, ] return list(filter(None, priority_list)) @cached_property def name(self) -> str: - return self.in_description or self.parsed or self.original + return next(iter(self.album_names)) @cached_property - def series(self) -> str: - m = self.SERIES.search("\n".join(self.album_sources)) - return m.group() if m else "" + def series(self) -> Optional[str]: + """Return series if it is found in any of the album names.""" + for name in self.album_names: + if m := self.SERIES.search(name): + return m.group() + + return None @staticmethod def format_series(m: re.Match) -> str: # type: ignore[type-arg] @@ -203,18 +202,19 @@ def get( artists: List[str], label: str, ) -> str: - album = self.name + original_album = self.name to_clean = [catalognum] if self.remove_artists: to_clean.extend(original_artists + artists) + to_clean.sort(key=len, reverse=True) - album = self.clean(album, sorted(to_clean, key=len, reverse=True), label) + album = self.clean(original_album, to_clean, label) if album.startswith("("): - album = self.name + album = original_album album = self.check_eplp(self.standardize_series(album)) if "split ep" in album.lower() or (not album and len(artists) == 2): album = " / ".join(artists) - return album or catalognum or self.name + return album or catalognum or original_album