snejus · snejus · May 7, 2024 · May 7, 2024 · May 7, 2024 · May 7, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,16 @@
 ## Unreleased
 
+## [0.19.0] 2024-05-07
+
+### Fixed
+
+- (#56) Support parsing URLs that do not end with **.com** in item comments when importing
+  music that was bought on Bandcamp.
+
+### Added
+
+- Add a new flag to the command line application for searching Bandcamp:
+  **`[-p PAGE, --page PAGE]`**, which selects the page of search results to show.
 
 ## [0.18.0] 2024-04-28
 

diff --git a/README.md b/README.md
@@ -54,7 +54,7 @@ Navigate to your `beets` virtual environment and install the plug-in with
 The plugin exposes some of its functionality through a command-line application `beetcamp`:
 
 ```xml
-usage: beetcamp [-h] [-a] [-l] [-t] [release_url | query]
+usage: beetcamp [-h] [-a] [-l] [-t] [-o INDEX] [-p PAGE] (release_url | query)
 
 Get bandcamp release metadata from the given <release-url> or perform
 bandcamp search with <query>. Anything that does not start with https://
@@ -65,13 +65,14 @@ positional arguments:
   release_url  Release URL, starting with https:// OR
   query        Search query
 
-options:
+optional arguments:
   -h, --help   show this help message and exit
   -a, --album  Search albums
   -l, --label  Search labels and artists
   -t, --track  Search tracks
   -o INDEX, --open INDEX
                         Open search result indexed by INDEX in the browser
+  -p PAGE, --page PAGE  The results page to show, 1 by default
 ```
 
 - Use `beetcamp <bandcamp-release-url>` to return release metadata in JSON format.

diff --git a/beetsplug/bandcamp/__init__.py b/beetsplug/bandcamp/__init__.py
@@ -56,7 +56,7 @@
 }
 
 ALBUM_URL_IN_TRACK = re.compile(r'<a id="buyAlbumLink" href="([^"]+)')
-LABEL_URL_IN_COMMENT = re.compile(r"Visit (https:[\w/.-]+com)")
+LABEL_URL_IN_COMMENT = re.compile(r"Visit (https:[\w/.-]+\.[a-z]+)")
 USER_AGENT = f"beets/{__version__} +http://beets.radbox.org/"
 
 
@@ -169,6 +169,13 @@ def loaded(self) -> None:
                 plugin.sources = [bandcamp_fetchart, *plugin.sources]
                 break
 
+    @staticmethod
+    def parse_label_url(text: str) -> str | None:
+        """Return the label URL announced at the start of `text`, if any."""
+        if (found := LABEL_URL_IN_COMMENT.match(text)) is None:
+            return None
+        return found.group(1)
+
     def _find_url_in_item(
         self, item: library.Item, name: str, _type: CandidateType
     ) -> str:
@@ -189,16 +196,14 @@ def _find_url_in_item(
               the number of previous releases that also did not have any valid
               alphanums. Therefore, we cannot make a reliable guess here.
         """
-        url = getattr(item, f"mb_{_type}id", "")
-        if _from_bandcamp(url):
+        if (url := getattr(item, f"mb_{_type}id", "")) and _from_bandcamp(url):
             self._info("Fetching the URL attached to the first item, {}", url)
             return url
 
-        if (m := LABEL_URL_IN_COMMENT.match(item.comments)) and (
+        if (label_url := self.parse_label_url(item.comments)) and (
             urlified_name := urlify(name)
         ):
-            label = m.group(1)
-            url = f"{label}/{_type}/{urlified_name}"
+            url = f"{label_url}/{_type}/{urlified_name}"
             self._info("Trying our guess {} before searching", url)
             return url
         return ""
@@ -207,15 +212,15 @@ def candidates(
         self, items: List[library.Item], artist: str, album: str, *_: Any, **__: Any
     ) -> Iterable[AlbumInfo]:
         """Return a sequence of album candidates matching given artist and album."""
+        item = items[0]
         label = ""
-        if items and album == items[0].album and artist == items[0].albumartist:
-            label = items[0].label
-            url = self._find_url_in_item(items[0], album, "album")
-            if url:
-                initial_guess = self.get_album_info(url)
-                if initial_guess:
-                    yield from initial_guess
-                    return
+        if items and album == item.album and artist == item.albumartist:
+            label = item.label
+            if (url := self._find_url_in_item(item, album, "album")) and (
+                initial_guess := self.get_album_info(url)
+            ):
+                yield from initial_guess
+                return
 
         if "various" in artist.lower():
             artist = ""
@@ -228,12 +233,12 @@ def item_candidates(
         self, item: library.Item, artist: str, title: str
     ) -> Iterable[TrackInfo]:
         """Return a sequence of singleton candidates matching given artist and title."""
-        url = self._find_url_in_item(item, title, "track")
         label = ""
         if item and title == item.title and artist == item.artist:
             label = item.label
-            initial_guess = self.get_track_info(url) if url else None
-            if initial_guess:
+            if (url := self._find_url_in_item(item, title, "track")) and (
+                initial_guess := self.get_track_info(url)
+            ):
                 yield initial_guess
                 return
 
@@ -350,6 +355,15 @@ def __call__(
         type=int,
         help="Open search result indexed by INDEX in the browser",
     )
+    parser.add_argument(
+        "-p",
+        "--page",
+        action="store",
+        dest="page",
+        type=int,
+        default=1,
+        help="The results page to show, 1 by default",
+    )
 
     return parser.parse_args()
 

diff --git a/beetsplug/bandcamp/search.py b/beetsplug/bandcamp/search.py
@@ -1,4 +1,5 @@
 """Module with bandcamp search functionality."""
+
 import re
 from difflib import SequenceMatcher
 from html import unescape
@@ -8,7 +9,7 @@
 import requests
 
 JSONDict = Dict[str, Any]
-SEARCH_URL = "https://bandcamp.com/search?q={}"
+SEARCH_URL = "https://bandcamp.com/search?page={}&q={}"
 
 
 def _f(field: str) -> str:
@@ -40,12 +41,14 @@ def to_ascii(string: str) -> str:
 
 def get_similarity(query: str, result: str) -> float:
     """Return the similarity between two strings normalized to [0, 1].
+
     We take into account how well the result matches the query, e.g.
-        query: "foo"
+        query: "foobar"
         result: "foo bar"
     Similarity is then:
-        (2 * (len("foo") / len("foo")) + len("foo") / len("foo bar")) / 3
-    2/3 of the result is how much of the query is found in the result,
+        (2 * (len("foo") / len("foobar")) + len("foo") / len("foo bar")) / 3
+
+    2/3 of the weight is how much of the query is found in the result,
     and 1/3 is a penalty for the non-matching part.
     """
     a, b = to_ascii(query), to_ascii(result)
@@ -70,8 +73,13 @@ def get_matches(text: str) -> JSONDict:
 
 
 def parse_and_sort_results(html: str, **kwargs: str) -> List[JSONDict]:
-    """Given the html string, parse metadata for each entity and sort them
-    by the field/value pairs given in kwargs.
+    """Extract search results from `html` and sort them by similarity to kwargs.
+
+    Bandcamp search may be unpredictable, therefore search results get sorted
+    by their similarity to what's being queried.
+
+    `kwargs` contains field and value pairs we compare the results with. Usually,
+    this has 'label', 'artist' and 'name' ('title' or 'album') fields.
     """
     results: List[JSONDict] = []
     for block in html.split("searchresult data-search")[1:]:
@@ -95,14 +103,12 @@ def get_url(url: str) -> str:
 def search_bandcamp(
     query: str = "",
     search_type: str = "",
+    page: int = 1,
     get: Callable[[str], str] = get_url,
     **kwargs: Any,
 ) -> List[JSONDict]:
-    """Return a list with item JSONs of type search_type matching the query.
-    Bandcamp search may be unpredictable, therefore search results get sorted
-    regarding their similarity to what's being queried.
-    """
-    url = SEARCH_URL.format(query)
+    """Return a list with item JSONs of type search_type matching the query."""
+    url = SEARCH_URL.format(page, query)
     if search_type:
         url += "&item_type=" + search_type
     kwargs["name"] = query