Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Album data mining fixes #530

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 68 additions & 24 deletions tests/mixins/test_browsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,28 @@ def test_get_home(self, yt, yt_auth):
assert len(result) >= 15

def test_get_artist(self, yt):
results = yt.get_artist("MPLAUCmMUZbaYdNH0bEd1PAlAqsA")
assert len(results) == 14
artist = yt.get_artist("MPLAUCmMUZbaYdNH0bEd1PAlAqsA")
assert len(artist) == 16

# make sure artists are correctly filled for categories
for k in ["songs", "videos"]:
assert {"id": "UCmMUZbaYdNH0bEd1PAlAqsA", "name": "Oasis"} in artist[k]["results"][0]["artists"]
single = artist["singles"]["results"][0]
assert len(single["year"]) == 4 and single["year"].isnumeric()
assert single["type"] == "Single"

# test correctness of related artists
related = results["related"]["results"]
related = artist["related"]["results"]
assert len(
[x for x in related if set(x.keys()) == {"browseId", "subscribers", "title", "thumbnails"}]
[
x
for x in related
if set(x.keys()) == {"browseId", "subscribers", "title", "thumbnails", "sub_count"}
]
) == len(related)

results = yt.get_artist("UCLZ7tlKC06ResyDmEStSrOw") # no album year
assert len(results) >= 11
artist = yt.get_artist("UCLZ7tlKC06ResyDmEStSrOw") # no album year
assert len(artist) >= 11

def test_get_artist_albums(self, yt):
artist = yt.get_artist("UCAeLFBCQS7FvI8PvBrWvSBg")
Expand Down Expand Up @@ -67,24 +78,57 @@ def test_get_album_browse_id_issue_470(self, yt):
assert escaped_browse_id == "MPREb_scJdtUCpPE2"

def test_get_album(self, yt, yt_auth, sample_album):
results = yt_auth.get_album(sample_album)
assert len(results) >= 9
assert results["tracks"][0]["isExplicit"]
assert all(item["views"] is not None for item in results["tracks"])
assert all(item["album"] is not None for item in results["tracks"])
assert results["tracks"][0]["trackNumber"] == 1
assert "feedbackTokens" in results["tracks"][0]
assert len(results["other_versions"]) >= 1 # appears to be regional
results = yt.get_album("MPREb_BQZvl3BFGay")
assert len(results["tracks"]) == 7
assert len(results["tracks"][0]["artists"]) == 1
results = yt.get_album("MPREb_rqH94Zr3NN0")
assert len(results["tracks"][0]["artists"]) == 2
results = yt.get_album("MPREb_TPH4WqN5pUo") # album with tracks completely removed/missing
assert results["tracks"][0]["trackNumber"] == 3
assert results["tracks"][13]["trackNumber"] == 18
results = yt.get_album("MPREb_YuigcYm2erf") # album with track (#8) disabled/greyed out
assert results["tracks"][7]["trackNumber"] is None
album = yt_auth.get_album(sample_album)
assert len(album) >= 9
assert "isExplicit" in album
assert album["tracks"][0]["isExplicit"]
assert all(item["views"] is not None for item in album["tracks"])
assert all(item["album"] is not None for item in album["tracks"])
assert album["tracks"][0]["trackNumber"] == 1
assert "feedbackTokens" in album["tracks"][0]
album = yt.get_album("MPREb_BQZvl3BFGay")
assert len(album["tracks"]) == 7
assert len(album["tracks"][0]["artists"]) == 1
album = yt.get_album("MPREb_rqH94Zr3NN0")
assert len(album["tracks"][0]["artists"]) == 2
album = yt.get_album("MPREb_TPH4WqN5pUo") # album with tracks completely removed/missing
assert album["tracks"][0]["trackNumber"] == 3
assert album["tracks"][13]["trackNumber"] == 18
album = yt.get_album("MPREb_YuigcYm2erf") # album with track (#8) disabled/greyed out
assert album["tracks"][7]["trackNumber"] is None

def test_get_album_other_versions(self, yt):
    """Check the entries returned under an album's ``other_versions`` key.

    Covers an album whose first variant is a single-artist "Album" and a
    non-explicit release whose first variant is an explicit three-artist "Single".
    """
    # Eminem - Curtain Call: The Hits (Explicit Variant)
    explicit_album = yt.get_album("MPREb_LQCAymzbaKJ")
    variants = explicit_album["other_versions"]
    assert len(variants) >= 1  # appears to be regional
    variant = variants[0]
    assert variant["type"] == "Album"
    variant_artists = variant["artists"]
    assert len(variant_artists) == 1
    assert variant_artists[0] == {"name": "Eminem", "id": "UCedvOgsKFzcK3hA5taf3KoQ"}
    assert variant["audioPlaylistId"] is not None

    # album that's multi-artist, a single, and has clean version
    # Cassö & RAYE - Prada
    clean_single = yt.get_album("MPREb_of3qfisa0yU")
    assert not clean_single["isExplicit"]
    variant = clean_single["other_versions"][0]
    assert variant["type"] == "Single"
    assert variant["isExplicit"]
    variant_artists = variant["artists"]
    assert len(variant_artists) == 3
    assert variant_artists[0]["id"] == "UCGWMNnI1Ky5bMcRlr73Cj2Q"
    assert variant_artists[1]["name"] == "RAYE"
    assert variant_artists[2] == {"id": "UCb7jnkQW94hzOoWkG14zs4w", "name": "D-Block Europe"}

def test_get_album_parsing(self, yt):
    """Check artist parsing for album tracks that list three artists."""
    linked_album = yt.get_album("MPREb_HLU4ajrAzcU")  # Flume - Palaces
    # album has a track with 3 artists, linked
    track_artists = linked_album["tracks"][3]["artists"]
    assert len(track_artists) == 3
    # all artists should have ids
    assert sum(1 for artist in track_artists if artist["id"]) == 3

    unlinked_album = yt.get_album("MPREb_M4IdGHS6DyO")  # IMANU - Unfold
    # album has tracks with 3 unlinked artists
    track_artists = unlinked_album["tracks"][3]["artists"]
    assert len(track_artists) == 3
    # test at least album artist is filled
    assert sum(1 for artist in track_artists if artist["id"]) >= 1

def test_get_song(self, config, yt, yt_oauth, sample_video):
song = yt_oauth.get_song(config["uploads"]["private_upload_id"]) # private upload
Expand Down
7 changes: 6 additions & 1 deletion ytmusicapi/mixins/browsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ytmusicapi.parsers.playlists import parse_playlist_items

from ..navigation import *
from ..parsers._utils import parse_real_count # protected ?
from ._protocol import MixinProtocol
from ._utils import get_datestamp

Expand Down Expand Up @@ -234,6 +235,7 @@ def get_artist(self, channelId: str) -> Dict:
if "subheader" not in descriptionShelf
else descriptionShelf["subheader"]["runs"][0]["text"]
)
artist["view_count"] = parse_real_count(nav(descriptionShelf, ["subheader", "runs", 0], True))
subscription_button = header["subscriptionButton"]["subscribeButtonRenderer"]
artist["channelId"] = subscription_button["channelId"]
artist["shuffleId"] = nav(
Expand All @@ -243,6 +245,9 @@ def get_artist(self, channelId: str) -> Dict:
header, ["startRadioButton", "buttonRenderer", *NAVIGATION_WATCH_PLAYLIST_ID], True
)
artist["subscribers"] = nav(subscription_button, ["subscriberCountText", "runs", 0, "text"], True)
artist["sub_count"] = parse_real_count(
nav(subscription_button, ["subscriberCountText", "runs", 0], True)
)
artist["subscribed"] = subscription_button["subscribed"]
artist["thumbnails"] = nav(header, THUMBNAILS, True)
artist["songs"] = {"browseId": None}
Expand Down Expand Up @@ -495,7 +500,7 @@ def get_album(self, browseId: str) -> Dict:
response = self._send_request(endpoint, body)
album = parse_album_header(response)
results = nav(response, SINGLE_COLUMN_TAB + SECTION_LIST_ITEM + MUSIC_SHELF)
album["tracks"] = parse_playlist_items(results["contents"], is_album=True)
album["tracks"] = parse_playlist_items(results["contents"], by_artists=album["artists"])
results = nav(response, SINGLE_COLUMN_TAB + SECTION_LIST + [1] + CAROUSEL, True)
if results is not None:
album["other_versions"] = parse_content_list(results["contents"], parse_album)
Expand Down
16 changes: 12 additions & 4 deletions ytmusicapi/navigation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from typing import Any, Dict, List, Literal, Optional, overload

CONTENT = ["contents", 0]
RUN_TEXT = ["runs", 0, "text"]
ZTEXT = [0, "text"]
TTEXT = [2, "text"]
RUN_TEXT = ["runs", *ZTEXT]
TAB_CONTENT = ["tabs", 0, "tabRenderer", "content"]
TAB_1_CONTENT = ["tabs", 1, "tabRenderer", "content"]
TWO_COLUMN_RENDERER = ["contents", "twoColumnBrowseResultsRenderer"]
Expand All @@ -21,15 +23,17 @@
MENU_LIKE_STATUS = [*MENU, "topLevelButtons", 0, "likeButtonRenderer", "likeStatus"]
MENU_SERVICE = ["menuServiceItemRenderer", "serviceEndpoint"]
TOGGLE_MENU = "toggleMenuServiceItemRenderer"
PLAY_BUTTON = ["overlay", "musicItemThumbnailOverlayRenderer", "content", "musicPlayButtonRenderer"]
OVERLAY_RENDERER = ["musicItemThumbnailOverlayRenderer", "content", "musicPlayButtonRenderer"]
PLAY_BUTTON = ["overlay", *OVERLAY_RENDERER]
NAVIGATION_BROWSE = ["navigationEndpoint", "browseEndpoint"]
NAVIGATION_BROWSE_ID = [*NAVIGATION_BROWSE, "browseId"]
PAGE_TYPE = ["browseEndpointContextSupportedConfigs", "browseEndpointContextMusicConfig", "pageType"]
WATCH_VIDEO_ID = ["watchEndpoint", "videoId"]
NAVIGATION_VIDEO_ID = ["navigationEndpoint", *WATCH_VIDEO_ID]
QUEUE_VIDEO_ID = ["queueAddEndpoint", "queueTarget", "videoId"]
NAVIGATION_PLAYLIST_ID = ["navigationEndpoint", "watchEndpoint", "playlistId"]
NAVIGATION_WATCH_PLAYLIST_ID = ["navigationEndpoint", "watchPlaylistEndpoint", "playlistId"]
WATCH_PID = ["watchPlaylistEndpoint", "playlistId"]
NAVIGATION_WATCH_PLAYLIST_ID = ["navigationEndpoint", *WATCH_PID]
NAVIGATION_VIDEO_TYPE = [
"watchEndpoint",
"watchEndpointMusicSupportedConfigs",
Expand All @@ -43,13 +47,17 @@
TEXT_RUNS = ["text", "runs"]
TEXT_RUN = [*TEXT_RUNS, 0]
TEXT_RUN_TEXT = [*TEXT_RUN, "text"]
LAST_RUN = ["runs", -1]
TEXT_LAST_RUN = ["text", *LAST_RUN]
LAST_SUB_RUN = ["subtitle", *LAST_RUN]
SUBTITLE = ["subtitle", *RUN_TEXT]
SUBTITLE_RUNS = ["subtitle", "runs"]
SUBTITLE2 = [*SUBTITLE_RUNS, 2, "text"]
SUBTITLE2 = [*SUBTITLE_RUNS, *TTEXT]
SUBTITLE3 = [*SUBTITLE_RUNS, 4, "text"]
THUMBNAIL = ["thumbnail", "thumbnails"]
THUMBNAILS = ["thumbnail", "musicThumbnailRenderer", *THUMBNAIL]
THUMBNAIL_RENDERER = ["thumbnailRenderer", "musicThumbnailRenderer", *THUMBNAIL]
THUMBNAIL_OVERLAY = ["thumbnailOverlay", *OVERLAY_RENDERER, "playNavigationEndpoint", *WATCH_PID]
THUMBNAIL_CROPPED = ["thumbnail", "croppedSquareThumbnailRenderer", *THUMBNAIL]
FEEDBACK_TOKEN = ["feedbackEndpoint", "feedbackToken"]
BADGE_PATH = [0, "musicInlineBadgeRenderer", "accessibilityData", "accessibilityData", "label"]
Expand Down
12 changes: 12 additions & 0 deletions ytmusicapi/parsers/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,18 @@ def get_dot_separator_index(runs):
return index


def parse_real_count(run):
    """Pull an integer count out of a views / plays / subscribers text run.

    :param run: A run dict whose ``"text"`` value starts with a count such as
        ``"1.2M"``, ``"3,456"`` or ``"987"``, optionally followed by a label
        (e.g. ``"1.2M subscribers"``). May be ``None`` or lack a ``"text"`` key.
    :return: The count as an ``int``, or ``-1`` when ``run`` is missing or its
        leading token cannot be read as a number.
    """
    if not run or "text" not in run:
        return -1

    # split() without arguments also copes with leading or non-breaking spaces
    tokens = run["text"].split()
    if not tokens:
        return -1

    # thousands separators can appear with or without a magnitude suffix
    count = tokens[0].replace(",", "")
    if not count:
        return -1

    multipliers = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}
    try:
        if count[-1] in multipliers:
            # round() rather than int(): a float product can land just below
            # the intended whole number and int() would truncate it.
            return round(float(count[:-1]) * multipliers[count[-1]])
        return int(count)
    except (ValueError, OverflowError):
        # non-numeric text such as "No views": same sentinel as a missing run
        return -1


def parse_duration(duration):
if duration is None:
return duration
Expand Down
2 changes: 2 additions & 0 deletions ytmusicapi/parsers/albums.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ def parse_album_header(response):
"title": nav(header, TITLE_TEXT),
"type": nav(header, SUBTITLE),
"thumbnails": nav(header, THUMBNAIL_CROPPED),
"isExplicit": nav(header, SUBTITLE_BADGE_LABEL, True) is not None,
}

if "description" in header:
album["description"] = header["description"]["runs"][0]["text"]

Expand Down
99 changes: 72 additions & 27 deletions ytmusicapi/parsers/browsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,22 +52,30 @@ def parse_content_list(results, parse_func, key=MTRIR):


def parse_album(result):
return {
album = {
"title": nav(result, TITLE_TEXT),
"year": nav(result, SUBTITLE2, True),
"browseId": nav(result, TITLE + NAVIGATION_BROWSE_ID),
"audioPlaylistId": nav(result, THUMBNAIL_OVERLAY, True),
"thumbnails": nav(result, THUMBNAIL_RENDERER),
"isExplicit": nav(result, SUBTITLE_BADGE_LABEL, True) is not None,
}

runs = nav(result, SUBTITLE_RUNS)
if len(runs) >= 2:
album["type"] = nav(runs, ZTEXT, True)

def parse_single(result):
return {
"title": nav(result, TITLE_TEXT),
"year": nav(result, SUBTITLE, True),
"browseId": nav(result, TITLE + NAVIGATION_BROWSE_ID),
"thumbnails": nav(result, THUMBNAIL_RENDERER),
}
# navigationEndpoint key is present when secondary runs are artists
if "navigationEndpoint" in runs[2]:
album["artists"] = artists_from_runs(runs)
else:
album["year"] = nav(runs, TTEXT, True)

# it's a single with just the year
else:
album["type"] = "Single"
album["year"] = nav(runs, ZTEXT, True)

return album


def parse_song(result):
Expand All @@ -86,15 +94,16 @@ def parse_song_flat(data):
song = {
"title": nav(columns[0], TEXT_RUN_TEXT),
"videoId": nav(columns[0], TEXT_RUN + NAVIGATION_VIDEO_ID, True),
"artists": parse_song_artists(data, 1),
"artists": parse_pl_song_artists(data, 1),
"thumbnails": nav(data, THUMBNAILS),
"isExplicit": nav(data, BADGE_LABEL, True) is not None,
}
if len(columns) > 2 and columns[2] is not None and "navigationEndpoint" in nav(columns[2], TEXT_RUN):
song["album"] = {
"name": nav(columns[2], TEXT_RUN_TEXT),
"id": nav(columns[2], TEXT_RUN + NAVIGATION_BROWSE_ID),
}
if (
len(columns) > 2
and columns[2] is not None
and "navigationEndpoint" in (targ := nav(columns[2], TEXT_RUN))
):
song["album"] = parse_id_name(targ)
else:
song["views"] = nav(columns[1], ["text", "runs", -1, "text"]).split(" ")[0]

Expand All @@ -103,34 +112,69 @@ def parse_song_flat(data):

def parse_video(result):
runs = nav(result, SUBTITLE_RUNS)
artists_len = get_dot_separator_index(runs)
# artists_len = get_dot_separator_index(runs)
videoId = nav(result, NAVIGATION_VIDEO_ID, True)
if not videoId:
# I believe this
videoId = next(
id for entry in nav(result, MENU_ITEMS) if nav(entry, MENU_SERVICE + QUEUE_VIDEO_ID, True)
)
return {
(
found
for entry in nav(result, MENU_ITEMS)
if (found := nav(entry, MENU_SERVICE + QUEUE_VIDEO_ID, True))
),
None,
) # this won't match anything for episodes, None to catch iterator
result = {
"title": nav(result, TITLE_TEXT),
"videoId": videoId,
"artists": parse_song_artists_runs(runs[:artists_len]),
"playlistId": nav(result, NAVIGATION_PLAYLIST_ID, True),
"thumbnails": nav(result, THUMBNAIL_RENDERER, True),
"views": runs[-1]["text"].split(" ")[0],
}

# it's an ~episode~ -> makes the first key a duration { "text": "%m min %s sec" } format
# unsure if we should capture the duration for edge cases
# could also be an unlinked artist
if "navigationEndpoint" not in runs[0] and any(x in runs[0]["text"] for x in ["sec", "min"]):
result["type"] = "episode"
# views are unavailable on episodes
result["views"] = None
result["view_count"] = -1
result["artists"] = artists_from_runs(runs[2:], 0)
else:
result["type"] = "song"
result["views"] = runs[-1]["text"].split(" ")[0]
result["view_count"] = parse_real_count(runs[-1]) if len(runs) > 2 else -1
result["artists"] = artists_from_runs(runs[:-2], 0)

return result


def parse_playlist(data):
playlist = {
"title": nav(data, TITLE_TEXT),
"playlistId": nav(data, TITLE + NAVIGATION_BROWSE_ID)[2:],
"thumbnails": nav(data, THUMBNAIL_RENDERER),
}
subtitle = data["subtitle"]
if "runs" in subtitle:
playlist["description"] = "".join([run["text"] for run in subtitle["runs"]])
if len(subtitle["runs"]) == 3 and re.search(r"\d+ ", nav(data, SUBTITLE2)):
playlist["count"] = nav(data, SUBTITLE2).split(" ")[0]
playlist["author"] = parse_song_artists_runs(subtitle["runs"][:1])
runs = nav(data, SUBTITLE_RUNS)
if runs:
playlist["description"] = "".join([run["text"] for run in runs])
if len(runs) == 3 and runs[1]["text"] == " • ":
# genre charts from get_charts('US') are sent here...
if runs[0]["text"] == "Chart" or runs[-1]["text"] == "YouTube Music":
playlist["count"] = None
playlist["view_count"] = -1
playlist["author"] = {"name": "YouTube Music", "id": None}
playlist["featured_artists"] = None
else:
playlist["count"] = nav(data, SUBTITLE2).split(" ")[0] # this is "views" everywhere else
playlist["view_count"] = parse_real_count(runs[2])
playlist["author"] = parse_id_name(runs[0])
playlist["featured_artists"] = None
else:
playlist["featured_artists"] = nav(runs, ZTEXT, True)
# fill default, maintain return format
playlist["author"] = {"name": "YouTube Music", "id": None}
playlist["view_count"] = -1

return playlist

Expand All @@ -143,6 +187,7 @@ def parse_related_artist(data):
"title": nav(data, TITLE_TEXT),
"browseId": nav(data, TITLE + NAVIGATION_BROWSE_ID),
"subscribers": subscribers,
"sub_count": parse_real_count(nav(data, LAST_SUB_RUN, True)),
"thumbnails": nav(data, THUMBNAIL_RENDERER),
}

Expand Down
2 changes: 1 addition & 1 deletion ytmusicapi/parsers/explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def parse_chart_artist(data):

def parse_chart_trending(data):
flex_0 = get_flex_column_item(data, 0)
artists = parse_song_artists(data, 1)
artists = parse_pl_song_artists(data, 1)
index = get_dot_separator_index(artists)
# last item is views for some reason
views = None if index == len(artists) else artists.pop()["name"].split(" ")[0]
Expand Down
Loading
Loading