From f2dbd41742d6213d1d8075fce0a160fa49e40d92 Mon Sep 17 00:00:00 2001 From: George Pchelkin Date: Sun, 13 Oct 2024 22:51:56 +0100 Subject: [PATCH] fix scdl after adding regex --- scdlbot/__main__.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scdlbot/__main__.py b/scdlbot/__main__.py index 6420a488..166ef558 100755 --- a/scdlbot/__main__.py +++ b/scdlbot/__main__.py @@ -234,9 +234,9 @@ def get_response_text(file_name): # RANT_TEXT_PUBLIC = f"[Start me in PM to read help and learn how to use me](t.me/{TG_BOT_USERNAME}?start=1)" # Known and supported site domains: -# support soundcloud.com and soundcloud.app.goo.gl links: -DOMAIN_SC = "soundcloud" +DOMAIN_SC = "soundcloud.com" DOMAIN_SC_API = "api.soundcloud.com" +DOMAIN_SC_GOOGL = "soundcloud.app.goo.gl" DOMAIN_BC = "bandcamp.com" DOMAIN_YT = "youtube.com" DOMAIN_YT_BE = "youtu.be" @@ -246,7 +246,7 @@ def get_response_text(file_name): DOMAIN_IG = "instagram.com" DOMAIN_TW = "twitter.com" DOMAIN_TWX = "x.com" -DOMAINS_STRINGS = [DOMAIN_SC, DOMAIN_SC_API, DOMAIN_BC, DOMAIN_YT, DOMAIN_YT_BE, DOMAIN_YMR, DOMAIN_YMC, DOMAIN_TT, DOMAIN_IG, DOMAIN_TW, DOMAIN_TWX] +DOMAINS_STRINGS = [DOMAIN_SC, DOMAIN_SC_API, DOMAIN_SC_GOOGL, DOMAIN_BC, DOMAIN_YT, DOMAIN_YT_BE, DOMAIN_YMR, DOMAIN_YMC, DOMAIN_TT, DOMAIN_IG, DOMAIN_TW, DOMAIN_TWX] DOMAINS = [rf"^(?:[^\s]+\.)?{re.escape(domain_string)}$" for domain_string in DOMAINS_STRINGS] AUDIO_FORMATS = ["mp3"] @@ -725,12 +725,12 @@ def get_direct_urls_dict(message, mode, proxy, source_ip, allow_unknown_sites): urls_dict = {} for url_item in urls: - # Check domain in hostname (e.g netflix.com includes x.com) + # FIXME Check domain in hostname with regex in all places (e.g netflix.com includes x.com) unknown_site = not any((re.match(domain, url_item.host) for domain in DOMAINS)) # Unshorten soundcloud.app.goo.gl and unknown sites links. Example: https://soundcloud.app.goo.gl/mBMvG # TODO Unshorten unknown sites links again? Because yt-dlp may only support unshortened? - # if unknown_site or DOMAIN_SC in url_item.host: - if DOMAIN_SC in url_item.host: + # if unknown_site or DOMAIN_SC_GOOGL in url_item.host: + if DOMAIN_SC_GOOGL in url_item.host: proxy_args = None if proxy: proxy_args = {"http": proxy, "https": proxy} @@ -751,7 +751,7 @@ def get_direct_urls_dict(message, mode, proxy, source_ip, allow_unknown_sites): url = url_item unknown_site = not any((re.match(domain, url.host) for domain in DOMAINS)) url_text = url.to_text(full_quote=True) - logger.debug(f"unshortened link: {url_text}") + logger.debug(f"Unshortened link: {url_text}") # url_text = url_text.replace("m.soundcloud.com", "soundcloud.com") url_parts_num = len([part for part in url.path_parts if part]) if unknown_site or mode == "link": @@ -761,7 +761,7 @@ def get_direct_urls_dict(message, mode, proxy, source_ip, allow_unknown_sites): # If it's a known site, we check it more thoroughly below. # urls_dict[url_text] = ydl_get_direct_urls(url_text, COOKIES_FILE, source_ip, proxy) urls_dict[url_text] = "http" - elif DOMAIN_SC in url.host and (2 <= url_parts_num <= 4 or DOMAIN_SC_API in url.host) and (not "you" in url.path_parts): + elif (DOMAIN_SC in url.host or DOMAIN_SC_GOOGL in url.host) and (2 <= url_parts_num <= 4 or DOMAIN_SC_API in url.host) and (not "you" in url.path_parts): # SoundCloud: tracks, sets and widget pages, no /you/ pages # TODO support private sets URLs that have 5 parts # We know for sure these links can be downloaded, so we just skip running ydl_get_direct_urls @@ -940,9 +940,9 @@ def run_async(coro): cmd_name = "" cmd_args = () cmd_input = None - if (DOMAIN_SC in host and DOMAIN_SC_API not in host) or (DOMAIN_BC in host and BCDL_ENABLE): + if ((DOMAIN_SC in host or DOMAIN_SC_GOOGL in host) and DOMAIN_SC_API not in host) or (DOMAIN_BC in host and BCDL_ENABLE): # If link is sc/bc, we try scdl/bcdl first: - if DOMAIN_SC in host and DOMAIN_SC_API not in host: + if (DOMAIN_SC in host or DOMAIN_SC_GOOGL in host) and DOMAIN_SC_API not in host: cmd = scdl_bin cmd_name = str(cmd) cmd_args = ( @@ -1266,7 +1266,7 @@ def run_async(coro): file_root, file_ext = os.path.splitext(file_name) file_title = file_root.replace(file_ext, "") addition = ": " + file_title - elif DOMAIN_SC in host: + elif DOMAIN_SC in host or DOMAIN_SC_GOOGL in host: source = "SoundCloud" elif DOMAIN_BC in host: source = "Bandcamp"