From a744afb041c73cf2e98de850cfa3771edd44fed7 Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 13:44:37 +0300 Subject: [PATCH 01/22] Add metadata to file while downloading --- scdlbot/scdlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index fb5800ae7..905330092 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -579,7 +579,7 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message 'preferredcodec': 'mp3', 'preferredquality': '128', }, - # {'key': 'EmbedThumbnail',}, {'key': 'FFmpegMetadata',}, + {'key': 'FFmpegMetadata',}, ], } host = urlparse(url).hostname From cdf2676a39b9735fb33fe4ed21b9536238b25c7a Mon Sep 17 00:00:00 2001 From: Nitan Alexandru Marcel Date: Mon, 5 Apr 2021 16:27:27 +0300 Subject: [PATCH 02/22] Remove 128 bitrate limit --- scdlbot/scdlbot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 905330092..5d3074ff5 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -577,7 +577,7 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message { 'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', - 'preferredquality': '128', + 'preferredquality': '312', }, {'key': 'FFmpegMetadata',}, ], @@ -737,7 +737,7 @@ def convert_and_split_audio_file(self, file=""): try: file_converted = file.replace(file_ext, ".mp3") ffinput = ffmpeg.input(file) - ffmpeg.output(ffinput, file_converted, audio_bitrate="128k", vn=None).run() + ffmpeg.output(ffinput, file_converted, vn=None).run() file = file_converted file_root, file_ext = os.path.splitext(file) file_format = file_ext.replace(".", "").lower() From 1f8377da2c46c15b8770ad06d46dadee351172a7 Mon Sep 17 00:00:00 2001 From: Nitan Alexandru Marcel Date: Mon, 5 Apr 2021 16:28:33 +0300 Subject: [PATCH 03/22] Update help.tg.md --- scdlbot/texts/help.tg.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scdlbot/texts/help.tg.md b/scdlbot/texts/help.tg.md index 38eaaaaa9..a19a9a0db 100755 --- a/scdlbot/texts/help.tg.md +++ b/scdlbot/texts/help.tg.md @@ -1,4 +1,4 @@ -Hi! I download and send audios from videos/tracks/sets/albums in *MP3 128 kbps* with tags and artwork. Files over 50 MB are split into parts due to Telegram Bot API limit. +Hi! I download and send audios from videos/tracks/sets/albums in *MP3* with tags and artwork. Files over 50 MB are split into parts due to Telegram Bot API limit. *Usage:* _Send or forward_ a text message containing links and I will: From ab3c6b348100db668f17586b5d54a73edc1cfd17 Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 19:38:42 +0300 Subject: [PATCH 04/22] Add blacklist and whitelist support --- scdlbot/scdlbot.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 5d3074ff5..e548c824c 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -269,6 +269,9 @@ def settings_command_callback(self, update: Update, context: CallbackContext): def common_command_callback(self, update: Update, context: CallbackContext): self.init_chat(update.message) chat_id = update.message.chat_id + if not self.is_chat_allowed(chat_id): + context.bot.send_message(chat_id=chat_id, text="This command isn't allowed in this chat.") + return chat_type = update.message.chat.type reply_to_message_id = update.message.message_id command_entities = update.message.parse_entities(types=[MessageEntity.BOT_COMMAND]) @@ -347,6 +350,7 @@ def common_command_callback(self, update: Update, context: CallbackContext): self.cleanup_chat(chat_id) def button_query_callback(self, update: Update, context: CallbackContext): + btn_msg = update.callback_query.message self.init_chat(btn_msg) user_id = update.callback_query.from_user.id @@ -355,6 +359,9 @@ def button_query_callback(self, update: Update, context: CallbackContext): chat_id = chat.id chat_type = chat.type orig_msg_id, action = update.callback_query.data.split() + if not self.is_chat_allowed(chat_id): + update.callback_query.answer(text="This command isn't allowed in this chat.") + return if orig_msg_id == "settings": if chat_type != Chat.PRIVATE: chat_member_status = chat.get_member(user_id).status @@ -859,3 +866,23 @@ def send_audio_file_parts(self, bot, chat_id, file_parts, reply_to_message_id=No if len(sent_audio_ids) != len(file_parts): raise FileSentPartiallyError(sent_audio_ids) return sent_audio_ids + + def is_chat_allowed(self, chat_id): + try: + whitelist = set(int(x) for x in os.environ.get("WHITELIST_CHATS", "").split()) + except ValueError: + raise ValueError("Your whitelisted chats does not contain valid integers.") + try: + blacklist = set(int(x) for x in os.environ.get("BLACK_LIST_CHATS", "").split()) + except ValueError: + raise ValueError("Your blacklisted chats does not contain valid integers.") + if whitelist: + if chat_id not in whitelist: + return False + if blacklist: + if chat_id in blacklist: + return False + if whitelist and blacklist: + if chat_id in blacklist: + return False + return True From a2a46ee49c2d4f3481f0361ed91ca604a50f3be3 Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 20:41:00 +0300 Subject: [PATCH 05/22] Exit chat if blacklisted --- scdlbot/scdlbot.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index e548c824c..0dc9635e9 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -124,6 +124,9 @@ def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies= unknown_handler = MessageHandler(Filters.command, self.unknown_command_callback) dispatcher.add_handler(unknown_handler) + blacklist_whitelist_handler = MessageHandler(Filters.status_update.new_chat_members, self.blacklist_whitelist) + dispatcher.add_handler(blacklist_whitelist_handler) + dispatcher.add_error_handler(self.error_callback) self.bot_username = self.updater.bot.get_me().username @@ -728,6 +731,13 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message bot.delete_message(chat_id=chat_id, message_id=wait_message_id) except: pass + + @run_async + def blacklist_whitelist(self, update: Update, context: CallbackContext): + chat_id = update.message.chat_id + if not self.is_chat_allowed(chat_id): + context.bot.leave_chat(chat_id) + def convert_and_split_audio_file(self, file=""): file_root, file_ext = os.path.splitext(file) From 29a194e15e551f523b5bd75b9c92e8316fad004f Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 21:10:38 +0300 Subject: [PATCH 06/22] Ignore telegram urls --- scdlbot/scdlbot.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 0dc9635e9..5f1100d11 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -452,19 +452,26 @@ def prepare_urls(self, msg_or_text, direct_urls=False, source_ip=None, proxy=Non url_entities.update(url_caption_entities) for entity in url_entities: url_str = url_entities[entity] - logger.debug("Entity URL Parsed: %s", url_str) - if "://" not in url_str: - url_str = "http://{}".format(url_str) - urls.append(URL(url_str)) + if self.url_valid(url_str): + logger.debug("Entity URL Parsed: %s", url_str) + if "://" not in url_str: + url_str = "http://{}".format(url_str) + urls.append(URL(url_str)) + else: + logger.debug("Entry URL not valid: %s", url_str) text_link_entities = msg_or_text.parse_entities(types=[MessageEntity.TEXT_LINK]) text_link_caption_entities = msg_or_text.parse_caption_entities(types=[MessageEntity.TEXT_LINK]) text_link_entities.update(text_link_caption_entities) for entity in text_link_entities: url_str = entity.url - logger.debug("Entity Text Link Parsed: %s", url_str) - urls.append(URL(url_str)) + if self.url_valid(url_str): + logger.debug("Entity Text Link Parsed: %s", url_str) + urls.append(URL(url_str)) + else: + logger.debug("Entry URL not valid: %s", url_str) else: - urls = find_all_links(msg_or_text, default_scheme="http") + all_links = find_all_links(msg_or_text, default_scheme="http") + urls = [link for link in all_links if self.url_valid(link)] urls_dict = {} for url_item in urls: url = url_item @@ -503,6 +510,22 @@ def prepare_urls(self, msg_or_text, direct_urls=False, source_ip=None, proxy=Non urls_dict[url_text] = exc.status return urls_dict + def url_valid(self, url): + telegram_domains = ['t.me', + 'telegram.org', + 'telegram.dog', + 'telegra.ph', + 'tdesktop.com', + 'telesco.pe', + 'graph.org', + 'contest.dev'] + netloc = urlparse(url).netloc + domain = netloc.split(".", 1)[-1] + logger.debug("Checking Url Entry: %s", netloc) + if netloc in telegram_domains: + return False + return True + @REQUEST_TIME.time() @run_async def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message_id=None, From 33ca55765dedd2bf8b79b098a6e0045a3e3105c3 Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 21:14:12 +0300 Subject: [PATCH 07/22] Catch AttributeError --- scdlbot/scdlbot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 5f1100d11..da3c4dec4 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -519,8 +519,10 @@ def url_valid(self, url): 'telesco.pe', 'graph.org', 'contest.dev'] - netloc = urlparse(url).netloc - domain = netloc.split(".", 1)[-1] + try: + netloc = urlparse(url).netloc + except AttributeError: + return False logger.debug("Checking Url Entry: %s", netloc) if netloc in telegram_domains: return False From 731e7f663a5e3799fdac43c3f6493374006da386 Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 21:33:05 +0300 Subject: [PATCH 08/22] Add url blacklisting --- scdlbot/scdlbot.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index da3c4dec4..962ade242 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -3,6 +3,7 @@ """Main module.""" import gc +import os import pathlib import random import shelve @@ -458,7 +459,7 @@ def prepare_urls(self, msg_or_text, direct_urls=False, source_ip=None, proxy=Non url_str = "http://{}".format(url_str) urls.append(URL(url_str)) else: - logger.debug("Entry URL not valid: %s", url_str) + logger.debug("Entry URL not valid or blacklisted: %s", url_str) text_link_entities = msg_or_text.parse_entities(types=[MessageEntity.TEXT_LINK]) text_link_caption_entities = msg_or_text.parse_caption_entities(types=[MessageEntity.TEXT_LINK]) text_link_entities.update(text_link_caption_entities) @@ -468,7 +469,7 @@ def prepare_urls(self, msg_or_text, direct_urls=False, source_ip=None, proxy=Non logger.debug("Entity Text Link Parsed: %s", url_str) urls.append(URL(url_str)) else: - logger.debug("Entry URL not valid: %s", url_str) + logger.debug("Entry URL not valid or blacklisted: %s", url_str) else: all_links = find_all_links(msg_or_text, default_scheme="http") urls = [link for link in all_links if self.url_valid(link)] @@ -526,6 +527,23 @@ def url_valid(self, url): logger.debug("Checking Url Entry: %s", netloc) if netloc in telegram_domains: return False + return self.url_allowed(url) + + def url_allowed(self, url): + # Example export WHITELIST_DOM = "invidious.tube kavin.rocks himiko.cloud " + whitelist = set(x for x in os.environ.get("WHITELIST_DOM", "").split()) + blacklist = set(x for x in os.environ.get("BLACKLIST_DOM", "").split()) + netloc = urlparse(url).netloc + + if whitelist: + if netloc not in whitelist: + return False + if blacklist: + if netloc in blacklist: + return False + if whitelist and blacklist: + if netloc in blacklist: + return False return True @REQUEST_TIME.time() @@ -908,7 +926,7 @@ def is_chat_allowed(self, chat_id): except ValueError: raise ValueError("Your whitelisted chats does not contain valid integers.") try: - blacklist = set(int(x) for x in os.environ.get("BLACK_LIST_CHATS", "").split()) + blacklist = set(int(x) for x in os.environ.get("BLACKLIST_CHATS", "").split()) except ValueError: raise ValueError("Your blacklisted chats does not contain valid integers.") if whitelist: From d29ad2ae92ab9752a71b23549f5610ffd9802836 Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 21:42:55 +0300 Subject: [PATCH 09/22] Update url blacklist/whitelsit details --- scdlbot/scdlbot.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 962ade242..29161e0a1 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -520,21 +520,20 @@ def url_valid(self, url): 'telesco.pe', 'graph.org', 'contest.dev'] + logger.debug("Checking Url Entry: %s", url) try: netloc = urlparse(url).netloc except AttributeError: return False - logger.debug("Checking Url Entry: %s", netloc) if netloc in telegram_domains: return False return self.url_allowed(url) def url_allowed(self, url): - # Example export WHITELIST_DOM = "invidious.tube kavin.rocks himiko.cloud " + # Example export BLACKLIST_DOM = "invidious.tube invidious.kavin.rocks invidious.himiko.cloud invidious.namazso.eu dev.viewtube.io tube.cadence.moe piped.kavin.rocks" whitelist = set(x for x in os.environ.get("WHITELIST_DOM", "").split()) blacklist = set(x for x in os.environ.get("BLACKLIST_DOM", "").split()) netloc = urlparse(url).netloc - if whitelist: if netloc not in whitelist: return False From f3cfe435621245b8703ca3302c1570e8284c096f Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 21:57:44 +0300 Subject: [PATCH 10/22] diable telegram_handler --- scdlbot/__main__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scdlbot/__main__.py b/scdlbot/__main__.py index 77501bfd3..78e09d532 100755 --- a/scdlbot/__main__.py +++ b/scdlbot/__main__.py @@ -19,9 +19,10 @@ tg_bot_token = os.environ['TG_BOT_TOKEN'] alert_chat_ids = list(map(int, os.getenv('ALERT_CHAT_IDS', '0').split(','))) -telegram_handler = TelegramHandler(token=tg_bot_token, chat_id=str(alert_chat_ids[0])) -telegram_handler.setLevel(logging.WARNING) -logging_handlers.append(telegram_handler) +# Disable telegram handler +#telegram_handler = TelegramHandler(token=tg_bot_token, chat_id=str(alert_chat_ids[0])) +#telegram_handler.setLevel(logging.WARNING) +#logging_handlers.append(telegram_handler) syslog_debug = bool(int(os.getenv('SYSLOG_DEBUG', '0'))) syslog_logging_level = logging.DEBUG if syslog_debug else logging.INFO From 04b2b39535d5174289c60e4fbfc41dc0661a4b0f Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 22:00:51 +0300 Subject: [PATCH 11/22] Completly remove python-telegram-handler Useless, and it spams a big error at start that can't be fixed unless I fork the libary's repo --- .env.sample | 2 -- app.json | 4 ---- requirements.txt | 1 - scdlbot/__main__.py | 6 ------ 4 files changed, 13 deletions(-) diff --git a/.env.sample b/.env.sample index be60a09e5..0a01f801d 100644 --- a/.env.sample +++ b/.env.sample @@ -12,8 +12,6 @@ DL_TIMEOUT="300" # Comma-separated chat IDs with no replying and caption spam NO_FLOOD_CHAT_IDS="-10018859218,-1011068201" -ALERT_CHAT_IDS="1265343,3265143" - BIN_PATH="" CHAT_STORAGE="/home/gpchelkin/scdlbotdata" diff --git a/app.json b/app.json index 7e7d97e19..6b6b35417 100644 --- a/app.json +++ b/app.json @@ -43,10 +43,6 @@ "description": "Comma-separated chat IDs with no replying and caption hashtags", "required": false }, - "ALERT_CHAT_IDS": { - "description": "Comma-separated chat IDs with no replying and caption hashtags", - "required": false - }, "BIN_PATH": { "description": "Custom directory where scdl and bandcamp-dl binaries are available", "required": false diff --git a/requirements.txt b/requirements.txt index 07a713aa8..f4047f469 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,6 @@ prompt-toolkit==3.0.3; python_version >= "3.6" pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" pypiwin32==223; platform_system == "Windows" and platform_python_implementation != "PyPy" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_version > "3.4") python-telegram-bot==12.8 -python-telegram-handler==2.2 pytz==2021.1; python_version >= "3.6" pywin32==300; platform_system == "Windows" and platform_python_implementation != "PyPy" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_version > "3.4") requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") diff --git a/scdlbot/__main__.py b/scdlbot/__main__.py index 78e09d532..0badaebfc 100755 --- a/scdlbot/__main__.py +++ b/scdlbot/__main__.py @@ -5,7 +5,6 @@ from logging.handlers import SysLogHandler from prometheus_client import start_http_server -from telegram_handler import TelegramHandler from scdlbot.scdlbot import ScdlBot @@ -18,11 +17,6 @@ logging_handlers.append(console_handler) tg_bot_token = os.environ['TG_BOT_TOKEN'] -alert_chat_ids = list(map(int, os.getenv('ALERT_CHAT_IDS', '0').split(','))) -# Disable telegram handler -#telegram_handler = TelegramHandler(token=tg_bot_token, chat_id=str(alert_chat_ids[0])) -#telegram_handler.setLevel(logging.WARNING) -#logging_handlers.append(telegram_handler) syslog_debug = bool(int(os.getenv('SYSLOG_DEBUG', '0'))) syslog_logging_level = logging.DEBUG if syslog_debug else logging.INFO From e059b6cb3a2ba81882a3dcbac9539cb5420e0212 Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 22:19:23 +0300 Subject: [PATCH 12/22] Update env vars --- .env.sample | 9 +++++++++ app.json | 18 +++++++++++++++++- scdlbot/scdlbot.py | 6 +++--- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/.env.sample b/.env.sample index 0a01f801d..0102956f9 100644 --- a/.env.sample +++ b/.env.sample @@ -19,6 +19,11 @@ CHAT_STORAGE="/home/gpchelkin/scdlbotdata" # For using inline mode bot needs to store audios somewhere. ID of that chat. STORE_CHAT_ID="-1795100" +A space separated list of chat_ids which should be considered whitelisted - the bot will only join those chats **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration. +WHITELIST_CHATS ="-1795100, 1795102" +# A space separated list of chat_ids which should be considered blacklisted - the bot will not join those chats. **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration. +BLACKLIST_CHATS ="-1795100, 1795102" + SOURCE_IPS="9.21.18.2,9.21.16.9" PROXIES="socks5://127.0.0.1:1080,socks5://127.0.0.1:1081," SERVE_AUDIO="0" @@ -61,6 +66,10 @@ PORT="5000" # Your host URL like https://scdlbot.herokuapp.com/, required for webhook mode APP_URL="https://yourapp.heroku.com/" +# A space separated list of domains which should be considered whitelisted - the bot will only process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration. +WHITELIST_DOMS="example.com subdomain.example.com" +# A space separated list of domains which should be considered blacklisted - the bot will not process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration. +BLACKLIST_DOMS="example.com subdomain.example.com" # Your URL path for webhook to listen URL_PATH="166849652AAEPCgHuDf3K6HvI2OkrJmDN2k9R6mcfmLs" diff --git a/app.json b/app.json index 6b6b35417..57f396b7f 100644 --- a/app.json +++ b/app.json @@ -56,6 +56,14 @@ "description": "Chat ID for storing audios for inline mode", "required": false }, + "WHITELIST_CHATS": { + "description": "A space separated list of chat_ids which should be considered whitelisted - the bot will only join those chats **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.", + "required": false + }, + "BLACKLIST_CHATS": { + "description": "A space separated list of chat_ids which should be considered blacklisted - the bot will not join those chats. **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.", + "required": false + }, "COOKIES_FILE": { "description": "HTTP or local path with cookies file for Yandex.Music", "required": false @@ -87,7 +95,15 @@ "APP_URL": { "description": "Your host URL like https://scdlbot.herokuapp.com/, required for webhook mode", "required": false - } + }, + "WHITELIST_DOMS": { + "description": "A space separated list of domains which should be considered whitelisted - the bot will only process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.", + "required": false + }, + "BLACKLIST_DOMS": { + "description": "A space separated list of domains which should be considered blacklisted - the bot will not process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.", + "required": false + }, }, "formation": { "web": { diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 29161e0a1..6a967fcfd 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -530,9 +530,9 @@ def url_valid(self, url): return self.url_allowed(url) def url_allowed(self, url): - # Example export BLACKLIST_DOM = "invidious.tube invidious.kavin.rocks invidious.himiko.cloud invidious.namazso.eu dev.viewtube.io tube.cadence.moe piped.kavin.rocks" - whitelist = set(x for x in os.environ.get("WHITELIST_DOM", "").split()) - blacklist = set(x for x in os.environ.get("BLACKLIST_DOM", "").split()) + # Example export BLACKLIST_DOMS = "invidious.tube invidious.kavin.rocks invidious.himiko.cloud invidious.namazso.eu dev.viewtube.io tube.cadence.moe piped.kavin.rocks" + whitelist = set(x for x in os.environ.get("WHITELIST_DOMS", "").split()) + blacklist = set(x for x in os.environ.get("BLACKLIST_DOMS", "").split()) netloc = urlparse(url).netloc if whitelist: if netloc not in whitelist: From 60f6b8511a7dc296c5f55adc975f8e495e1e5c62 Mon Sep 17 00:00:00 2001 From: Alex Marcel Date: Mon, 5 Apr 2021 22:23:33 +0300 Subject: [PATCH 13/22] Remove traces of python-telegram-handler module --- scdlbot/__main__.py | 2 +- scdlbot/scdlbot.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/scdlbot/__main__.py b/scdlbot/__main__.py index 0badaebfc..d92561fd9 100755 --- a/scdlbot/__main__.py +++ b/scdlbot/__main__.py @@ -63,7 +63,7 @@ def main(): cookies_file = os.getenv('COOKIES_FILE', '') scdlbot = ScdlBot(tg_bot_token, tg_bot_api, proxies, - store_chat_id, no_flood_chat_ids, alert_chat_ids, + store_chat_id, no_flood_chat_ids, dl_dir, dl_timeout, max_tg_file_size, max_convert_file_size, chat_storage_file, app_url, serve_audio, cookies_file, source_ips) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 6a967fcfd..637f64c52 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -38,7 +38,7 @@ class ScdlBot: def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies=None, - store_chat_id=None, no_flood_chat_ids=None, alert_chat_ids=None, + store_chat_id=None, no_flood_chat_ids=None, dl_dir="/tmp/scdlbot", dl_timeout=300, max_tg_file_size=45_000_000, max_convert_file_size=80_000_000, chat_storage_file="/tmp/scdlbotdata", app_url=None, serve_audio=False, cookies_file=None, source_ips=None): @@ -71,7 +71,6 @@ def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies= self.chat_storage = shelve.open(chat_storage_file, writeback=True) for chat_id in no_flood_chat_ids: self.init_chat(chat_id=chat_id, chat_type=Chat.PRIVATE if chat_id > 0 else Chat.SUPERGROUP, flood="no") - self.ALERT_CHAT_IDS = set(alert_chat_ids) if alert_chat_ids else set() self.STORE_CHAT_ID = store_chat_id self.DL_DIR = dl_dir self.COOKIES_DOWNLOAD_FILE = "/tmp/scdlbot_cookies.txt" @@ -370,7 +369,7 @@ def button_query_callback(self, update: Update, context: CallbackContext): if chat_type != Chat.PRIVATE: chat_member_status = chat.get_member(user_id).status if chat_member_status not in [ChatMember.ADMINISTRATOR, - ChatMember.CREATOR] and user_id not in self.ALERT_CHAT_IDS: + ChatMember.CREATOR]: log_and_track("settings_fail") update.callback_query.answer(text="You're not chat admin") return From fbc70eafc227054dddaaeb81039c325779f29c72 Mon Sep 17 00:00:00 2001 From: Nitan Alexandru Marcel Date: Tue, 6 Apr 2021 13:20:45 +0300 Subject: [PATCH 14/22] Direct links might not work outside the machine ip --- scdlbot/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scdlbot/utils.py b/scdlbot/utils.py index 80c6f8139..07b81dbe4 100644 --- a/scdlbot/utils.py +++ b/scdlbot/utils.py @@ -150,4 +150,5 @@ def get_link_text(urls): content_type = "Video" # direct_url = shorten_url(direct_url) link_text += "• {} [Direct Link]({})\n".format(content_type, direct_url) + link_text += "\n*Note:* Final download URLs are only guaranteed to work on the same machine/IP where extracted" return link_text From 09cae3a36dcd8009d80fdf311f4ec85651ef8d5e Mon Sep 17 00:00:00 2001 From: George Pchelkin Date: Sun, 18 Dec 2022 14:40:12 +0300 Subject: [PATCH 15/22] cosmetic fix --- render.yaml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/render.yaml b/render.yaml index ef4874068..eaebe4a32 100644 --- a/render.yaml +++ b/render.yaml @@ -9,31 +9,31 @@ services: plan: free # optional; defaults to starter numInstances: 1 envVars: - - key: ALERT_CHAT_IDS # Imported from Heroku app + - key: ALERT_CHAT_IDS value: 1306343 - - key: APP_URL # Imported from Heroku app + - key: APP_URL value: https://scdlbot.herokuapp.com/ - - key: DL_DIR # Imported from Heroku app + - key: DL_DIR value: /tmp/scdlbot - - key: DL_TIMEOUT # Imported from Heroku app + - key: DL_TIMEOUT value: 900 - - key: HOST # Imported from Heroku app + - key: HOST value: 0.0.0.0 - - key: HOSTNAME # Imported from Heroku app + - key: HOSTNAME value: test-heroku - - key: MAX_CONVERT_FILE_SIZE # Imported from Heroku app + - key: MAX_CONVERT_FILE_SIZE value: 300_000_000 - - key: MAX_TG_FILE_SIZE # Imported from Heroku app + - key: MAX_TG_FILE_SIZE value: 45_000_000 - - key: NO_FLOOD_CHAT_IDS # Imported from Heroku app + - key: NO_FLOOD_CHAT_IDS value: -1001108859218,-1001106680201 - - key: STORE_CHAT_ID # Imported from Heroku app + - key: STORE_CHAT_ID value: -172951900 - - key: SYSLOG_ADDRESS # Imported from Heroku app + - key: SYSLOG_ADDRESS value: logs6.papertrailapp.com:54882 - - key: SYSLOG_DEBUG # Imported from Heroku app + - key: SYSLOG_DEBUG value: 1 - - key: USE_WEBHOOK # Imported from Heroku app + - key: USE_WEBHOOK value: 1 - key: WORKERS value: 2 From 4d303f99908a8edf90aedb308e4a510888292a8f Mon Sep 17 00:00:00 2001 From: George Pchelkin Date: Mon, 19 Dec 2022 00:35:04 +0300 Subject: [PATCH 16/22] restore alerts using python-telegram-handler since they work for me --- .env.sample | 2 + app.json | 4 ++ requirements.txt | 99 +++++++++++++++++++++++---------------------- scdlbot/__main__.py | 7 +++- scdlbot/scdlbot.py | 9 +++-- 5 files changed, 67 insertions(+), 54 deletions(-) diff --git a/.env.sample b/.env.sample index 0102956f9..000c3f170 100644 --- a/.env.sample +++ b/.env.sample @@ -12,6 +12,8 @@ DL_TIMEOUT="300" # Comma-separated chat IDs with no replying and caption spam NO_FLOOD_CHAT_IDS="-10018859218,-1011068201" +ALERT_CHAT_IDS="1265343,3265143" + BIN_PATH="" CHAT_STORAGE="/home/gpchelkin/scdlbotdata" diff --git a/app.json b/app.json index 57f396b7f..e18ee55a0 100644 --- a/app.json +++ b/app.json @@ -43,6 +43,10 @@ "description": "Comma-separated chat IDs with no replying and caption hashtags", "required": false }, + "ALERT_CHAT_IDS": { + "description": "Comma-separated chat IDs with no replying and caption hashtags", + "required": false + }, "BIN_PATH": { "description": "Custom directory where scdl and bandcamp-dl binaries are available", "required": false diff --git a/requirements.txt b/requirements.txt index f4047f469..a814531e8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,49 +1,50 @@ -amqp==5.0.6; python_version >= "3.6" -args==0.1.0 -bandcamp-downloader @ git+https://github.com/iheanyi/bandcamp-dl.git@master ; python_version >= "3.4" and python_version < "4.0" -beautifulsoup4==4.9.3; python_version >= "3.4" and python_version < "4.0" -billiard==3.6.4.0; python_version >= "3.6" -boltons==20.2.1 -celery==5.0.5; python_version >= "3.6" -certifi==2020.12.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" -cffi==1.14.5; python_version >= "3.6" -chardet==4.0.0; python_version >= "3.4" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.4" and python_version < "4.0" and python_full_version >= "3.5.0" -click-didyoumean==0.0.3; python_version >= "3.6" -click-plugins==1.1.1; python_version >= "3.6" -click-repl==0.1.6; python_version >= "3.6" -click==7.1.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6" -clint==0.5.1 -cryptography==3.4.7; python_version >= "3.6" -decorator==5.0.3; python_version >= "3.5" -demjson==2.2.4; python_version >= "3.4" and python_version < "4.0" -docopt==0.6.2; python_version >= "3.4" and python_version < "4.0" -ffmpeg-python==0.2.0 -future==0.18.2; python_version >= "2.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" -idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" -importlib-metadata==3.10.0; python_version < "3.8" and python_version >= "3.6" -kombu==5.0.2; python_version >= "3.6" -mock==4.0.3; python_version >= "3.6" and python_version < "4.0" -mutagen==1.45.1; python_version >= "3.5" and python_version < "4" -patool==1.12 -plumbum==1.7.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_version > "3.4") -prometheus-client==0.9.0 -prompt-toolkit==3.0.3; python_version >= "3.6" -pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" -pypiwin32==223; platform_system == "Windows" and platform_python_implementation != "PyPy" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_version > "3.4") -python-telegram-bot==12.8 -pytz==2021.1; python_version >= "3.6" -pywin32==300; platform_system == "Windows" and platform_python_implementation != "PyPy" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_version > "3.4") -requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") -scdl @ git+https://github.com/flyingrub/scdl.git@master -six==1.15.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_full_version >= "3.3.0" and python_version >= "3.6" and python_version < "4.0" -soupsieve==2.2.1; python_version >= "3.6" and python_version < "4.0" -termcolor==1.1.0 -tornado==6.1; python_version >= "3.5" -typing-extensions==3.7.4.3; python_version < "3.8" and python_version >= "3.6" -unicode-slugify==0.1.3; python_version >= "3.4" and python_version < "4.0" -unidecode==1.2.0; python_version >= "3.4" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.4" and python_version < "4.0" and python_full_version >= "3.4.0" -urllib3==1.26.4; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4" -vine==5.0.0; python_version >= "3.6" -wcwidth==0.2.5; python_version >= "3.6" -youtube-dl==2021.4.1 -zipp==3.4.1; python_version < "3.8" and python_version >= "3.6" +amqp==5.0.6; python_version >= "3.6" +args==0.1.0 +bandcamp-downloader @ git+https://github.com/iheanyi/bandcamp-dl.git@master ; python_version >= "3.4" and python_version < "4.0" +beautifulsoup4==4.9.3; python_version >= "3.4" and python_version < "4.0" +billiard==3.6.4.0; python_version >= "3.6" +boltons==20.2.1 +celery==5.0.5; python_version >= "3.6" +certifi==2020.12.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" +cffi==1.14.5; python_version >= "3.6" +chardet==4.0.0; python_version >= "3.4" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.4" and python_version < "4.0" and python_full_version >= "3.5.0" +click-didyoumean==0.0.3; python_version >= "3.6" +click-plugins==1.1.1; python_version >= "3.6" +click-repl==0.1.6; python_version >= "3.6" +click==7.1.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6" +clint==0.5.1 +cryptography==3.4.7; python_version >= "3.6" +decorator==5.0.3; python_version >= "3.5" +demjson==2.2.4; python_version >= "3.4" and python_version < "4.0" +docopt==0.6.2; python_version >= "3.4" and python_version < "4.0" +ffmpeg-python==0.2.0 +future==0.18.2; python_version >= "2.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" +idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" +importlib-metadata==3.10.0; python_version < "3.8" and python_version >= "3.6" +kombu==5.0.2; python_version >= "3.6" +mock==4.0.3; python_version >= "3.6" and python_version < "4.0" +mutagen==1.45.1; python_version >= "3.5" and python_version < "4" +patool==1.12 +plumbum==1.7.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_version > "3.4") +prometheus-client==0.9.0 +prompt-toolkit==3.0.3; python_version >= "3.6" +pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" +pypiwin32==223; platform_system == "Windows" and platform_python_implementation != "PyPy" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_version > "3.4") +python-telegram-bot==12.8 +python-telegram-handler==2.2 +pytz==2021.1; python_version >= "3.6" +pywin32==300; platform_system == "Windows" and platform_python_implementation != "PyPy" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_version > "3.4") +requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") +scdl @ git+https://github.com/flyingrub/scdl.git@master +six==1.15.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_full_version >= "3.3.0" and python_version >= "3.6" and python_version < "4.0" +soupsieve==2.2.1; python_version >= "3.6" and python_version < "4.0" +termcolor==1.1.0 +tornado==6.1; python_version >= "3.5" +typing-extensions==3.7.4.3; python_version < "3.8" and python_version >= "3.6" +unicode-slugify==0.1.3; python_version >= "3.4" and python_version < "4.0" +unidecode==1.2.0; python_version >= "3.4" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.4" and python_version < "4.0" and python_full_version >= "3.4.0" +urllib3==1.26.4; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4" +vine==5.0.0; python_version >= "3.6" +wcwidth==0.2.5; python_version >= "3.6" +youtube-dl==2021.4.1 +zipp==3.4.1; python_version < "3.8" and python_version >= "3.6" diff --git a/scdlbot/__main__.py b/scdlbot/__main__.py index d92561fd9..77501bfd3 100755 --- a/scdlbot/__main__.py +++ b/scdlbot/__main__.py @@ -5,6 +5,7 @@ from logging.handlers import SysLogHandler from prometheus_client import start_http_server +from telegram_handler import TelegramHandler from scdlbot.scdlbot import ScdlBot @@ -17,6 +18,10 @@ logging_handlers.append(console_handler) tg_bot_token = os.environ['TG_BOT_TOKEN'] +alert_chat_ids = list(map(int, os.getenv('ALERT_CHAT_IDS', '0').split(','))) +telegram_handler = TelegramHandler(token=tg_bot_token, chat_id=str(alert_chat_ids[0])) +telegram_handler.setLevel(logging.WARNING) +logging_handlers.append(telegram_handler) syslog_debug = bool(int(os.getenv('SYSLOG_DEBUG', '0'))) syslog_logging_level = logging.DEBUG if syslog_debug else logging.INFO @@ -63,7 +68,7 @@ def main(): cookies_file = os.getenv('COOKIES_FILE', '') scdlbot = ScdlBot(tg_bot_token, tg_bot_api, proxies, - store_chat_id, no_flood_chat_ids, + store_chat_id, no_flood_chat_ids, alert_chat_ids, dl_dir, dl_timeout, max_tg_file_size, max_convert_file_size, chat_storage_file, app_url, serve_audio, cookies_file, source_ips) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 637f64c52..04132821b 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -38,7 +38,7 @@ class ScdlBot: def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies=None, - store_chat_id=None, no_flood_chat_ids=None, + store_chat_id=None, no_flood_chat_ids=None, alert_chat_ids=None, dl_dir="/tmp/scdlbot", dl_timeout=300, max_tg_file_size=45_000_000, max_convert_file_size=80_000_000, chat_storage_file="/tmp/scdlbotdata", app_url=None, serve_audio=False, cookies_file=None, source_ips=None): @@ -71,6 +71,7 @@ def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies= self.chat_storage = shelve.open(chat_storage_file, writeback=True) for chat_id in no_flood_chat_ids: self.init_chat(chat_id=chat_id, chat_type=Chat.PRIVATE if chat_id > 0 else Chat.SUPERGROUP, flood="no") + self.ALERT_CHAT_IDS = set(alert_chat_ids) if alert_chat_ids else set() self.STORE_CHAT_ID = store_chat_id self.DL_DIR = dl_dir self.COOKIES_DOWNLOAD_FILE = "/tmp/scdlbot_cookies.txt" @@ -369,7 +370,7 @@ def button_query_callback(self, update: Update, context: CallbackContext): if chat_type != Chat.PRIVATE: chat_member_status = chat.get_member(user_id).status if chat_member_status not in [ChatMember.ADMINISTRATOR, - ChatMember.CREATOR]: + ChatMember.CREATOR] and user_id not in self.ALERT_CHAT_IDS: log_and_track("settings_fail") update.callback_query.answer(text="You're not chat admin") return @@ -527,7 +528,7 @@ def url_valid(self, url): if netloc in telegram_domains: return False return self.url_allowed(url) - + def url_allowed(self, url): # Example export BLACKLIST_DOMS = "invidious.tube invidious.kavin.rocks invidious.himiko.cloud invidious.namazso.eu dev.viewtube.io tube.cadence.moe piped.kavin.rocks" whitelist = set(x for x in os.environ.get("WHITELIST_DOMS", "").split()) @@ -772,7 +773,7 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message bot.delete_message(chat_id=chat_id, message_id=wait_message_id) except: pass - + @run_async def blacklist_whitelist(self, update: Update, context: CallbackContext): chat_id = update.message.chat_id From e0fe0d30a62270a55f3f9ce5e22976f214741d1a Mon Sep 17 00:00:00 2001 From: George Pchelkin Date: Mon, 19 Dec 2022 00:50:55 +0300 Subject: [PATCH 17/22] cosmetic fix --- app.json | 2 +- scdlbot/scdlbot.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/app.json b/app.json index e18ee55a0..4735e9d1f 100644 --- a/app.json +++ b/app.json @@ -107,7 +107,7 @@ "BLACKLIST_DOMS": { "description": "A space separated list of domains which should be considered blacklisted - the bot will not process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.", "required": false - }, + } }, "formation": { "web": { diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 04132821b..b9e73c096 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -354,7 +354,6 @@ def common_command_callback(self, update: Update, context: CallbackContext): self.cleanup_chat(chat_id) def button_query_callback(self, update: Update, context: CallbackContext): - btn_msg = update.callback_query.message self.init_chat(btn_msg) user_id = update.callback_query.from_user.id @@ -629,9 +628,12 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message { 'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3', - 'preferredquality': '312', + 'preferredquality': '320', + }, + { + 'key': 'FFmpegMetadata', }, - {'key': 'FFmpegMetadata',}, + # {'key': 'EmbedThumbnail'}, ], } host = urlparse(url).hostname From 1e9eafb64a038d75a65d904a4a75ba9d748921fa Mon Sep 17 00:00:00 2001 From: George Pchelkin Date: Mon, 19 Dec 2022 00:54:21 +0300 Subject: [PATCH 18/22] resolve conflict --- requirements.txt | 96 +++++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 50 deletions(-) diff --git a/requirements.txt b/requirements.txt index a814531e8..5b7026a61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,50 +1,46 @@ -amqp==5.0.6; python_version >= "3.6" -args==0.1.0 -bandcamp-downloader @ git+https://github.com/iheanyi/bandcamp-dl.git@master ; python_version >= "3.4" and python_version < "4.0" -beautifulsoup4==4.9.3; python_version >= "3.4" and python_version < "4.0" -billiard==3.6.4.0; python_version >= "3.6" -boltons==20.2.1 -celery==5.0.5; python_version >= "3.6" -certifi==2020.12.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" -cffi==1.14.5; python_version >= "3.6" -chardet==4.0.0; python_version >= "3.4" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.4" and python_version < "4.0" and python_full_version >= "3.5.0" -click-didyoumean==0.0.3; python_version >= "3.6" -click-plugins==1.1.1; python_version >= "3.6" -click-repl==0.1.6; python_version >= "3.6" -click==7.1.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6" -clint==0.5.1 -cryptography==3.4.7; python_version >= "3.6" -decorator==5.0.3; python_version >= "3.5" -demjson==2.2.4; python_version >= "3.4" and python_version < "4.0" -docopt==0.6.2; python_version >= "3.4" and python_version < "4.0" -ffmpeg-python==0.2.0 -future==0.18.2; python_version >= "2.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" -idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" -importlib-metadata==3.10.0; python_version < "3.8" and python_version >= "3.6" -kombu==5.0.2; python_version >= "3.6" -mock==4.0.3; python_version >= "3.6" and python_version < "4.0" -mutagen==1.45.1; python_version >= "3.5" and python_version < "4" -patool==1.12 -plumbum==1.7.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_version > "3.4") -prometheus-client==0.9.0 -prompt-toolkit==3.0.3; python_version >= "3.6" -pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" -pypiwin32==223; platform_system == "Windows" and platform_python_implementation != "PyPy" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_version > "3.4") -python-telegram-bot==12.8 -python-telegram-handler==2.2 -pytz==2021.1; python_version >= "3.6" -pywin32==300; platform_system == "Windows" and platform_python_implementation != "PyPy" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_version > "3.4") -requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") -scdl @ git+https://github.com/flyingrub/scdl.git@master -six==1.15.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_full_version >= "3.3.0" and python_version >= "3.6" and python_version < "4.0" -soupsieve==2.2.1; python_version >= "3.6" and python_version < "4.0" -termcolor==1.1.0 -tornado==6.1; python_version >= "3.5" -typing-extensions==3.7.4.3; python_version < "3.8" and python_version >= "3.6" -unicode-slugify==0.1.3; python_version >= "3.4" and python_version < "4.0" -unidecode==1.2.0; python_version >= "3.4" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.4" and python_version < "4.0" and python_full_version >= "3.4.0" -urllib3==1.26.4; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4" -vine==5.0.0; python_version >= "3.6" -wcwidth==0.2.5; python_version >= "3.6" -youtube-dl==2021.4.1 -zipp==3.4.1; python_version < "3.8" and python_version >= "3.6" +args==0.1.0 ; python_version >= "3.9" and python_version < "4.0" +bandcamp-downloader==0.0.13 ; python_version >= "3.9" and python_version < "4.0" +beautifulsoup4==4.11.1 ; python_version >= "3.9" and python_version < "4.0" +boltons==21.0.0 ; python_version >= "3.9" and python_version < "4.0" +brotli==1.0.9 ; python_version >= "3.9" and python_version < "4.0" and platform_python_implementation == "CPython" +brotlicffi==1.0.9.2 ; python_version >= "3.9" and python_version < "4.0" and platform_python_implementation != "CPython" +certifi==2022.12.7 ; python_version >= "3.9" and python_version < "4.0" +cffi==1.15.1 ; python_version >= "3.9" and python_version < "4.0" +chardet==5.1.0 ; python_version >= "3.9" and python_version < "4.0" +charset-normalizer==2.1.1 ; python_version >= "3.9" and python_version < "4" +clint==0.5.1 ; python_version >= "3.9" and python_version < "4.0" +cryptography==38.0.4 ; python_version >= "3.9" and python_version < "4.0" +dacite==1.6.0 ; python_version >= "3.9" and python_version < "4.0" +decorator==5.1.1 ; python_version >= "3.9" and python_version < "4.0" +demjson3==3.0.6 ; python_version >= "3.9" and python_version < "4.0" +docopt==0.6.2 ; python_version >= "3.9" and python_version < "4.0" +ffmpeg-python==0.2.0 ; python_version >= "3.9" and python_version < "4.0" +future==0.18.2 ; python_version >= "3.9" and python_version < "4.0" +idna==3.4 ; python_version >= "3.9" and python_version < "4" +lxml==4.9.2 ; python_version >= "3.9" and python_version < "4.0" +mock==4.0.3 ; python_version >= "3.9" and python_version < "4.0" +mutagen==1.46.0 ; python_version >= "3.9" and python_version < "4.0" +pathvalidate==2.5.2 ; python_version >= "3.9" and python_version < "4.0" +patool==1.12 ; python_version >= "3.9" and python_version < "4.0" +plumbum==1.8.0 ; python_version >= "3.9" and python_version < "4.0" +prometheus-client==0.15.0 ; python_version >= "3.9" and python_version < "4.0" +pycparser==2.21 ; python_version >= "3.9" and python_version < "4.0" +pycryptodomex==3.16.0 ; python_version >= "3.9" and python_version < "4.0" +pysocks==1.7.1 ; python_version >= "3.9" and python_version < "4" +python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "4.0" +python-telegram-bot==12.8 ; python_version >= "3.9" and python_version < "4.0" +python-telegram-handler==2.2.1 ; python_version >= "3.9" and python_version < "4.0" +pywin32==305 ; platform_system == "Windows" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "4.0" +requests==2.28.1 ; python_version >= "3.9" and python_version < "4" +requests[socks]==2.28.1 ; python_version >= "3.9" and python_version < "4" +scdl==2.7.3 ; python_version >= "3.9" and python_version < "4.0" +six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" +soundcloud-v2==1.3.1 ; python_version >= "3.9" and python_version < "4.0" +soupsieve==2.3.2.post1 ; python_version >= "3.9" and python_version < "4.0" +termcolor==2.1.1 ; python_version >= "3.9" and python_version < "4.0" +tornado==6.2 ; python_version >= "3.9" and python_version < "4.0" +unicode-slugify==0.1.5 ; python_version >= "3.9" and python_version < "4.0" +unidecode==1.3.6 ; python_version >= "3.9" and python_version < "4.0" +urllib3==1.26.13 ; python_version >= "3.9" and python_version < "4" +websockets==10.4 ; python_version >= "3.9" and python_version < "4.0" +yt-dlp==2022.11.11 ; python_version >= "3.9" and python_version < "4.0" From 4627ccd80194dc8b10787bf4ac55a56ca967a2a3 Mon Sep 17 00:00:00 2001 From: George Pchelkin Date: Mon, 19 Dec 2022 01:06:44 +0300 Subject: [PATCH 19/22] resolve conflict (merge master code) --- scdlbot/scdlbot.py | 814 ++++++++++++++++++++++----------------------- 1 file changed, 404 insertions(+), 410 deletions(-) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index b9e73c096..4d7b863bc 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -3,7 +3,6 @@ """Main module.""" import gc -import os import pathlib import random import shelve @@ -20,28 +19,38 @@ from mutagen.id3 import ID3 from mutagen.mp3 import EasyMP3 as MP3 from prometheus_client import Summary -from telegram import (Message, Chat, ChatMember, MessageEntity, ChatAction, InlineKeyboardMarkup, - InlineKeyboardButton, InlineQueryResultAudio, Update) -from telegram.error import (TelegramError, Unauthorized, BadRequest, - TimedOut, ChatMigrated, NetworkError) -from telegram.ext import (Updater, CommandHandler, MessageHandler, Filters, InlineQueryHandler, - CallbackQueryHandler, CallbackContext) +from telegram import Chat, ChatAction, ChatMember, InlineKeyboardButton, InlineKeyboardMarkup, Message, MessageEntity, Update +from telegram.error import BadRequest, ChatMigrated, NetworkError, TelegramError, TimedOut, Unauthorized +from telegram.ext import CallbackContext, CallbackQueryHandler, CommandHandler, Filters, MessageHandler, Updater from telegram.ext.dispatcher import run_async from scdlbot.utils import * logger = logging.getLogger(__name__) -REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request') +REQUEST_TIME = Summary("request_processing_seconds", "Time spent processing request") class ScdlBot: - - def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies=None, - store_chat_id=None, no_flood_chat_ids=None, alert_chat_ids=None, - dl_dir="/tmp/scdlbot", dl_timeout=300, max_tg_file_size=45_000_000, max_convert_file_size=80_000_000, - chat_storage_file="/tmp/scdlbotdata", app_url=None, - serve_audio=False, cookies_file=None, source_ips=None): + def __init__( + self, + tg_bot_token, + tg_bot_api="https://api.telegram.org", + proxies=None, + store_chat_id=None, + no_flood_chat_ids=None, + alert_chat_ids=None, + dl_dir="/tmp/scdlbot", + dl_timeout=300, + max_tg_file_size=45_000_000, + max_convert_file_size=80_000_000, + chat_storage_file="/tmp/scdlbotdata", + app_url=None, + serve_audio=False, + cookies_file=None, + source_ips=None, + workers=4, + ): self.SITES = { "sc": "soundcloud", "scapi": "api.soundcloud", @@ -56,17 +65,16 @@ def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies= self.SERVE_AUDIO = serve_audio if self.SERVE_AUDIO: self.MAX_TG_FILE_SIZE = 19_000_000 - self.HELP_TEXT = get_response_text('help.tg.md') - self.SETTINGS_TEXT = get_response_text('settings.tg.md') - self.DL_TIMEOUT_TEXT = get_response_text('dl_timeout.txt').format(self.DL_TIMEOUT // 60) - self.WAIT_BIT_TEXT = [get_response_text('wait_bit.txt'), get_response_text('wait_beat.txt'), - get_response_text('wait_beet.txt')] - self.NO_AUDIO_TEXT = get_response_text('no_audio.txt') - self.NO_URLS_TEXT = get_response_text('no_urls.txt') - self.OLG_MSG_TEXT = get_response_text('old_msg.txt') - self.REGION_RESTRICTION_TEXT = get_response_text('region_restriction.txt') - self.DIRECT_RESTRICTION_TEXT = get_response_text('direct_restriction.txt') - self.LIVE_RESTRICTION_TEXT = get_response_text('live_restriction.txt') + self.HELP_TEXT = get_response_text("help.tg.md") + self.SETTINGS_TEXT = get_response_text("settings.tg.md") + self.DL_TIMEOUT_TEXT = get_response_text("dl_timeout.txt").format(self.DL_TIMEOUT // 60) + self.WAIT_BIT_TEXT = [get_response_text("wait_bit.txt"), get_response_text("wait_beat.txt"), get_response_text("wait_beet.txt")] + self.NO_AUDIO_TEXT = get_response_text("no_audio.txt") + self.NO_URLS_TEXT = get_response_text("no_urls.txt") + self.OLD_MSG_TEXT = get_response_text("old_msg.txt") + self.REGION_RESTRICTION_TEXT = get_response_text("region_restriction.txt") + self.DIRECT_RESTRICTION_TEXT = get_response_text("direct_restriction.txt") + self.LIVE_RESTRICTION_TEXT = get_response_text("live_restriction.txt") # self.chat_storage = {} self.chat_storage = shelve.open(chat_storage_file, writeback=True) for chat_id in no_flood_chat_ids: @@ -79,6 +87,7 @@ def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies= self.source_ips = source_ips # https://yandex.com/support/music-app-ios/search-and-listen/listening-abroad.html self.cookies_file = cookies_file + self.workers = workers # if sc_auth_token: # config = configparser.ConfigParser() @@ -91,37 +100,34 @@ def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies= # with open(config_path, 'w') as config_file: # config.write(config_file) - self.updater = Updater(token=tg_bot_token, base_url=f"{self.TG_BOT_API}/bot", use_context=True, base_file_url=f"{self.TG_BOT_API}/file/bot") + self.updater = Updater(token=tg_bot_token, base_url=f"{self.TG_BOT_API}/bot", use_context=True, base_file_url=f"{self.TG_BOT_API}/file/bot", workers=self.workers) dispatcher = self.updater.dispatcher - start_command_handler = CommandHandler('start', self.help_command_callback) + start_command_handler = CommandHandler("start", self.help_command_callback) dispatcher.add_handler(start_command_handler) - help_command_handler = CommandHandler('help', self.help_command_callback) + help_command_handler = CommandHandler("help", self.help_command_callback) dispatcher.add_handler(help_command_handler) - settings_command_handler = CommandHandler('settings', self.settings_command_callback) + settings_command_handler = CommandHandler("settings", self.settings_command_callback) dispatcher.add_handler(settings_command_handler) - dl_command_handler = CommandHandler('dl', self.common_command_callback, - filters=~Filters.update.edited_message & ~Filters.forwarded) + dl_command_handler = CommandHandler("dl", self.common_command_callback, filters=~Filters.update.edited_message & ~Filters.forwarded) dispatcher.add_handler(dl_command_handler) - link_command_handler = CommandHandler('link', self.common_command_callback, - filters=~Filters.update.edited_message & ~Filters.forwarded) + link_command_handler = CommandHandler("link", self.common_command_callback, filters=~Filters.update.edited_message & ~Filters.forwarded) dispatcher.add_handler(link_command_handler) - message_with_links_handler = MessageHandler(~Filters.update.edited_message & ~Filters.command & - ((Filters.text & (Filters.entity(MessageEntity.URL) | - Filters.entity(MessageEntity.TEXT_LINK))) | - (Filters.caption & (Filters.caption_entity(MessageEntity.URL) | - Filters.caption_entity( - MessageEntity.TEXT_LINK)))), - self.common_command_callback) + message_with_links_handler = MessageHandler( + ~Filters.update.edited_message + & ~Filters.command + & ( + (Filters.text & (Filters.entity(MessageEntity.URL) | Filters.entity(MessageEntity.TEXT_LINK))) + | (Filters.caption & (Filters.caption_entity(MessageEntity.URL) | Filters.caption_entity(MessageEntity.TEXT_LINK))) + ), + self.common_command_callback, + ) dispatcher.add_handler(message_with_links_handler) button_query_handler = CallbackQueryHandler(self.button_query_callback) dispatcher.add_handler(button_query_handler) - inline_query_handler = InlineQueryHandler(self.inline_query_callback) - dispatcher.add_handler(inline_query_handler) - unknown_handler = MessageHandler(Filters.command, self.unknown_command_callback) dispatcher.add_handler(unknown_handler) @@ -132,20 +138,15 @@ def __init__(self, tg_bot_token, tg_bot_api="https://api.telegram.org", proxies= self.bot_username = self.updater.bot.get_me().username self.RANT_TEXT_PRIVATE = "Read /help to learn how to use me" - self.RANT_TEXT_PUBLIC = "[Start me in PM to read help and learn how to use me](t.me/{}?start=1)".format( - self.bot_username) + self.RANT_TEXT_PUBLIC = "[Start me in PM to read help and learn how to use me](t.me/{}?start=1)".format(self.bot_username) - def start(self, use_webhook=False, webhook_host="127.0.0.1", webhook_port=None, cert_file=None, cert_key_file=None, - url_path="scdlbot"): + def start(self, use_webhook=False, webhook_host="127.0.0.1", webhook_port=None, cert_file=None, cert_key_file=None, url_path="scdlbot"): if use_webhook: - self.updater.start_webhook(listen=webhook_host, - port=webhook_port, - url_path=url_path) + self.updater.start_webhook(listen=webhook_host, port=webhook_port, url_path=url_path) # cert=cert_file if cert_file else None, # key=cert_key_file if cert_key_file else None, # webhook_url=urljoin(app_url, url_path)) - self.updater.bot.set_webhook(url=urljoin(self.APP_URL, url_path), - certificate=open(cert_file, 'rb') if cert_file else None) + self.updater.bot.set_webhook(url=urljoin(self.APP_URL, url_path), certificate=open(cert_file, "rb") if cert_file else None) else: self.updater.start_polling() logger.warning("Bot started") @@ -160,22 +161,22 @@ def error_callback(self, update: Update, context: CallbackContext): # skipcq: P raise context.error except Unauthorized: # remove update.message.chat_id from conversation list - logger.debug('Update {} caused Unauthorized error: {}'.format(update, context.error)) + logger.debug("Update {} caused Unauthorized error: {}".format(update, context.error)) except BadRequest: # handle malformed requests - read more below! - logger.debug('Update {} caused BadRequest error: {}'.format(update, context.error)) + logger.debug("Update {} caused BadRequest error: {}".format(update, context.error)) except TimedOut: # handle slow connection problems - logger.debug('Update {} caused TimedOut error: {}'.format(update, context.error)) + logger.debug("Update {} caused TimedOut error: {}".format(update, context.error)) except NetworkError: # handle other connection problems - logger.debug('Update {} caused NetworkError: {}'.format(update, context.error)) + logger.debug("Update {} caused NetworkError: {}".format(update, context.error)) except ChatMigrated as e: # the chat_id of a group has changed, use e.new_chat_id instead - logger.debug('Update {} caused ChatMigrated error: {}'.format(update, context.error)) + logger.debug("Update {} caused ChatMigrated error: {}".format(update, context.error)) except TelegramError: # handle all other telegram related errors - logger.debug('Update {} caused TelegramError: {}'.format(update, context.error)) + logger.debug("Update {} caused TelegramError: {}".format(update, context.error)) def init_chat(self, message=None, chat_id=None, chat_type=None, flood="yes"): if message: @@ -203,14 +204,13 @@ def cleanup_chat(self, chat_id): chat_msgs = self.chat_storage[str(chat_id)].copy() for msg_id in chat_msgs: if msg_id != "settings": - timedelta = datetime.now() - self.chat_storage[str(chat_id)][msg_id]["message"].date + timedelta = datetime.now().replace(tzinfo=None) - self.chat_storage[str(chat_id)][msg_id]["message"].date.replace(tzinfo=None) if timedelta.days > 0: self.chat_storage[str(chat_id)].pop(msg_id) self.chat_storage.sync() def rant_and_cleanup(self, bot, chat_id, rant_text, reply_to_message_id=None): - rant_msg = bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text=rant_text, parse_mode='Markdown', disable_web_page_preview=True) + rant_msg = bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, text=rant_text, parse_mode="Markdown", disable_web_page_preview=True) flood = self.chat_storage[str(chat_id)]["settings"]["flood"] if flood == "no": rant_msgs = self.chat_storage[str(chat_id)]["settings"]["rant_msg_ids"].copy() @@ -238,8 +238,7 @@ def help_command_callback(self, update: Update, context: CallbackContext): if chat_type != Chat.PRIVATE and flood == "no": self.rant_and_cleanup(context.bot, chat_id, self.RANT_TEXT_PUBLIC, reply_to_message_id=reply_to_message_id) else: - context.bot.send_message(chat_id=chat_id, text=self.HELP_TEXT, - parse_mode='Markdown', disable_web_page_preview=True) + context.bot.send_message(chat_id=chat_id, text=self.HELP_TEXT, parse_mode="Markdown", disable_web_page_preview=True) def get_wait_text(self): return random.choice(self.WAIT_BIT_TEXT) @@ -249,16 +248,11 @@ def get_settings_inline_keyboard(self, chat_id): flood = self.chat_storage[str(chat_id)]["settings"]["flood"] emoji_yes = "✅" emoji_no = "❌" - button_dl = InlineKeyboardButton(text=" ".join([emoji_yes if mode == "dl" else emoji_no, "Download"]), - callback_data=" ".join(["settings", "dl"])) - button_link = InlineKeyboardButton(text=" ".join([emoji_yes if mode == "link" else emoji_no, "Links"]), - callback_data=" ".join(["settings", "link"])) - button_ask = InlineKeyboardButton(text=" ".join([emoji_yes if mode == "ask" else emoji_no, "Ask"]), - callback_data=" ".join(["settings", "ask"])) - button_flood = InlineKeyboardButton(text=" ".join([emoji_yes if flood == "yes" else emoji_no, "Captions"]), - callback_data=" ".join(["settings", "flood"])) - button_close = InlineKeyboardButton(text=" ".join([emoji_no, "Close settings"]), - callback_data=" ".join(["settings", "close"])) + button_dl = InlineKeyboardButton(text=" ".join([emoji_yes if mode == "dl" else emoji_no, "Download"]), callback_data=" ".join(["settings", "dl"])) + button_link = InlineKeyboardButton(text=" ".join([emoji_yes if mode == "link" else emoji_no, "Links"]), callback_data=" ".join(["settings", "link"])) + button_ask = InlineKeyboardButton(text=" ".join([emoji_yes if mode == "ask" else emoji_no, "Ask"]), callback_data=" ".join(["settings", "ask"])) + button_flood = InlineKeyboardButton(text=" ".join([emoji_yes if flood == "yes" else emoji_no, "Captions"]), callback_data=" ".join(["settings", "flood"])) + button_close = InlineKeyboardButton(text=" ".join([emoji_no, "Close settings"]), callback_data=" ".join(["settings", "close"])) inline_keyboard = InlineKeyboardMarkup([[button_dl, button_link, button_ask], [button_flood, button_close]]) return inline_keyboard @@ -266,9 +260,7 @@ def settings_command_callback(self, update: Update, context: CallbackContext): self.init_chat(update.message) log_and_track("settings") chat_id = update.message.chat_id - context.bot.send_message(chat_id=chat_id, parse_mode='Markdown', - reply_markup=self.get_settings_inline_keyboard(chat_id), - text=self.SETTINGS_TEXT) + context.bot.send_message(chat_id=chat_id, parse_mode="Markdown", reply_markup=self.get_settings_inline_keyboard(chat_id), text=self.SETTINGS_TEXT) def common_command_callback(self, update: Update, context: CallbackContext): self.init_chat(update.message) @@ -279,6 +271,7 @@ def common_command_callback(self, update: Update, context: CallbackContext): chat_type = update.message.chat.type reply_to_message_id = update.message.message_id command_entities = update.message.parse_entities(types=[MessageEntity.BOT_COMMAND]) + command_passed = False if not command_entities: command_passed = False # if no command then it is just a message and use default mode @@ -294,64 +287,25 @@ def common_command_callback(self, update: Update, context: CallbackContext): mode = "dl" if command_passed and not context.args: rant_text = self.RANT_TEXT_PRIVATE if chat_type == Chat.PRIVATE else self.RANT_TEXT_PUBLIC - rant_text += "\nYou can simply send message with links (to download) OR command as `/{} `.".format( - mode) + rant_text += "\nYou can simply send message with links (to download) OR command as `/{} `.".format(mode) self.rant_and_cleanup(context.bot, chat_id, rant_text, reply_to_message_id=reply_to_message_id) return - # apologize and send TYPING: always in PM and only when it's command in non-PM - apologize = chat_type == Chat.PRIVATE or command_passed - if apologize: - context.bot.send_chat_action(chat_id=chat_id, action=ChatAction.TYPING) + event_name = ("{}_cmd".format(mode)) if command_passed else ("{}_msg".format(mode)) + log_and_track(event_name, update.message) + + apologize = False + # apologize and send TYPING: always in PM, only when it's command in non-PM + if chat_type == Chat.PRIVATE or command_passed: + apologize = True source_ip = None proxy = None if self.source_ips: source_ip = random.choice(self.source_ips) if self.proxies: proxy = random.choice(self.proxies) - # TODO find working IP? - urls = self.prepare_urls(msg_or_text=update.message, - direct_urls=(mode == "link"), - source_ip=source_ip, proxy=proxy) - logger.debug(urls) - if not urls: - if apologize: - context.bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text=self.NO_URLS_TEXT, parse_mode='Markdown') - else: - event_name = ("{}_cmd".format(mode)) if command_passed else ("{}_msg".format(mode)) - log_and_track(event_name, update.message) - if mode == "dl": - wait_message = context.bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - parse_mode='Markdown', text=get_italic(self.get_wait_text())) - for url in urls: - self.download_url_and_send(context.bot, url, urls[url], chat_id=chat_id, - reply_to_message_id=reply_to_message_id, - wait_message_id=wait_message.message_id, - source_ip=source_ip, proxy=proxy) - elif mode == "link": - wait_message = context.bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - parse_mode='Markdown', text=get_italic(self.get_wait_text())) - - link_text = get_link_text(urls) - context.bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - parse_mode='Markdown', disable_web_page_preview=True, - text=link_text if link_text else self.NO_URLS_TEXT) - context.bot.delete_message(chat_id=chat_id, message_id=wait_message.message_id) - elif mode == "ask": - # ask: always in PM and only if good urls exist in non-PM - if chat_type == Chat.PRIVATE or "http" in " ".join(urls.values()): - orig_msg_id = str(reply_to_message_id) - self.chat_storage[str(chat_id)][orig_msg_id] = {"message": update.message, "urls": urls, - "source_ip": source_ip, "proxy": proxy} - question = "🎶 links found, what to do?" - button_dl = InlineKeyboardButton(text="✅ Download", callback_data=" ".join([orig_msg_id, "dl"])) - button_link = InlineKeyboardButton(text="❇️ Links", - callback_data=" ".join([orig_msg_id, "link"])) - button_cancel = InlineKeyboardButton(text="❎", callback_data=" ".join([orig_msg_id, "nodl"])) - inline_keyboard = InlineKeyboardMarkup([[button_dl, button_link, button_cancel]]) - context.bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - reply_markup=inline_keyboard, text=question) - self.cleanup_chat(chat_id) + self.prepare_urls( + message=update.message, mode=mode, source_ip=source_ip, proxy=proxy, apologize=apologize, chat_id=chat_id, reply_to_message_id=reply_to_message_id, bot=context.bot + ) def button_query_callback(self, update: Update, context: CallbackContext): btn_msg = update.callback_query.message @@ -368,8 +322,7 @@ def button_query_callback(self, update: Update, context: CallbackContext): if orig_msg_id == "settings": if chat_type != Chat.PRIVATE: chat_member_status = chat.get_member(user_id).status - if chat_member_status not in [ChatMember.ADMINISTRATOR, - ChatMember.CREATOR] and user_id not in self.ALERT_CHAT_IDS: + if chat_member_status not in [ChatMember.ADMINISTRATOR, ChatMember.CREATOR] and user_id not in self.ALERT_CHAT_IDS: log_and_track("settings_fail") update.callback_query.answer(text="You're not chat admin") return @@ -390,9 +343,7 @@ def button_query_callback(self, update: Update, context: CallbackContext): if setting_changed: self.chat_storage.sync() update.callback_query.answer(text="Settings changed") - update.callback_query.edit_message_reply_markup(parse_mode='Markdown', - reply_markup=self.get_settings_inline_keyboard( - chat_id)) + update.callback_query.edit_message_reply_markup(parse_mode="Markdown", reply_markup=self.get_settings_inline_keyboard(chat_id)) else: update.callback_query.answer(text="Settings not changed") @@ -405,50 +356,34 @@ def button_query_callback(self, update: Update, context: CallbackContext): log_and_track("{}_msg".format(action), orig_msg) if action == "dl": update.callback_query.answer(text=self.get_wait_text()) - wait_message = update.callback_query.edit_message_text(parse_mode='Markdown', - text=get_italic(self.get_wait_text())) + wait_message = update.callback_query.edit_message_text(parse_mode="Markdown", text=get_italic(self.get_wait_text())) for url in urls: - self.download_url_and_send(context.bot, url, urls[url], chat_id=chat_id, - reply_to_message_id=orig_msg_id, - wait_message_id=wait_message.message_id, - source_ip=source_ip, proxy=proxy) + self.download_url_and_send( + context.bot, url, urls[url], chat_id=chat_id, reply_to_message_id=orig_msg_id, wait_message_id=wait_message.message_id, source_ip=source_ip, proxy=proxy + ) elif action == "link": - update.callback_query.answer(text=self.get_wait_text()) - wait_message = update.callback_query.edit_message_text(parse_mode='Markdown', - text=get_italic(self.get_wait_text())) - urls = self.prepare_urls(urls.keys(), direct_urls=True, source_ip=source_ip, proxy=proxy) - link_text = get_link_text(urls) - context.bot.send_message(chat_id=chat_id, reply_to_message_id=orig_msg_id, - parse_mode='Markdown', disable_web_page_preview=True, - text=link_text if link_text else self.NO_URLS_TEXT) - context.bot.delete_message(chat_id=chat_id, message_id=wait_message.message_id) + context.bot.send_message(chat_id=chat_id, reply_to_message_id=orig_msg_id, parse_mode="Markdown", disable_web_page_preview=True, text=get_link_text(urls)) + context.bot.delete_message(chat_id=chat_id, message_id=btn_msg_id) elif action == "nodl": context.bot.delete_message(chat_id=chat_id, message_id=btn_msg_id) else: - update.callback_query.answer(text=self.OLG_MSG_TEXT) + update.callback_query.answer(text=self.OLD_MSG_TEXT) context.bot.delete_message(chat_id=chat_id, message_id=btn_msg_id) - def inline_query_callback(self, update: Update, context: CallbackContext): - log_and_track("link_inline") - inline_query_id = update.inline_query.id - text = update.inline_query.query - results = [] - urls = self.prepare_urls(msg_or_text=text, direct_urls=True) - for url in urls: - for direct_url in urls[url].splitlines(): # TODO: fix non-mp3 and allow only sc/bc - logger.debug(direct_url) - results.append( - InlineQueryResultAudio(id=str(uuid4()), audio_url=direct_url, title="FAST_INLINE_DOWNLOAD")) - try: - context.bot.answer_inline_query(inline_query_id, results) - except: - pass + @REQUEST_TIME.time() + @run_async + def prepare_urls(self, message, mode=None, source_ip=None, proxy=None, apologize=None, chat_id=None, reply_to_message_id=None, bot=None): + direct_urls = False + if mode == "link": + direct_urls = True + + if apologize: + bot.send_chat_action(chat_id=chat_id, action=ChatAction.TYPING) - def prepare_urls(self, msg_or_text, direct_urls=False, source_ip=None, proxy=None): - if isinstance(msg_or_text, Message): + if isinstance(message, Message): urls = [] - url_entities = msg_or_text.parse_entities(types=[MessageEntity.URL]) - url_caption_entities = msg_or_text.parse_caption_entities(types=[MessageEntity.URL]) + url_entities = message.parse_entities(types=[MessageEntity.URL]) + url_caption_entities = message.parse_caption_entities(types=[MessageEntity.URL]) url_entities.update(url_caption_entities) for entity in url_entities: url_str = url_entities[entity] @@ -459,8 +394,8 @@ def prepare_urls(self, msg_or_text, direct_urls=False, source_ip=None, proxy=Non urls.append(URL(url_str)) else: logger.debug("Entry URL not valid or blacklisted: %s", url_str) - text_link_entities = msg_or_text.parse_entities(types=[MessageEntity.TEXT_LINK]) - text_link_caption_entities = msg_or_text.parse_caption_entities(types=[MessageEntity.TEXT_LINK]) + text_link_entities = message.parse_entities(types=[MessageEntity.TEXT_LINK]) + text_link_caption_entities = message.parse_caption_entities(types=[MessageEntity.TEXT_LINK]) text_link_entities.update(text_link_caption_entities) for entity in text_link_entities: url_str = entity.url @@ -470,55 +405,84 @@ def prepare_urls(self, msg_or_text, direct_urls=False, source_ip=None, proxy=Non else: logger.debug("Entry URL not valid or blacklisted: %s", url_str) else: - all_links = find_all_links(msg_or_text, default_scheme="http") + all_links = find_all_links(message, default_scheme="http") urls = [link for link in all_links if self.url_valid(link)] + logger.debug(urls) + urls_dict = {} for url_item in urls: - url = url_item - # unshorten soundcloud.app.goo.gl and other links, but not tiktok: - if "tiktok" not in url_item.host: + # unshorten soundcloud.app.goo.gl and other links, but not tiktok or instagram or youtube: + if "tiktok" in url_item.host or "instagr" in url_item.host or self.SITES["yt"] in url_item.host: + url = url_item + else: try: - url = URL(requests.head(url_item, allow_redirects=True).url) + url = URL( + requests.head( + url_item, + allow_redirects=True, + timeout=5, + proxies=dict(http=proxy, https=proxy), + headers={"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:105.0) Gecko/20100101 Firefox/105.0"}, + ).url + ) except: - pass + url = url_item url_text = url.to_text(True) - #FIXME crutch: + # FIXME crutch: url_text = url_text.replace("m.soundcloud.com", "soundcloud.com") url_parts_num = len([part for part in url.path_parts if part]) try: if ( - # SoundCloud: tracks, sets and widget pages, no /you/ pages #TODO private sets are 5 - (self.SITES["sc"] in url.host and (2 <= url_parts_num <= 4 or self.SITES["scapi"] in url_text) and ( - not "you" in url.path_parts)) or + # SoundCloud: tracks, sets and widget pages, no /you/ pages # TODO private sets are 5 + (self.SITES["sc"] in url.host and (2 <= url_parts_num <= 4 or self.SITES["scapi"] in url_text) and (not "you" in url.path_parts)) + or # Bandcamp: tracks and albums - (self.SITES["bc"] in url.host and (2 <= url_parts_num <= 2)) or + (self.SITES["bc"] in url.host and (2 <= url_parts_num <= 2)) + or # YouTube: videos and playlists - (self.SITES["yt"] in url.host and ( - "youtu.be" in url.host or "watch" in url.path or "playlist" in url.path)) + (self.SITES["yt"] in url.host and ("youtu.be" in url.host or "watch" in url.path or "playlist" in url.path)) ): if direct_urls or self.SITES["yt"] in url.host: - urls_dict[url_text] = get_direct_urls(url_text, self.cookies_file, self.COOKIES_DOWNLOAD_FILE, - source_ip, proxy) + urls_dict[url_text] = get_direct_urls(url_text, self.cookies_file, self.COOKIES_DOWNLOAD_FILE, source_ip, proxy) else: urls_dict[url_text] = "http" elif not any((site in url.host for site in self.SITES.values())): - urls_dict[url_text] = get_direct_urls(url_text, self.cookies_file, self.COOKIES_DOWNLOAD_FILE, - source_ip, proxy) + urls_dict[url_text] = get_direct_urls(url_text, self.cookies_file, self.COOKIES_DOWNLOAD_FILE, source_ip, proxy) except ProcessExecutionError: logger.debug("youtube-dl get-url failed: %s", url_text) except URLError as exc: urls_dict[url_text] = exc.status - return urls_dict + + logger.debug(urls_dict) + if not urls_dict and apologize: + bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, text=self.NO_URLS_TEXT, parse_mode="Markdown") + return + + if mode == "dl": + wait_message = bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, parse_mode="Markdown", text=get_italic(self.get_wait_text())) + for url in urls_dict: + self.download_url_and_send( + bot, url, urls_dict[url], chat_id=chat_id, reply_to_message_id=reply_to_message_id, wait_message_id=wait_message.message_id, source_ip=source_ip, proxy=proxy + ) + elif mode == "link": + wait_message = bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, parse_mode="Markdown", text=get_italic(self.get_wait_text())) + bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, parse_mode="Markdown", disable_web_page_preview=True, text=get_link_text(urls_dict)) + bot.delete_message(chat_id=chat_id, message_id=wait_message.message_id) + elif mode == "ask": + # ask only if good urls exist + if "http" in " ".join(urls_dict.values()): + orig_msg_id = str(reply_to_message_id) + self.chat_storage[str(chat_id)][orig_msg_id] = {"message": message, "urls": urls_dict, "source_ip": source_ip, "proxy": proxy} + question = "🎶 links found, what to do?" + button_dl = InlineKeyboardButton(text="✅ Download", callback_data=" ".join([orig_msg_id, "dl"])) + button_link = InlineKeyboardButton(text="❇️ Links", callback_data=" ".join([orig_msg_id, "link"])) + button_cancel = InlineKeyboardButton(text="❎", callback_data=" ".join([orig_msg_id, "nodl"])) + inline_keyboard = InlineKeyboardMarkup([[button_dl, button_link, button_cancel]]) + bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, reply_markup=inline_keyboard, text=question) + self.cleanup_chat(chat_id) def url_valid(self, url): - telegram_domains = ['t.me', - 'telegram.org', - 'telegram.dog', - 'telegra.ph', - 'tdesktop.com', - 'telesco.pe', - 'graph.org', - 'contest.dev'] + telegram_domains = ["t.me", "telegram.org", "telegram.dog", "telegra.ph", "tdesktop.com", "telesco.pe", "graph.org", "contest.dev"] logger.debug("Checking Url Entry: %s", url) try: netloc = urlparse(url).netloc @@ -546,8 +510,7 @@ def url_allowed(self, url): @REQUEST_TIME.time() @run_async - def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message_id=None, - wait_message_id=None, source_ip=None, proxy=None): + def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message_id=None, wait_message_id=None, source_ip=None, proxy=None): bot.send_chat_action(chat_id=chat_id, action=ChatAction.RECORD_AUDIO) download_dir = os.path.join(self.DL_DIR, str(uuid4())) shutil.rmtree(download_dir, ignore_errors=True) @@ -572,9 +535,11 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message cmd = scdl_bin cmd_name = str(cmd) cmd_args = ( - "-l", url, # URL of track/playlist/user + "-l", + url, # URL of track/playlist/user "-c", # Continue if a music already exist - "--path", download_dir, # Download the music to a custom path + "--path", + download_dir, # Download the music to a custom path "--onlymp3", # Download only the mp3 file even if the track is Downloadable "--addtofile", # Add the artist name to the filename if it isn't in the filename already "--addtimestamp", @@ -588,24 +553,28 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message cmd = bandcamp_dl_bin cmd_name = str(cmd) cmd_args = ( - "--base-dir", download_dir, # Base location of which all files are downloaded - "--template", "%{track} - %{artist} - %{title} [%{album}]", # Output filename template + "--base-dir", + download_dir, # Base location of which all files are downloaded + "--template", + "%{track} - %{artist} - %{title} [%{album}]", # Output filename template "--overwrite", # Overwrite tracks that already exist "--group", # Use album/track Label as iTunes grouping - "--embed-art", # Embed album art (if available) + # "--embed-art", # Embed album art (if available) "--no-slugify", # Disable slugification of track, album, and artist names url, # URL of album/track ) cmd_input = "yes" logger.info("%s starts: %s", cmd_name, url) - cmd_proc = cmd[cmd_args].popen(stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True) + env = None + if proxy: + env = {"http_proxy": proxy, "https_proxy": proxy} + cmd_proc = cmd[cmd_args].popen(env=env, stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True) try: cmd_stdout, cmd_stderr = cmd_proc.communicate(input=cmd_input, timeout=self.DL_TIMEOUT) cmd_retcode = cmd_proc.returncode # TODO listed are common scdl problems for one track with 0 retcode, all its output is always in stderr: - if cmd_retcode or (any(err in cmd_stderr for err in ["Error resolving url", "is not streamable", - "Failed to get item"]) and ".mp3" not in cmd_stderr): + if cmd_retcode or (any(err in cmd_stderr for err in ["Error resolving url", "is not streamable", "Failed to get item"]) and ".mp3" not in cmd_stderr): raise ProcessExecutionError(cmd_args, cmd_retcode, cmd_stdout, cmd_stderr) logger.info("%s succeeded: %s", cmd_name, url) status = 1 @@ -620,38 +589,58 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message cmd = youtube_dl_func cmd_name = "youtube_dl_func" # TODO: set different ydl_opts for different sites - ydl_opts = { - 'format': 'bestaudio/best', - 'outtmpl': os.path.join(download_dir, '%(title)s.%(ext)s'), - # default: %(autonumber)s - %(title)s-%(id)s.%(ext)s - 'postprocessors': [ - { - 'key': 'FFmpegExtractAudio', - 'preferredcodec': 'mp3', - 'preferredquality': '320', - }, - { - 'key': 'FFmpegMetadata', - }, - # {'key': 'EmbedThumbnail'}, - ], - } host = urlparse(url).hostname + ydl_opts = {} if host == "tiktok.com" or host.endswith(".tiktok.com"): - ydl_opts['postprocessors'] = [] - ydl_opts['outtmpl'] = os.path.join(download_dir, 'tiktok.%(ext)s') + ydl_opts = { + "outtmpl": os.path.join(download_dir, "tiktok.%(ext)s"), + "videoformat": "mp4", + } + elif "instagr" in host: + ydl_opts = { + "outtmpl": os.path.join(download_dir, "inst.%(ext)s"), + "videoformat": "mp4", + "postprocessors": [ + { + "key": "FFmpegVideoConvertor", + "preferedformat": "mp4", + } + ], + } + else: + ydl_opts = { + "outtmpl": os.path.join(download_dir, "%(title)s.%(ext)s"), + # default: %(autonumber)s - %(title)s-%(id)s.%(ext)s + "format": "bestaudio/best", + "postprocessors": [ + { + "key": "FFmpegExtractAudio", + "preferredcodec": "mp3", + "preferredquality": "320", + }, + { + "key": "FFmpegMetadata", + }, + # {'key': 'EmbedThumbnail'}, + ], + "noplaylist": True, + } if proxy: - ydl_opts['proxy'] = proxy + ydl_opts["proxy"] = proxy if source_ip: - ydl_opts['source_address'] = source_ip + ydl_opts["source_address"] = source_ip # https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L210 if self.cookies_file: if "http" in self.cookies_file: - ydl_opts['cookiefile'] = self.COOKIES_DOWNLOAD_FILE + ydl_opts["cookiefile"] = self.COOKIES_DOWNLOAD_FILE else: - ydl_opts['cookiefile'] = self.cookies_file + ydl_opts["cookiefile"] = self.cookies_file queue = Queue() - cmd_args = (url, ydl_opts, queue,) + cmd_args = ( + url, + ydl_opts, + queue, + ) logger.info("%s starts: %s", cmd_name, url) cmd_proc = Process(target=cmd, args=cmd_args) cmd_proc.start() @@ -661,7 +650,7 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message cmd_proc.join() if cmd_retcode: raise ProcessExecutionError(cmd_args, cmd_retcode, cmd_stdout, cmd_stderr) - # raise cmd_status #TODO: pass and re-raise original Exception? + # raise cmd_status # TODO: pass and re-raise original Exception? logger.info("%s succeeded: %s", cmd_name, url) status = 1 except Empty: @@ -676,20 +665,15 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message gc.collect() if status in [-1, -6]: - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text=self.DL_TIMEOUT_TEXT, parse_mode='Markdown') + bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, text=self.DL_TIMEOUT_TEXT, parse_mode="Markdown") elif status == -2: - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text=self.NO_AUDIO_TEXT, parse_mode='Markdown') + bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, text=self.NO_AUDIO_TEXT, parse_mode="Markdown") elif status == -3: - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text=self.DIRECT_RESTRICTION_TEXT, parse_mode='Markdown') + bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, text=self.DIRECT_RESTRICTION_TEXT, parse_mode="Markdown") elif status == -4: - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text=self.REGION_RESTRICTION_TEXT, parse_mode='Markdown') + bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, text=self.REGION_RESTRICTION_TEXT, parse_mode="Markdown") elif status == -5: - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text=self.LIVE_RESTRICTION_TEXT, parse_mode='Markdown') + bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, text=self.LIVE_RESTRICTION_TEXT, parse_mode="Markdown") elif status == 1: file_list = [] for d, dirs, files in os.walk(download_dir): @@ -697,42 +681,109 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message file_list.append(os.path.join(d, file)) if not file_list: logger.info("No files in dir: %s", download_dir) - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text="*Sorry*, I couldn't download any files from provided links", - parse_mode='Markdown') + bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, text="*Sorry*, I couldn't download any files from provided links", parse_mode="Markdown") else: for file in sorted(file_list): file_name = os.path.split(file)[-1] file_parts = [] try: - file_parts = self.convert_and_split_audio_file(file) + file_root, file_ext = os.path.splitext(file) + file_format = file_ext.replace(".", "").lower() + file_size = os.path.getsize(file) + if file_format not in ["mp3", "m4a", "mp4"]: + raise FileNotSupportedError(file_format) + if file_size > self.MAX_CONVERT_FILE_SIZE: + raise FileTooLargeError(file_size) + # FIXME tiktok.mp4 is for tiktok, inst.mp4 for instagram + if file_format not in ["mp3"] and not ("tiktok." in file or "inst." in file): + logger.info("Converting: %s", file) + try: + file_converted = file.replace(file_ext, ".mp3") + ffinput = ffmpeg.input(file) + # audio_bitrate="320k" + ffmpeg.output(ffinput, file_converted, vn=None).run() + file = file_converted + file_root, file_ext = os.path.splitext(file) + file_format = file_ext.replace(".", "").lower() + file_size = os.path.getsize(file) + except Exception: + # TODO exceptions + raise FileNotConvertedError + + file_parts = [] + if file_size <= self.MAX_TG_FILE_SIZE: + file_parts.append(file) + else: + logger.info("Splitting: %s", file) + id3 = None + try: + id3 = ID3(file, translate=False) + except: + pass + + parts_number = file_size // self.MAX_TG_FILE_SIZE + 1 + + # https://github.com/c0decracker/video-splitter + # https://superuser.com/a/1354956/464797 + try: + # file_duration = float(ffmpeg.probe(file)['format']['duration']) + part_size = file_size // parts_number + cur_position = 0 + for i in range(parts_number): + file_part = file.replace(file_ext, ".part{}{}".format(str(i + 1), file_ext)) + ffinput = ffmpeg.input(file) + if i == (parts_number - 1): + ffmpeg.output(ffinput, file_part, codec="copy", vn=None, ss=cur_position).run() + else: + ffmpeg.output(ffinput, file_part, codec="copy", vn=None, ss=cur_position, fs=part_size).run() + part_duration = float(ffmpeg.probe(file_part)["format"]["duration"]) + cur_position += part_duration + if id3: + try: + id3.save(file_part, v1=2, v2_version=4) + except: + pass + file_parts.append(file_part) + except Exception: + # TODO exceptions + raise FileSplittedPartiallyError(file_parts) + except FileNotSupportedError as exc: if not (exc.file_format in ["m3u", "jpg", "jpeg", "png", "finished", "tmp"]): logger.warning("Unsupported file format: %s", file_name) - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text="*Sorry*, downloaded file `{}` is in format I could not yet convert or send".format( - file_name), - parse_mode='Markdown') + bot.send_message( + chat_id=chat_id, + reply_to_message_id=reply_to_message_id, + text="*Sorry*, downloaded file `{}` is in format I could not yet convert or send".format(file_name), + parse_mode="Markdown", + ) except FileTooLargeError as exc: logger.info("Large file for convert: %s", file_name) - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text="*Sorry*, downloaded file `{}` is `{}` MB and it is larger than I could convert (`{} MB`)".format( - file_name, exc.file_size // 1000000, - self.MAX_CONVERT_FILE_SIZE // 1000000), - parse_mode='Markdown') + bot.send_message( + chat_id=chat_id, + reply_to_message_id=reply_to_message_id, + text="*Sorry*, downloaded file `{}` is `{}` MB and it is larger than I could convert (`{} MB`)".format( + file_name, exc.file_size // 1000000, self.MAX_CONVERT_FILE_SIZE // 1000000 + ), + parse_mode="Markdown", + ) except FileSplittedPartiallyError as exc: file_parts = exc.file_parts logger.exception("Splitting failed: %s", file_name) - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text="*Sorry*, not enough memory to convert file `{}`..".format( - file_name), - parse_mode='Markdown') + bot.send_message( + chat_id=chat_id, + reply_to_message_id=reply_to_message_id, + text="*Sorry*, not enough memory to convert file `{}`..".format(file_name), + parse_mode="Markdown", + ) except FileNotConvertedError as exc: logger.exception("Splitting failed: %s", file_name) - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text="*Sorry*, not enough memory to convert file `{}`..".format( - file_name), - parse_mode='Markdown') + bot.send_message( + chat_id=chat_id, + reply_to_message_id=reply_to_message_id, + text="*Sorry*, not enough memory to convert file `{}`..".format(file_name), + parse_mode="Markdown", + ) try: caption = None flood = self.chat_storage[str(chat_id)]["settings"]["flood"] @@ -754,18 +805,100 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message # url = url.replace("http://", "").replace("https://", "") # else: # url = shorten_url(url) - caption = "@{} _got it from_ [{}]({}){}".format(self.bot_username.replace("_", "\_"), - source, url, addition.replace("_", "\_")) + caption = "@{} _got it from_ [{}]({}){}".format(self.bot_username.replace("_", "\_"), source, url, addition.replace("_", "\_")) # logger.info(caption) - sent_audio_ids = self.send_audio_file_parts(bot, chat_id, file_parts, - reply_to_message_id if flood == "yes" else None, - caption) + reply_to_message_id_send = reply_to_message_id if flood == "yes" else None + sent_audio_ids = [] + for index, file_part in enumerate(file_parts): + path = pathlib.Path(file_part) + file_name = os.path.split(file_part)[-1] + # file_name = translit(file_name, 'ru', reversed=True) + logger.info("Sending: %s", file_name) + bot.send_chat_action(chat_id=chat_id, action=ChatAction.UPLOAD_AUDIO) + caption_part = None + if len(file_parts) > 1: + caption_part = "Part {} of {}".format(str(index + 1), str(len(file_parts))) + if caption: + if caption_part: + caption_full = caption_part + " | " + caption + else: + caption_full = caption + else: + if caption_part: + caption_full = caption_part + else: + caption_full = "" + # caption_full = textwrap.shorten(caption_full, width=190, placeholder="..") + for i in range(3): + try: + if file_part.endswith(".mp3"): + mp3 = MP3(file_part) + duration = round(mp3.info.length) + performer = None + title = None + try: + performer = ", ".join(mp3["artist"]) + title = ", ".join(mp3["title"]) + except: + pass + if "127.0.0.1" in self.TG_BOT_API: + audio = path.absolute().as_uri() + logger.debug(audio) + elif self.SERVE_AUDIO: + audio = str(urljoin(self.APP_URL, str(path.relative_to(self.DL_DIR)))) + logger.debug(audio) + else: + audio = open(file_part, "rb") + if i > 0: + # maybe: Reply message not found + reply_to_message_id_send = None + audio_msg = bot.send_audio( + chat_id=chat_id, + reply_to_message_id=reply_to_message_id_send, + audio=audio, + duration=duration, + performer=performer, + title=title, + caption=caption_full, + parse_mode="Markdown", + ) + sent_audio_ids.append(audio_msg.audio.file_id) + logger.info("Sending succeeded: %s", file_name) + break + elif "tiktok." in file_part or "inst." in file_part: + video = open(file_part, "rb") + duration = float(ffmpeg.probe(file_part)["format"]["duration"]) + videostream = next(item for item in ffmpeg.probe(file_part)["streams"] if item["codec_type"] == "video") + width = int(videostream["width"]) + height = int(videostream["height"]) + video_msg = bot.send_video( + chat_id=chat_id, + reply_to_message_id=reply_to_message_id_send, + video=video, + supports_streaming=True, + duration=duration, + width=width, + height=height, + caption=caption_full, + parse_mode="Markdown", + ) + sent_audio_ids.append(video_msg.video.file_id) + logger.info("Sending succeeded: %s", file_name) + break + except TelegramError: + if i == 2: + logger.exception("Sending failed because of TelegramError: %s", file_name) + if len(sent_audio_ids) != len(file_parts): + raise FileSentPartiallyError(sent_audio_ids) + except FileSentPartiallyError as exc: sent_audio_ids = exc.sent_audio_ids - bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, - text="*Sorry*, could not send file `{}` or some of it's parts..".format( - file_name), - parse_mode='Markdown') + bot.send_message( + chat_id=chat_id, + reply_to_message_id=reply_to_message_id, + text="*Sorry*, could not send file `{}` or some of it's parts..".format(file_name), + parse_mode="Markdown", + ) logger.warning("Sending some parts failed: %s", file_name) if not self.SERVE_AUDIO: @@ -778,148 +911,9 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message @run_async def blacklist_whitelist(self, update: Update, context: CallbackContext): - chat_id = update.message.chat_id - if not self.is_chat_allowed(chat_id): - context.bot.leave_chat(chat_id) - - - def convert_and_split_audio_file(self, file=""): - file_root, file_ext = os.path.splitext(file) - file_format = file_ext.replace(".", "").lower() - file_size = os.path.getsize(file) - # FIXME unknown_video is for tiktok - if file_format not in ["mp3", "m4a", "mp4", "unknown_video"]: - raise FileNotSupportedError(file_format) - if file_size > self.MAX_CONVERT_FILE_SIZE: - raise FileTooLargeError(file_size) - # FIXME unknown_video is for tiktok and also tiktok.mp4 - if file_format not in ["mp3", "unknown_video"] and "tiktok." not in file: - logger.info("Converting: %s", file) - try: - file_converted = file.replace(file_ext, ".mp3") - ffinput = ffmpeg.input(file) - ffmpeg.output(ffinput, file_converted, vn=None).run() - file = file_converted - file_root, file_ext = os.path.splitext(file) - file_format = file_ext.replace(".", "").lower() - file_size = os.path.getsize(file) - except Exception: - # TODO exceptions - raise FileNotConvertedError - - file_parts = [] - if file_size <= self.MAX_TG_FILE_SIZE: - file_parts.append(file) - else: - logger.info("Splitting: %s", file) - id3 = None - try: - id3 = ID3(file, translate=False) - except: - pass - - parts_number = file_size // self.MAX_TG_FILE_SIZE + 1 - - # https://github.com/c0decracker/video-splitter - # https://superuser.com/a/1354956/464797 - try: - # file_duration = float(ffmpeg.probe(file)['format']['duration']) - part_size = file_size // parts_number - cur_position = 0 - for i in range(parts_number): - file_part = file.replace(file_ext, ".part{}{}".format(str(i + 1), file_ext)) - ffinput = ffmpeg.input(file) - if i == (parts_number - 1): - ffmpeg.output(ffinput, file_part, codec="copy", vn=None, ss=cur_position).run() - else: - ffmpeg.output(ffinput, file_part, codec="copy", vn=None, ss=cur_position, fs=part_size).run() - part_duration = float(ffmpeg.probe(file_part)['format']['duration']) - cur_position += part_duration - if id3: - try: - id3.save(file_part, v1=2, v2_version=4) - except: - pass - file_parts.append(file_part) - except Exception: - # TODO exceptions - raise FileSplittedPartiallyError(file_parts) - return file_parts - - def send_audio_file_parts(self, bot, chat_id, file_parts, reply_to_message_id=None, caption=None): - sent_audio_ids = [] - for index, file_part in enumerate(file_parts): - path = pathlib.Path(file_part) - file_name = os.path.split(file_part)[-1] - # file_name = translit(file_name, 'ru', reversed=True) - logger.info("Sending: %s", file_name) - bot.send_chat_action(chat_id=chat_id, action=ChatAction.UPLOAD_AUDIO) - caption_part = None - if len(file_parts) > 1: - caption_part = "Part {} of {}".format(str(index + 1), str(len(file_parts))) - if caption: - if caption_part: - caption_full = caption_part + " | " + caption - else: - caption_full = caption - else: - if caption_part: - caption_full = caption_part - else: - caption_full = "" - # caption_full = textwrap.shorten(caption_full, width=190, placeholder="..") - for i in range(3): - try: - if file_part.endswith('.mp3'): - mp3 = MP3(file_part) - duration = round(mp3.info.length) - performer = None - title = None - try: - performer = ", ".join(mp3['artist']) - title = ", ".join(mp3['title']) - except: - pass - if "127.0.0.1" in self.TG_BOT_API: - audio = path.absolute().as_uri() - logger.debug(audio) - elif self.SERVE_AUDIO: - audio = str(urljoin(self.APP_URL, str(path.relative_to(self.DL_DIR)))) - logger.debug(audio) - else: - audio = open(file_part, 'rb') - if i > 0: - # maybe: Reply message not found - reply_to_message_id = None - audio_msg = bot.send_audio(chat_id=chat_id, - reply_to_message_id=reply_to_message_id, - audio=audio, - duration=duration, - performer=performer, - title=title, - caption=caption_full, - parse_mode='Markdown') - sent_audio_ids.append(audio_msg.audio.file_id) - logger.info("Sending succeeded: %s", file_name) - break - # FIXME unknown_video is for tiktok - elif file_part.endswith('.unknown_video') or "tiktok." in file_part: - video = open(file_part, 'rb') - video_msg = bot.send_video(chat_id=chat_id, - reply_to_message_id=reply_to_message_id, - video=video, - # duration=duration, - caption=caption_full, - parse_mode='Markdown') - sent_audio_ids.append(video_msg.video.file_id) - logger.info("Sending succeeded: %s", file_name) - break - except TelegramError: - if i == 2: - logger.exception("Sending failed because of TelegramError: %s", file_name) - if len(sent_audio_ids) != len(file_parts): - raise FileSentPartiallyError(sent_audio_ids) - return sent_audio_ids + chat_id = update.message.chat_id + if not self.is_chat_allowed(chat_id): + context.bot.leave_chat(chat_id) def is_chat_allowed(self, chat_id): try: From 51319a623f40a316fe331f310a1e5cb6f1ecabec Mon Sep 17 00:00:00 2001 From: George Pchelkin Date: Mon, 19 Dec 2022 01:39:59 +0300 Subject: [PATCH 20/22] support working in channels, close #176 --- scdlbot/scdlbot.py | 48 ++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 4d7b863bc..a30e02cf4 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -224,16 +224,20 @@ def rant_and_cleanup(self, bot, chat_id, rant_text, reply_to_message_id=None): self.chat_storage.sync() def help_command_callback(self, update: Update, context: CallbackContext): - self.init_chat(update.message) + if update.channel_post: + message = update.channel_post + elif update.message: + message = update.message + self.init_chat(message) event_name = "help" - entities = update.message.parse_entities(types=[MessageEntity.BOT_COMMAND]) + entities = message.parse_entities(types=[MessageEntity.BOT_COMMAND]) for entity_value in entities.values(): event_name = entity_value.replace("/", "").replace("@{}".format(self.bot_username), "") break - log_and_track(event_name, update.message) - chat_id = update.message.chat_id - chat_type = update.message.chat.type - reply_to_message_id = update.message.message_id + log_and_track(event_name, message) + chat_id = message.chat_id + chat_type = message.chat.type + reply_to_message_id = message.message_id flood = self.chat_storage[str(chat_id)]["settings"]["flood"] if chat_type != Chat.PRIVATE and flood == "no": self.rant_and_cleanup(context.bot, chat_id, self.RANT_TEXT_PUBLIC, reply_to_message_id=reply_to_message_id) @@ -257,20 +261,28 @@ def get_settings_inline_keyboard(self, chat_id): return inline_keyboard def settings_command_callback(self, update: Update, context: CallbackContext): - self.init_chat(update.message) + if update.channel_post: + message = update.channel_post + elif update.message: + message = update.message + self.init_chat(message) log_and_track("settings") - chat_id = update.message.chat_id + chat_id = message.chat_id context.bot.send_message(chat_id=chat_id, parse_mode="Markdown", reply_markup=self.get_settings_inline_keyboard(chat_id), text=self.SETTINGS_TEXT) def common_command_callback(self, update: Update, context: CallbackContext): - self.init_chat(update.message) - chat_id = update.message.chat_id + if update.channel_post: + message = update.channel_post + elif update.message: + message = update.message + self.init_chat(message) + chat_id = message.chat_id if not self.is_chat_allowed(chat_id): context.bot.send_message(chat_id=chat_id, text="This command isn't allowed in this chat.") return - chat_type = update.message.chat.type - reply_to_message_id = update.message.message_id - command_entities = update.message.parse_entities(types=[MessageEntity.BOT_COMMAND]) + chat_type = message.chat.type + reply_to_message_id = message.message_id + command_entities = message.parse_entities(types=[MessageEntity.BOT_COMMAND]) command_passed = False if not command_entities: command_passed = False @@ -291,7 +303,7 @@ def common_command_callback(self, update: Update, context: CallbackContext): self.rant_and_cleanup(context.bot, chat_id, rant_text, reply_to_message_id=reply_to_message_id) return event_name = ("{}_cmd".format(mode)) if command_passed else ("{}_msg".format(mode)) - log_and_track(event_name, update.message) + log_and_track(event_name, message) apologize = False # apologize and send TYPING: always in PM, only when it's command in non-PM @@ -304,7 +316,7 @@ def common_command_callback(self, update: Update, context: CallbackContext): if self.proxies: proxy = random.choice(self.proxies) self.prepare_urls( - message=update.message, mode=mode, source_ip=source_ip, proxy=proxy, apologize=apologize, chat_id=chat_id, reply_to_message_id=reply_to_message_id, bot=context.bot + message=message, mode=mode, source_ip=source_ip, proxy=proxy, apologize=apologize, chat_id=chat_id, reply_to_message_id=reply_to_message_id, bot=context.bot ) def button_query_callback(self, update: Update, context: CallbackContext): @@ -911,7 +923,11 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message @run_async def blacklist_whitelist(self, update: Update, context: CallbackContext): - chat_id = update.message.chat_id + if update.channel_post: + message = update.channel_post + elif update.message: + message = update.message + chat_id = message.chat_id if not self.is_chat_allowed(chat_id): context.bot.leave_chat(chat_id) From 0e097775b5f162540293471f48a8bcb4d2fe8e33 Mon Sep 17 00:00:00 2001 From: George Pchelkin Date: Mon, 19 Dec 2022 10:55:51 +0300 Subject: [PATCH 21/22] prepare v0.14.2 --- CHANGELOG.rst | 12 +++++++++++- pyproject.toml | 2 +- scdlbot/__init__.py | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index eed7b38c6..ad24e1a64 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,9 +1,19 @@ Version history =============== +0.14.2 (2022-12-19) +----------------------- +* merge #508: + * support 320 Kbps + * stop the bot from handling telegram links + * added domain and id blacklisting and whitelisting + * add metadata to downloaded songs + * more described in PR #508 +* support working in channels, close #176 + 0.14.1 (2022-12-18) ----------------------- -* fix youtube playlist and video in playlist download +* fix youtube playlist and video in playlist download, close #439 0.14.0 (2022-12-18) ----------------------- diff --git a/pyproject.toml b/pyproject.toml index 6b43479c3..e17ea952d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ commands = [tool.poetry] name = "scdlbot" description = "Telegram Bot for downloading MP3 rips of tracks/sets from SoundCloud, Bandcamp, YouTube with tags and artwork" -version = "0.14.1" +version = "0.14.2" license = "MIT" authors = ["George Pchelkin "] diff --git a/scdlbot/__init__.py b/scdlbot/__init__.py index 5823a9199..ff32063cf 100644 --- a/scdlbot/__init__.py +++ b/scdlbot/__init__.py @@ -4,4 +4,4 @@ __author__ = """George Pchelkin""" __email__ = "george@pchelk.in" -__version__ = "0.14.1" +__version__ = "0.14.2" From 38033b1f2a659c8bddbde785e4a5819a77c8ef3b Mon Sep 17 00:00:00 2001 From: George Pchelkin Date: Mon, 19 Dec 2022 11:00:39 +0300 Subject: [PATCH 22/22] prepare v0.14.2 --- CHANGELOG.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ad24e1a64..99134e6ef 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,11 +4,13 @@ Version history 0.14.2 (2022-12-19) ----------------------- * merge #508: + * support 320 Kbps * stop the bot from handling telegram links * added domain and id blacklisting and whitelisting * add metadata to downloaded songs * more described in PR #508 + * support working in channels, close #176 0.14.1 (2022-12-18)