diff --git a/.env.sample b/.env.sample index 760365c6d..14dd64799 100644 --- a/.env.sample +++ b/.env.sample @@ -21,6 +21,11 @@ CHAT_STORAGE="/home/gpchelkin/scdlbotdata" # For using inline mode bot needs to store audios somewhere. ID of that chat. STORE_CHAT_ID="-1795100" +# A space separated list of chat_ids which should be considered whitelisted - the bot will only join those chats. **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration. +WHITELIST_CHATS="-1795100 1795102" +# A space separated list of chat_ids which should be considered blacklisted - the bot will not join those chats. **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration. +BLACKLIST_CHATS="-1795100 1795102" + SOURCE_IPS="9.21.18.2,9.21.16.9" PROXIES="socks5://127.0.0.1:1080,socks5://127.0.0.1:1081," SERVE_AUDIO="0" @@ -63,6 +68,10 @@ PORT="5000" # Your host URL like https://scdlbot.herokuapp.com/, required for webhook mode APP_URL="https://yourapp.heroku.com/" +# A space separated list of domains which should be considered whitelisted - the bot will only process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration. +WHITELIST_DOMS="example.com subdomain.example.com" +# A space separated list of domains which should be considered blacklisted - the bot will not process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration. 
+BLACKLIST_DOMS="example.com subdomain.example.com" # Your URL path for webhook to listen URL_PATH="166849652AAEPCgHuDf3K6HvI2OkrJmDN2k9R6mcfmLs" diff --git a/CHANGELOG.rst b/CHANGELOG.rst index eed7b38c6..99134e6ef 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,9 +1,21 @@ Version history =============== +0.14.2 (2022-12-19) +----------------------- +* merge #508: + + * support 320 Kbps + * stop the bot from handling telegram links + * added domain and id blacklisting and whitelisting + * add metadata to downloaded songs + * more described in PR #508 + +* support working in channels, close #176 + 0.14.1 (2022-12-18) ----------------------- -* fix youtube playlist and video in playlist download +* fix youtube playlist and video in playlist download, close #439 0.14.0 (2022-12-18) ----------------------- diff --git a/app.json b/app.json index 7e7d97e19..4735e9d1f 100644 --- a/app.json +++ b/app.json @@ -60,6 +60,14 @@ "description": "Chat ID for storing audios for inline mode", "required": false }, + "WHITELIST_CHATS": { + "description": "A space separated list of chat_ids which should be considered whitelisted - the bot will only join those chats **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.", + "required": false + }, + "BLACKLIST_CHATS": { + "description": "A space separated list of chat_ids which should be considered blacklisted - the bot will not join those chats. 
**NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.", + "required": false + }, "COOKIES_FILE": { "description": "HTTP or local path with cookies file for Yandex.Music", "required": false @@ -91,6 +99,14 @@ "APP_URL": { "description": "Your host URL like https://scdlbot.herokuapp.com/, required for webhook mode", "required": false + }, + "WHITELIST_DOMS": { + "description": "A space separated list of domains which should be considered whitelisted - the bot will only process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.", + "required": false + }, + "BLACKLIST_DOMS": { + "description": "A space separated list of domains which should be considered blacklisted - the bot will not process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.", + "required": false } }, "formation": { diff --git a/pyproject.toml b/pyproject.toml index 6b43479c3..e17ea952d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ commands = [tool.poetry] name = "scdlbot" description = "Telegram Bot for downloading MP3 rips of tracks/sets from SoundCloud, Bandcamp, YouTube with tags and artwork" -version = "0.14.1" +version = "0.14.2" license = "MIT" authors = ["George Pchelkin "] diff --git a/render.yaml b/render.yaml index ef4874068..eaebe4a32 100644 --- a/render.yaml +++ b/render.yaml @@ -9,31 +9,31 @@ services: plan: free # optional; defaults to starter numInstances: 1 envVars: - - key: ALERT_CHAT_IDS # Imported from Heroku app + - key: ALERT_CHAT_IDS value: 1306343 - - key: APP_URL # Imported from Heroku app + - key: APP_URL value: 
https://scdlbot.herokuapp.com/ - - key: DL_DIR # Imported from Heroku app + - key: DL_DIR value: /tmp/scdlbot - - key: DL_TIMEOUT # Imported from Heroku app + - key: DL_TIMEOUT value: 900 - - key: HOST # Imported from Heroku app + - key: HOST value: 0.0.0.0 - - key: HOSTNAME # Imported from Heroku app + - key: HOSTNAME value: test-heroku - - key: MAX_CONVERT_FILE_SIZE # Imported from Heroku app + - key: MAX_CONVERT_FILE_SIZE value: 300_000_000 - - key: MAX_TG_FILE_SIZE # Imported from Heroku app + - key: MAX_TG_FILE_SIZE value: 45_000_000 - - key: NO_FLOOD_CHAT_IDS # Imported from Heroku app + - key: NO_FLOOD_CHAT_IDS value: -1001108859218,-1001106680201 - - key: STORE_CHAT_ID # Imported from Heroku app + - key: STORE_CHAT_ID value: -172951900 - - key: SYSLOG_ADDRESS # Imported from Heroku app + - key: SYSLOG_ADDRESS value: logs6.papertrailapp.com:54882 - - key: SYSLOG_DEBUG # Imported from Heroku app + - key: SYSLOG_DEBUG value: 1 - - key: USE_WEBHOOK # Imported from Heroku app + - key: USE_WEBHOOK value: 1 - key: WORKERS value: 2 diff --git a/scdlbot/__init__.py b/scdlbot/__init__.py index 5823a9199..ff32063cf 100644 --- a/scdlbot/__init__.py +++ b/scdlbot/__init__.py @@ -4,4 +4,4 @@ __author__ = """George Pchelkin""" __email__ = "george@pchelk.in" -__version__ = "0.14.1" +__version__ = "0.14.2" diff --git a/scdlbot/scdlbot.py b/scdlbot/scdlbot.py index 539422395..a30e02cf4 100644 --- a/scdlbot/scdlbot.py +++ b/scdlbot/scdlbot.py @@ -131,6 +131,9 @@ def __init__( unknown_handler = MessageHandler(Filters.command, self.unknown_command_callback) dispatcher.add_handler(unknown_handler) + blacklist_whitelist_handler = MessageHandler(Filters.status_update.new_chat_members, self.blacklist_whitelist) + dispatcher.add_handler(blacklist_whitelist_handler) + dispatcher.add_error_handler(self.error_callback) self.bot_username = self.updater.bot.get_me().username @@ -221,16 +224,20 @@ def rant_and_cleanup(self, bot, chat_id, rant_text, reply_to_message_id=None): 
def settings_command_callback(self, update: Update, context: CallbackContext):
    """Answer a settings command with the settings text and inline keyboard.

    The command may arrive either as a channel post or as a regular chat
    message; whichever one the update carries is used to initialise the
    chat and to determine the chat the reply is sent to.

    Args:
        update: Incoming update; only ``channel_post`` and ``message`` are read.
        context: Callback context; ``context.bot`` is used to send the reply.
    """
    # Same preference order as before (channel post first, then message).
    message = update.channel_post or update.message
    if message is None:
        # The update carries neither payload. Previously this fell through
        # with ``message`` unbound and crashed with UnboundLocalError.
        return
    self.init_chat(message)
    log_and_track("settings")
    chat_id = message.chat_id
    context.bot.send_message(
        chat_id=chat_id,
        parse_mode="Markdown",
        reply_markup=self.get_settings_inline_keyboard(chat_id),
        text=self.SETTINGS_TEXT,
    )
update.channel_post: + message = update.channel_post + elif update.message: + message = update.message + self.init_chat(message) + chat_id = message.chat_id + if not self.is_chat_allowed(chat_id): + context.bot.send_message(chat_id=chat_id, text="This command isn't allowed in this chat.") + return + chat_type = message.chat.type + reply_to_message_id = message.message_id + command_entities = message.parse_entities(types=[MessageEntity.BOT_COMMAND]) command_passed = False if not command_entities: command_passed = False @@ -285,7 +303,7 @@ def common_command_callback(self, update: Update, context: CallbackContext): self.rant_and_cleanup(context.bot, chat_id, rant_text, reply_to_message_id=reply_to_message_id) return event_name = ("{}_cmd".format(mode)) if command_passed else ("{}_msg".format(mode)) - log_and_track(event_name, update.message) + log_and_track(event_name, message) apologize = False # apologize and send TYPING: always in PM, only when it's command in non-PM @@ -298,7 +316,7 @@ def common_command_callback(self, update: Update, context: CallbackContext): if self.proxies: proxy = random.choice(self.proxies) self.prepare_urls( - message=update.message, mode=mode, source_ip=source_ip, proxy=proxy, apologize=apologize, chat_id=chat_id, reply_to_message_id=reply_to_message_id, bot=context.bot + message=message, mode=mode, source_ip=source_ip, proxy=proxy, apologize=apologize, chat_id=chat_id, reply_to_message_id=reply_to_message_id, bot=context.bot ) def button_query_callback(self, update: Update, context: CallbackContext): @@ -310,6 +328,9 @@ def button_query_callback(self, update: Update, context: CallbackContext): chat_id = chat.id chat_type = chat.type orig_msg_id, action = update.callback_query.data.split() + if not self.is_chat_allowed(chat_id): + update.callback_query.answer(text="This command isn't allowed in this chat.") + return if orig_msg_id == "settings": if chat_type != Chat.PRIVATE: chat_member_status = chat.get_member(user_id).status @@ -378,19 
def url_valid(self, url):
    """Return True when ``url`` is a link the bot should process.

    Links whose host is one of the Telegram-owned domains listed below are
    always rejected. Every other link is handed to ``url_allowed`` for the
    domain whitelist/blacklist check.

    Args:
        url: The link, normally a ``str``. Other objects are converted with
            ``str()`` (assumes that yields the URL text - TODO confirm for
            the objects returned by ``find_all_links``).

    Returns:
        bool: False for missing/unparsable links, Telegram links and links
        rejected by ``url_allowed``; True otherwise.
    """
    telegram_domains = {"t.me", "telegram.org", "telegram.dog", "telegra.ph", "tdesktop.com", "telesco.pe", "graph.org", "contest.dev"}
    logger.debug("Checking Url Entry: %s", url)
    if url is None:
        return False
    text = url if isinstance(url, str) else str(url)
    if not text:
        return False
    # Callers validate links before they add a default scheme; without one
    # urlparse() reports an empty host and the checks below are bypassed.
    if "://" not in text:
        text = "http://{}".format(text)
    try:
        # hostname (unlike netloc) is lower-cased and has no port/userinfo.
        host = urlparse(text).hostname or ""
    except ValueError:
        # urlparse() rejects malformed netlocs (e.g. unbalanced IPv6 brackets).
        return False
    if host in telegram_domains:
        return False
    return self.url_allowed(url)

def url_allowed(self, url):
    """Check the host of ``url`` against the configured domain lists.

    The lists are read on every call from the ``WHITELIST_DOMS`` and
    ``BLACKLIST_DOMS`` environment variables (whitespace separated), e.g.
    BLACKLIST_DOMS="invidious.tube invidious.kavin.rocks piped.kavin.rocks".
    Matching is exact on the host name, so subdomains must be listed
    explicitly; comparison is case-insensitive and ignores any port.

    NOTE(review): when both lists are set, a host must be whitelisted AND
    not blacklisted. The sample configuration text says only the blacklist
    is considered in that case - confirm which behaviour is intended.

    Args:
        url: The link to check; a missing scheme is tolerated.

    Returns:
        bool: True when the host passes both lists, False otherwise.
    """
    whitelist = {entry.lower() for entry in os.environ.get("WHITELIST_DOMS", "").split()}
    blacklist = {entry.lower() for entry in os.environ.get("BLACKLIST_DOMS", "").split()}
    text = "" if url is None else (url if isinstance(url, str) else str(url))
    # Default the scheme so that bare links such as "example.com/x" still
    # yield a host instead of an empty netloc.
    if "://" not in text:
        text = "http://{}".format(text)
    host = urlparse(text).hostname or ""
    if whitelist and host not in whitelist:
        return False
    return host not in blacklist
def is_chat_allowed(self, chat_id):
    """Return whether the bot may operate in the chat ``chat_id``.

    The allowed/forbidden chats are read on every call from the
    ``WHITELIST_CHATS`` and ``BLACKLIST_CHATS`` environment variables. Ids
    may be separated by whitespace and/or commas, so both "1 2" and "1, 2"
    are accepted.

    NOTE(review): when both lists are set, a chat must be whitelisted AND
    not blacklisted. The sample configuration text says only the blacklist
    is considered in that case - confirm which behaviour is intended.

    Args:
        chat_id: Integer id of the chat to check.

    Returns:
        bool: False when a whitelist is configured and ``chat_id`` is not in
        it, or when ``chat_id`` is blacklisted; True otherwise.

    Raises:
        ValueError: If either variable contains a token that is not an integer.
    """

    def _read_ids(var_name, label):
        # Commas are treated like whitespace so comma-separated values parse too.
        raw = os.environ.get(var_name, "")
        try:
            return {int(token) for token in raw.replace(",", " ").split()}
        except ValueError as err:
            raise ValueError("Your {} chats does not contain valid integers.".format(label)) from err

    whitelist = _read_ids("WHITELIST_CHATS", "whitelisted")
    blacklist = _read_ids("BLACKLIST_CHATS", "blacklisted")
    if whitelist and chat_id not in whitelist:
        return False
    return chat_id not in blacklist