Skip to content

Commit

Permalink
Merge pull request #675 from gpchelkin/master
Browse files Browse the repository at this point in the history
  • Loading branch information
gpchelkin authored Dec 19, 2022
2 parents efb435b + 38033b1 commit 30aac1c
Show file tree
Hide file tree
Showing 9 changed files with 169 additions and 42 deletions.
9 changes: 9 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ CHAT_STORAGE="/home/gpchelkin/scdlbotdata"
# For using inline mode bot needs to store audios somewhere. ID of that chat.
STORE_CHAT_ID="-1795100"

# A space separated list of chat_ids which should be considered whitelisted - the bot will only join those chats. **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.
WHITELIST_CHATS="-1795100 1795102"
# A space separated list of chat_ids which should be considered blacklisted - the bot will not join those chats. **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.
BLACKLIST_CHATS="-1795100 1795102"

SOURCE_IPS="9.21.18.2,9.21.16.9"
PROXIES="socks5://127.0.0.1:1080,socks5://127.0.0.1:1081,"
SERVE_AUDIO="0"
Expand Down Expand Up @@ -63,6 +68,10 @@ PORT="5000"
# Your host URL like https://scdlbot.herokuapp.com/, required for webhook mode
APP_URL="https://yourapp.heroku.com/"

# A space separated list of domains which should be considered whitelisted - the bot will only process these domains. Example of a domain: example.com. If the domain has a subdomain, the subdomain needs to be included: subdomain.example.com. **NOTE** that if both whitelist and blacklist are used, only the blacklist will be taken into consideration.
WHITELIST_DOMS="example.com subdomain.example.com"
# A space separated list of domains which should be considered blacklisted - the bot will not process these domains. Example of a domain: example.com. If the domain has a subdomain, the subdomain needs to be included: subdomain.example.com. **NOTE** that if both whitelist and blacklist are used, only the blacklist will be taken into consideration.
BLACKLIST_DOMS="example.com subdomain.example.com"
# Your URL path for webhook to listen
URL_PATH="166849652AAEPCgHuDf3K6HvI2OkrJmDN2k9R6mcfmLs"

Expand Down
14 changes: 13 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
Version history
===============

0.14.2 (2022-12-19)
-----------------------
* merge #508:

* support 320 Kbps
* stop the bot from handling telegram links
* added domain and id blacklisting and whitelisting
* add metadata to downloaded songs
* more described in PR #508

* support working in channels, close #176

0.14.1 (2022-12-18)
-----------------------
* fix youtube playlist and video in playlist download
* fix youtube playlist and video in playlist download, close #439

0.14.0 (2022-12-18)
-----------------------
Expand Down
16 changes: 16 additions & 0 deletions app.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@
"description": "Chat ID for storing audios for inline mode",
"required": false
},
"WHITELIST_CHATS": {
"description": "A space separated list of chat_ids which should be considered whitelisted - the bot will only join those chats **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.",
"required": false
},
"BLACKLIST_CHATS": {
"description": "A space separated list of chat_ids which should be considered blacklisted - the bot will not join those chats. **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.",
"required": false
},
"COOKIES_FILE": {
"description": "HTTP or local path with cookies file for Yandex.Music",
"required": false
Expand Down Expand Up @@ -91,6 +99,14 @@
"APP_URL": {
"description": "Your host URL like https://scdlbot.herokuapp.com/, required for webhook mode",
"required": false
},
"WHITELIST_DOMS": {
"description": "A space separated list of domains which should be considered whitelisted - the bot will only process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.",
"required": false
},
"BLACKLIST_DOMS": {
"description": "A space separated list of domains which should be considered blacklisted - the bot will not process these domains. Example of domain: example.com if the domain has a subdomain this needs to be included: subdomain.example.com **NOTE** that if both whitelist and blacklist will be used, only the blacklist will be taken into consideration.",
"required": false
}
},
"formation": {
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ commands =
[tool.poetry]
name = "scdlbot"
description = "Telegram Bot for downloading MP3 rips of tracks/sets from SoundCloud, Bandcamp, YouTube with tags and artwork"
version = "0.14.1"
version = "0.14.2"
license = "MIT"

authors = ["George Pchelkin <[email protected]>"]
Expand Down
26 changes: 13 additions & 13 deletions render.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,31 @@ services:
plan: free # optional; defaults to starter
numInstances: 1
envVars:
- key: ALERT_CHAT_IDS # Imported from Heroku app
- key: ALERT_CHAT_IDS
value: 1306343
- key: APP_URL # Imported from Heroku app
- key: APP_URL
value: https://scdlbot.herokuapp.com/
- key: DL_DIR # Imported from Heroku app
- key: DL_DIR
value: /tmp/scdlbot
- key: DL_TIMEOUT # Imported from Heroku app
- key: DL_TIMEOUT
value: 900
- key: HOST # Imported from Heroku app
- key: HOST
value: 0.0.0.0
- key: HOSTNAME # Imported from Heroku app
- key: HOSTNAME
value: test-heroku
- key: MAX_CONVERT_FILE_SIZE # Imported from Heroku app
- key: MAX_CONVERT_FILE_SIZE
value: 300_000_000
- key: MAX_TG_FILE_SIZE # Imported from Heroku app
- key: MAX_TG_FILE_SIZE
value: 45_000_000
- key: NO_FLOOD_CHAT_IDS # Imported from Heroku app
- key: NO_FLOOD_CHAT_IDS
value: -1001108859218,-1001106680201
- key: STORE_CHAT_ID # Imported from Heroku app
- key: STORE_CHAT_ID
value: -172951900
- key: SYSLOG_ADDRESS # Imported from Heroku app
- key: SYSLOG_ADDRESS
value: logs6.papertrailapp.com:54882
- key: SYSLOG_DEBUG # Imported from Heroku app
- key: SYSLOG_DEBUG
value: 1
- key: USE_WEBHOOK # Imported from Heroku app
- key: USE_WEBHOOK
value: 1
- key: WORKERS
value: 2
2 changes: 1 addition & 1 deletion scdlbot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

__author__ = """George Pchelkin"""
__email__ = "[email protected]"
__version__ = "0.14.1"
__version__ = "0.14.2"
139 changes: 114 additions & 25 deletions scdlbot/scdlbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ def __init__(
unknown_handler = MessageHandler(Filters.command, self.unknown_command_callback)
dispatcher.add_handler(unknown_handler)

blacklist_whitelist_handler = MessageHandler(Filters.status_update.new_chat_members, self.blacklist_whitelist)
dispatcher.add_handler(blacklist_whitelist_handler)

dispatcher.add_error_handler(self.error_callback)

self.bot_username = self.updater.bot.get_me().username
Expand Down Expand Up @@ -221,16 +224,20 @@ def rant_and_cleanup(self, bot, chat_id, rant_text, reply_to_message_id=None):
self.chat_storage.sync()

def help_command_callback(self, update: Update, context: CallbackContext):
self.init_chat(update.message)
if update.channel_post:
message = update.channel_post
elif update.message:
message = update.message
self.init_chat(message)
event_name = "help"
entities = update.message.parse_entities(types=[MessageEntity.BOT_COMMAND])
entities = message.parse_entities(types=[MessageEntity.BOT_COMMAND])
for entity_value in entities.values():
event_name = entity_value.replace("/", "").replace("@{}".format(self.bot_username), "")
break
log_and_track(event_name, update.message)
chat_id = update.message.chat_id
chat_type = update.message.chat.type
reply_to_message_id = update.message.message_id
log_and_track(event_name, message)
chat_id = message.chat_id
chat_type = message.chat.type
reply_to_message_id = message.message_id
flood = self.chat_storage[str(chat_id)]["settings"]["flood"]
if chat_type != Chat.PRIVATE and flood == "no":
self.rant_and_cleanup(context.bot, chat_id, self.RANT_TEXT_PUBLIC, reply_to_message_id=reply_to_message_id)
Expand All @@ -254,17 +261,28 @@ def get_settings_inline_keyboard(self, chat_id):
return inline_keyboard

def settings_command_callback(self, update: Update, context: CallbackContext):
self.init_chat(update.message)
if update.channel_post:
message = update.channel_post
elif update.message:
message = update.message
self.init_chat(message)
log_and_track("settings")
chat_id = update.message.chat_id
chat_id = message.chat_id
context.bot.send_message(chat_id=chat_id, parse_mode="Markdown", reply_markup=self.get_settings_inline_keyboard(chat_id), text=self.SETTINGS_TEXT)

def common_command_callback(self, update: Update, context: CallbackContext):
self.init_chat(update.message)
chat_id = update.message.chat_id
chat_type = update.message.chat.type
reply_to_message_id = update.message.message_id
command_entities = update.message.parse_entities(types=[MessageEntity.BOT_COMMAND])
if update.channel_post:
message = update.channel_post
elif update.message:
message = update.message
self.init_chat(message)
chat_id = message.chat_id
if not self.is_chat_allowed(chat_id):
context.bot.send_message(chat_id=chat_id, text="This command isn't allowed in this chat.")
return
chat_type = message.chat.type
reply_to_message_id = message.message_id
command_entities = message.parse_entities(types=[MessageEntity.BOT_COMMAND])
command_passed = False
if not command_entities:
command_passed = False
Expand All @@ -285,7 +303,7 @@ def common_command_callback(self, update: Update, context: CallbackContext):
self.rant_and_cleanup(context.bot, chat_id, rant_text, reply_to_message_id=reply_to_message_id)
return
event_name = ("{}_cmd".format(mode)) if command_passed else ("{}_msg".format(mode))
log_and_track(event_name, update.message)
log_and_track(event_name, message)

apologize = False
# apologize and send TYPING: always in PM, only when it's command in non-PM
Expand All @@ -298,7 +316,7 @@ def common_command_callback(self, update: Update, context: CallbackContext):
if self.proxies:
proxy = random.choice(self.proxies)
self.prepare_urls(
message=update.message, mode=mode, source_ip=source_ip, proxy=proxy, apologize=apologize, chat_id=chat_id, reply_to_message_id=reply_to_message_id, bot=context.bot
message=message, mode=mode, source_ip=source_ip, proxy=proxy, apologize=apologize, chat_id=chat_id, reply_to_message_id=reply_to_message_id, bot=context.bot
)

def button_query_callback(self, update: Update, context: CallbackContext):
Expand All @@ -310,6 +328,9 @@ def button_query_callback(self, update: Update, context: CallbackContext):
chat_id = chat.id
chat_type = chat.type
orig_msg_id, action = update.callback_query.data.split()
if not self.is_chat_allowed(chat_id):
update.callback_query.answer(text="This command isn't allowed in this chat.")
return
if orig_msg_id == "settings":
if chat_type != Chat.PRIVATE:
chat_member_status = chat.get_member(user_id).status
Expand Down Expand Up @@ -378,19 +399,26 @@ def prepare_urls(self, message, mode=None, source_ip=None, proxy=None, apologize
url_entities.update(url_caption_entities)
for entity in url_entities:
url_str = url_entities[entity]
logger.debug("Entity URL Parsed: %s", url_str)
if "://" not in url_str:
url_str = "http://{}".format(url_str)
urls.append(URL(url_str))
if self.url_valid(url_str):
logger.debug("Entity URL Parsed: %s", url_str)
if "://" not in url_str:
url_str = "http://{}".format(url_str)
urls.append(URL(url_str))
else:
logger.debug("Entry URL not valid or blacklisted: %s", url_str)
text_link_entities = message.parse_entities(types=[MessageEntity.TEXT_LINK])
text_link_caption_entities = message.parse_caption_entities(types=[MessageEntity.TEXT_LINK])
text_link_entities.update(text_link_caption_entities)
for entity in text_link_entities:
url_str = entity.url
logger.debug("Entity Text Link Parsed: %s", url_str)
urls.append(URL(url_str))
if self.url_valid(url_str):
logger.debug("Entity Text Link Parsed: %s", url_str)
urls.append(URL(url_str))
else:
logger.debug("Entry URL not valid or blacklisted: %s", url_str)
else:
urls = find_all_links(message, default_scheme="http")
all_links = find_all_links(message, default_scheme="http")
urls = [link for link in all_links if self.url_valid(link)]
logger.debug(urls)

urls_dict = {}
Expand Down Expand Up @@ -465,6 +493,33 @@ def prepare_urls(self, message, mode=None, source_ip=None, proxy=None, apologize
bot.send_message(chat_id=chat_id, reply_to_message_id=reply_to_message_id, reply_markup=inline_keyboard, text=question)
self.cleanup_chat(chat_id)

def url_valid(self, url):
    """Return True if ``url`` may be handled by the bot.

    Links that point at Telegram-owned domains are always rejected; any
    other URL is passed on to ``self.url_allowed`` for the configurable
    domain whitelist/blacklist check.

    The host is compared in a normalized form (lower-cased, without port or
    userinfo), and a URL that carries no scheme is parsed as if it started
    with ``http://``. The caller only adds the default scheme *after* this
    check, so without the normalization a bare ``t.me/...`` link would have
    an empty network location and slip through the filter.

    :param url: URL string to check.
    :return: ``False`` for unparsable input or Telegram domains, otherwise
        the result of ``self.url_allowed(url)``.
    """
    telegram_domains = {"t.me", "telegram.org", "telegram.dog", "telegra.ph", "tdesktop.com", "telesco.pe", "graph.org", "contest.dev"}
    logger.debug("Checking Url Entry: %s", url)
    try:
        parsed = urlparse(url)
        if not parsed.netloc and "://" not in url:
            # Scheme-less input such as "t.me/foo": re-parse with a default scheme.
            parsed = urlparse("http://{}".format(url))
        host = parsed.hostname or ""
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a malformed URL (e.g. broken IPv6 literal).
        return False
    if host in telegram_domains:
        return False
    return self.url_allowed(url)

def url_allowed(self, url):
    """Check ``url`` against the configured domain whitelist/blacklist.

    The lists are read from the space separated environment variables
    ``WHITELIST_DOMS`` and ``BLACKLIST_DOMS``, for example::

        export BLACKLIST_DOMS="invidious.tube invidious.kavin.rocks piped.kavin.rocks"

    Rules, as documented for these settings:

    * if a blacklist is set, only the blacklist is taken into consideration
      (the URL is allowed unless its host is blacklisted);
    * otherwise, if a whitelist is set, the host must be whitelisted;
    * if neither is set, every URL is allowed.

    Hosts are compared lower-cased and without port/userinfo; a URL without
    a scheme is parsed as if it started with ``http://`` so that its host
    is still recognised.

    :param url: URL string to check.
    :return: ``True`` if the URL's host is allowed, ``False`` otherwise
        (including when the URL cannot be parsed).
    """
    whitelist = {dom.lower() for dom in os.environ.get("WHITELIST_DOMS", "").split()}
    blacklist = {dom.lower() for dom in os.environ.get("BLACKLIST_DOMS", "").split()}
    try:
        parsed = urlparse(url)
        if not parsed.netloc and "://" not in url:
            # Scheme-less input such as "example.com/foo": re-parse with a default scheme.
            parsed = urlparse("http://{}".format(url))
        host = parsed.hostname or ""
    except (AttributeError, TypeError, ValueError):
        # Fail closed on input that cannot be parsed as a URL.
        return False
    if blacklist:
        # Blacklist takes precedence: the whitelist is ignored when both are set.
        return host not in blacklist
    if whitelist:
        return host in whitelist
    return True

@REQUEST_TIME.time()
@run_async
def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message_id=None, wait_message_id=None, source_ip=None, proxy=None):
Expand Down Expand Up @@ -573,9 +628,12 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message
{
"key": "FFmpegExtractAudio",
"preferredcodec": "mp3",
"preferredquality": "128",
"preferredquality": "320",
},
# {'key': 'EmbedThumbnail',}, {'key': 'FFmpegMetadata',},
{
"key": "FFmpegMetadata",
},
# {'key': 'EmbedThumbnail'},
],
"noplaylist": True,
}
Expand Down Expand Up @@ -654,7 +712,8 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message
try:
file_converted = file.replace(file_ext, ".mp3")
ffinput = ffmpeg.input(file)
ffmpeg.output(ffinput, file_converted, audio_bitrate="128k", vn=None).run()
# audio_bitrate="320k"
ffmpeg.output(ffinput, file_converted, vn=None).run()
file = file_converted
file_root, file_ext = os.path.splitext(file)
file_format = file_ext.replace(".", "").lower()
Expand Down Expand Up @@ -861,3 +920,33 @@ def download_url_and_send(self, bot, url, direct_urls, chat_id, reply_to_message
bot.delete_message(chat_id=chat_id, message_id=wait_message_id)
except:
pass

@run_async
def blacklist_whitelist(self, update: Update, context: CallbackContext):
    """Leave the chat an update came from if that chat is not allowed.

    The chat is taken from the channel post if the update has one,
    otherwise from the regular message. Updates that carry neither are
    ignored instead of failing on an unbound ``message`` variable.

    :param update: incoming Telegram update.
    :param context: callback context; ``context.bot`` is used to leave the chat.
    """
    message = update.channel_post or update.message
    if message is None:
        # Nothing to inspect (neither a channel post nor a message).
        return
    chat_id = message.chat_id
    if not self.is_chat_allowed(chat_id):
        context.bot.leave_chat(chat_id)

def is_chat_allowed(self, chat_id):
    """Check ``chat_id`` against the configured chat whitelist/blacklist.

    The lists are read from the environment variables ``WHITELIST_CHATS``
    and ``BLACKLIST_CHATS``. Entries are integers separated by whitespace;
    commas are accepted as separators too, so a value such as
    ``"-1795100, 1795102"`` is parsed instead of raising.

    Rules, as documented for these settings:

    * if a blacklist is set, only the blacklist is taken into consideration
      (the chat is allowed unless it is blacklisted);
    * otherwise, if a whitelist is set, the chat must be whitelisted;
    * if neither is set, every chat is allowed.

    :param chat_id: integer chat id to check.
    :return: ``True`` if the chat is allowed, ``False`` otherwise.
    :raises ValueError: if one of the variables contains a non-integer entry.
    """

    def _read_ids(var_name, label):
        # Parse one environment variable into a set of integer chat ids.
        raw = os.environ.get(var_name, "")
        try:
            return {int(item) for item in raw.replace(",", " ").split()}
        except ValueError as err:
            raise ValueError("Your {} chats does not contain valid integers.".format(label)) from err

    whitelist = _read_ids("WHITELIST_CHATS", "whitelisted")
    blacklist = _read_ids("BLACKLIST_CHATS", "blacklisted")
    if blacklist:
        # Blacklist takes precedence: the whitelist is ignored when both are set.
        return chat_id not in blacklist
    if whitelist:
        return chat_id in whitelist
    return True
2 changes: 1 addition & 1 deletion scdlbot/texts/help.tg.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Hi! I download and send audios from videos/tracks/sets/albums in *MP3 128 kbps* with tags and artwork. Files over 50 MB are split into parts due to Telegram Bot API limit.
Hi! I download and send audios from videos/tracks/sets/albums in *MP3* with tags and artwork. Files over 50 MB are split into parts due to Telegram Bot API limit.

*Usage:*
_Send or forward_ a text message containing links and I will:
Expand Down
1 change: 1 addition & 0 deletions scdlbot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,5 @@ def get_link_text(urls):
content_type = "Video"
# direct_url = shorten_url(direct_url)
link_text += "• {} [Direct Link]({})\n".format(content_type, direct_url)
link_text += "\n*Note:* Final download URLs are only guaranteed to work on the same machine/IP where extracted"
return link_text

0 comments on commit 30aac1c

Please sign in to comment.