From 3799eeb17e02564ec3c5294d5ae7c0e12b69f36e Mon Sep 17 00:00:00 2001 From: Jeremy Neiman Date: Fri, 15 Jan 2021 08:57:17 -0500 Subject: [PATCH 1/9] first prototype using new api --- archivebot.py | 244 ++++++++++++++++++++++++++++++++--------------- requirements.txt | 2 +- 2 files changed, 169 insertions(+), 77 deletions(-) diff --git a/archivebot.py b/archivebot.py index eafaf10..1a90d14 100644 --- a/archivebot.py +++ b/archivebot.py @@ -5,17 +5,16 @@ import sqlite3 import time import traceback - -from slackclient import SlackClient from websocket import WebSocketConnectionClosedException - +from slack_bolt import App parser = argparse.ArgumentParser() parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( 'path to the SQLite database. (default = ./slack.sqlite)')) parser.add_argument('-l', '--log-level', default='debug', help=( 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) +parser.add_argument('-p', '--port', default=3333, help='Port to serve on. 
(default = 3333)') args = parser.parse_args() log_level = args.log_level.upper() @@ -26,18 +25,22 @@ database_path = args.database_path # Connects to the previously created SQL database -conn = sqlite3.connect(database_path) -cursor = conn.cursor() -cursor.execute('create table if not exists messages (message text, user text, channel text, timestamp text, UNIQUE(channel, timestamp) ON CONFLICT REPLACE)') -cursor.execute('create table if not exists users (name text, id text, avatar text, UNIQUE(id) ON CONFLICT REPLACE)') -cursor.execute('create table if not exists channels (name text, id text, UNIQUE(id) ON CONFLICT REPLACE)') +def db_connect(): + conn = sqlite3.connect(database_path) + cursor = conn.cursor() + return conn, cursor + -# This token is given when the bot is started in terminal -slack_token = os.environ["SLACK_API_TOKEN"] -# Makes bot user active on Slack -# NOTE: terminal must be running for the bot to continue -sc = SlackClient(slack_token) + +app = App( + token=os.environ.get("SLACK_BOT_TOKEN"), + signing_secret=os.environ.get("SLACK_SIGNING_SECRET"), + logger=logger +) + +# Save the bot user's user ID +app._bot_user_id = app.client.auth_test()['user_id'] # Double naming for better search functionality # Keys are both the name and unique ID where needed @@ -51,16 +54,17 @@ # Uses slack API to get most recent user list # Necessary for User ID correlation -def update_users(): +def update_users(conn, cursor): logger.info('Updating users') - info = sc.api_call('users.list') - ENV['user_id'] = dict([(m['name'], m['id']) for m in info['members']]) - ENV['id_user'] = dict([(m['id'], m['name']) for m in info['members']]) + info = app.client.users_list() + + ENV['user_id'] = dict([(m['profile']['display_name'], m['id']) for m in info['members']]) + ENV['id_user'] = dict([(m['id'], m['profile']['display_name']) for m in info['members']]) args = [] for m in info['members']: args.append(( - m['name'], + m['profile']['display_name'], m['id'], 
m['profile'].get('image_72', 'https://secure.gravatar.com/avatar/c3a07fba0c4787b0ef1d417838eae9c5.jpg?s=32&d=https%3A%2F%2Ffst.slack-edge.com%2F66f9%2Fimg%2Favatars%2Fava_0024-32.png') )) @@ -68,43 +72,59 @@ def update_users(): conn.commit() def get_user_id(name): - if name not in ENV['user_id']: - update_users() + """ + Get a user's user_id given their name; Used to resolve from:@X queries. + """ return ENV['user_id'].get(name, None) +def update_channel(channel_id): + channel = app.client.conversations_info(channel=channel_id)['channel'] + + ENV['channel_id'][channel['name']] = channel['id'] + ENV['id_channel'][channel['id']] = channel['name'] + + # If the channel is private, we need to get the member list + if channel['is_private']: + response = app.client.conversations_members(channel=channel['id']) + members = response['members'] + while response['response_metadata']['next_cursor']: + response = app.client.conversations_members(channel=channel['id']) + members += response['members'] + members = set(members) + else: + members = set() + + ENV['channel_info'][channel['id']] = { + 'is_private': channel['is_private'], + 'members': members + } -def update_channels(): + return channel['id'], channel['name'] + +def update_channels(conn, cursor): logger.info("Updating channels") - info = sc.api_call('channels.list')['channels'] + sc.api_call('groups.list')['groups'] - ENV['channel_id'] = dict([(m['name'], m['id']) for m in info]) - ENV['id_channel'] = dict([(m['id'], m['name']) for m in info]) + channels = app.client.conversations_list(types='public_channel,private_channel')['channels'] args = [] - for m in info: - ENV['channel_info'][m['id']] = { - 'is_private': ('is_group' in m) or m['is_private'], - 'members': m['members'] - } + for channel in channels: + # Only add channels that archive bot is a member of + if not channel['is_member']: + continue + + update_channel(channel['id']) args.append(( - m['name'], - m['id'] + channel['name'], + channel['id'] )) 
cursor.executemany("INSERT INTO channels(name, id) VALUES(?,?)", args) conn.commit() + def get_channel_id(name): - if name not in ENV['channel_id']: - update_channels() return ENV['channel_id'].get(name, None) -def send_message(message, channel): - sc.api_call( - "chat.postMessage", - channel=channel, - text=message - ) def can_query_channel(channel_id, user_id): if channel_id in ENV['id_channel']: @@ -114,7 +134,7 @@ def can_query_channel(channel_id, user_id): ) -def handle_query(event): +def handle_query(event, cursor, say): """ Handles a DM to the bot that is requesting a search of the archives. @@ -152,11 +172,11 @@ def handle_query(event): if p[0] == 'from': user = get_user_id(p[1].replace('@','').strip()) if user is None: - raise ValueError('User %s not found' % p[1]) + raise ValueError(f'User {p[1]} not found') if p[0] == 'in': channel = get_channel_id(p[1].replace('#','').strip()) if channel is None: - raise ValueError('Channel %s not found' % p[1]) + raise ValueError(f'Channel {p[1]} not found. 
Either {p[1]} does not exist or Archive Bot is not a member of {p[1]}.') if p[0] == 'sort': if p[1] in ['asc', 'desc']: sort = p[1] @@ -179,7 +199,6 @@ def handle_query(event): query_args.append(channel) if sort: query += ' ORDER BY timestamp %s' % sort - #query_args.append(sort) logger.debug(query) logger.debug(query_args) @@ -196,52 +215,125 @@ def handle_query(event): ) for i in res if can_query_channel(i[3], event['user'])] ) if res_message: - send_message(res_message, event['channel']) + say(res_message) else: - send_message('No results found', event['channel']) + say('No results found') except ValueError as e: logger.error(traceback.format_exc()) - send_message(str(e), event['channel']) + say(str(e)) + +@app.event('member_joined_channel') +def handle_join(event): + #print(event) + print(event) + conn, cursor = db_connect() + + # If the user added is archive bot, then add the channel too + if event['user'] == app._bot_user_id: + channel_id, channel_name = update_channel(event['channel']) + cursor.execute("INSERT INTO channels(name, id) VALUES(?,?)", (channel_id, channel_name)) + elif event['channel'] in ENV['id_channel']: + ENV['channel_info'][event['channel']]['members'].add(event['user']) + + print(ENV) + +@app.event('member_left_channel') +def handle_left(event): + if event['channel'] in ENV['channel_info']: + ENV['channel_info'][event['channel']]['members'].discard(event['user']) + +def handle_rename(event): + channel = event['channel'] + channel_id = channel['id'] + new_channel_name = channel['name'] + old_channel_name = ENV['id_channel'][channel_id] + + ENV['id_channel'][channel_id] = new_channel_name + del ENV['channel_id'][old_channel_name] + ENV['channel_id'][new_channel_name] = channel_id + + conn, cursor = db_connect() + cursor.execute("UPDATE channels SET name = ? 
WHERE id = ?", (new_channel_name, channel_id)) + conn.commit() -def handle_message(event): - if 'text' not in event: - return - if 'subtype' in event and event['subtype'] == 'bot_message': +@app.event('channel_rename') +def handle_channel_rename(event): + handle_rename(event) + +@app.event('group_rename') +def handle_group_rename(event): + handle_rename(event) + +# For some reason slack fires off both *_rename and *_name events, so create handlers for them +# but don't do anything in the *_name events. +@app.event({ + "type": "message", + "subtype": "group_name" +}) +def handle_group_name(event): + pass + +@app.event({ + "type": "message", + "subtype": "channel_name" +}) +def handle_channel_name(event): + pass + +@app.event('user_change') +def handle_user_change(event): + print("USER CHANGE MY GOD") + print(event) + + user_id = event['user']['id'] + new_username = event['user']['profile']['display_name'] + old_username = ENV['id_user'][user_id] + + ENV['id_user'][user_id] = new_username + del ENV['user_id'][old_username] + ENV['user_id'][new_username] = user_id + + conn, cursor = db_connect() + cursor.execute("UPDATE users SET name = ? WHERE id = ?", (new_username, user_id)) + conn.commit() + +@app.message('') +def handle_message(message, say): + logger.debug(message) + if 'text' not in message or message['user'] == 'USLACKBOT': return - logger.debug(event) + conn, cursor = db_connect() # If it's a DM, treat it as a search query - if event['channel'][0] == 'D': - handle_query(event) - elif 'user' not in event: + if message['channel_type'] == 'im': + handle_query(message, cursor, say) + elif 'user' not in message: logger.warn("No valid user. Previous event not saved") else: # Otherwise save the message to the archive. 
cursor.executemany('INSERT INTO messages VALUES(?, ?, ?, ?)', - [(event['text'], event['user'], event['channel'], event['ts'])] + [(message['text'], message['user'], message['channel'], message['ts'])] ) conn.commit() + # Ensure that the user exists in the DB/ENV + if message['user'] not in ENV['id_user']: + update_users(conn, cursor) + logger.debug("--------------------------") -# Loop -if sc.rtm_connect(auto_reconnect=True): - update_users() - update_channels() - logger.info('Archive bot online. Messages will now be recorded...') - while sc.server.connected is True: - try: - for event in sc.rtm_read(): - if event['type'] == 'message': - handle_message(event) - if 'subtype' in event and event['subtype'] in ['group_leave']: - update_channels() - elif event['type'] in ['group_joined', 'member_joined_channel', 'channel_created', 'group_left']: - update_channels() - except WebSocketConnectionClosedException: - sc.rtm_connect() - except: - logger.error(traceback.format_exc()) - time.sleep(1) -else: - logger.error('Connection Failed, invalid token?') +if __name__ == '__main__': + # Initialize the DB if it doesn't exist + conn, cursor = db_connect() + cursor.execute('create table if not exists messages (message text, user text, channel text, timestamp text, UNIQUE(channel, timestamp) ON CONFLICT REPLACE)') + cursor.execute('create table if not exists users (name text, id text, avatar text, UNIQUE(id) ON CONFLICT REPLACE)') + cursor.execute('create table if not exists channels (name text, id text, UNIQUE(id) ON CONFLICT REPLACE)') + conn.commit() + + # Update the users and channels in the DB and in the local memory mapping + update_users(conn, cursor) + update_channels(conn, cursor) + #print(ENV) + + #1/0 + app.start(port=args.port) diff --git a/requirements.txt b/requirements.txt index a7b7cf6..fb68377 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ six>=1.10.0 -slackclient==1.3.2 +slack-bolt==1.2.1 From b6c604d706a437a2f96865ee04a1ad0cf4d65be9 Mon 
Sep 17 00:00:00 2001 From: Jeremy Neiman Date: Fri, 15 Jan 2021 09:59:47 -0500 Subject: [PATCH 2/9] remove ENV, use DB for all storage --- archivebot.py | 244 ++++++++++++++++++++++++++------------------------ 1 file changed, 128 insertions(+), 116 deletions(-) diff --git a/archivebot.py b/archivebot.py index 1a90d14..2ae1560 100644 --- a/archivebot.py +++ b/archivebot.py @@ -30,9 +30,6 @@ def db_connect(): cursor = conn.cursor() return conn, cursor - - - app = App( token=os.environ.get("SLACK_BOT_TOKEN"), signing_secret=os.environ.get("SLACK_SIGNING_SECRET"), @@ -42,25 +39,12 @@ def db_connect(): # Save the bot user's user ID app._bot_user_id = app.client.auth_test()['user_id'] -# Double naming for better search functionality -# Keys are both the name and unique ID where needed -ENV = { - 'user_id': {}, - 'id_user': {}, - 'channel_id': {}, - 'id_channel': {}, - 'channel_info': {} -} - # Uses slack API to get most recent user list # Necessary for User ID correlation def update_users(conn, cursor): logger.info('Updating users') info = app.client.users_list() - ENV['user_id'] = dict([(m['profile']['display_name'], m['id']) for m in info['members']]) - ENV['id_user'] = dict([(m['id'], m['profile']['display_name']) for m in info['members']]) - args = [] for m in info['members']: args.append(( @@ -71,67 +55,45 @@ def update_users(conn, cursor): cursor.executemany("INSERT INTO users(name, id, avatar) VALUES(?,?,?)", args) conn.commit() -def get_user_id(name): - """ - Get a user's user_id given their name; Used to resolve from:@X queries. - """ - return ENV['user_id'].get(name, None) - -def update_channel(channel_id): +def get_channel_info(channel_id): channel = app.client.conversations_info(channel=channel_id)['channel'] - ENV['channel_id'][channel['name']] = channel['id'] - ENV['id_channel'][channel['id']] = channel['name'] - - # If the channel is private, we need to get the member list - if channel['is_private']: + # Get a list of members for the channel. 
This will be used when querying private channels. + response = app.client.conversations_members(channel=channel['id']) + members = response['members'] + while response['response_metadata']['next_cursor']: response = app.client.conversations_members(channel=channel['id']) - members = response['members'] - while response['response_metadata']['next_cursor']: - response = app.client.conversations_members(channel=channel['id']) - members += response['members'] - members = set(members) - else: - members = set() + members += response['members'] - ENV['channel_info'][channel['id']] = { - 'is_private': channel['is_private'], - 'members': members - } - - return channel['id'], channel['name'] + return ( + channel['id'], channel['name'], channel['is_private'], + [(channel['id'], m) for m in members] + ) def update_channels(conn, cursor): logger.info("Updating channels") channels = app.client.conversations_list(types='public_channel,private_channel')['channels'] - args = [] + channel_args = [] + member_args = [] for channel in channels: # Only add channels that archive bot is a member of - if not channel['is_member']: - continue + #if not channel['is_member']: + # continue - update_channel(channel['id']) + channel_id, channel_name, channel_is_private, members = get_channel_info(channel['id']) - args.append(( - channel['name'], - channel['id'] + channel_args.append(( + channel_name, + channel_id, + channel_is_private )) - cursor.executemany("INSERT INTO channels(name, id) VALUES(?,?)", args) - conn.commit() - + member_args += members -def get_channel_id(name): - return ENV['channel_id'].get(name, None) - - -def can_query_channel(channel_id, user_id): - if channel_id in ENV['id_channel']: - return ( - (not ENV['channel_info'][channel_id]['is_private']) or - (user_id in ENV['channel_info'][channel_id]['members']) - ) + cursor.executemany("INSERT INTO channels(name, id, is_private) VALUES(?,?,?)", channel_args) + cursor.executemany("INSERT INTO members(channel, user) VALUES(?,?)", 
member_args) + conn.commit() def handle_query(event, cursor, say): @@ -151,8 +113,8 @@ def handle_query(event, cursor, say): """ try: text = [] - user = None - channel = None + user_name = None + channel_name = None sort = None limit = 10 @@ -170,13 +132,9 @@ def handle_query(event, cursor, say): text.append(p[0]) if len(p) == 2: if p[0] == 'from': - user = get_user_id(p[1].replace('@','').strip()) - if user is None: - raise ValueError(f'User {p[1]} not found') + user_name = p[1] if p[0] == 'in': - channel = get_channel_id(p[1].replace('#','').strip()) - if channel is None: - raise ValueError(f'Channel {p[1]} not found. Either {p[1]} does not exist or Archive Bot is not a member of {p[1]}.') + channel_name = p[1].replace('#','').strip() if p[0] == 'sort': if p[1] in ['asc', 'desc']: sort = p[1] @@ -188,22 +146,39 @@ def handle_query(event, cursor, say): except: raise ValueError('%s not a valid number' % p[1]) - query = 'SELECT message,user,timestamp,channel FROM messages WHERE message LIKE (?)' - query_args=["%"+" ".join(text)+"%"] - - if user: - query += ' AND user=(?)' - query_args.append(user) - if channel: - query += ' AND channel=(?)' - query_args.append(channel) + query = f''' + SELECT DISTINCT + messages.message, messages.user, messages.timestamp, messages.channel + FROM messages + INNER JOIN users ON messages.user = users.id + -- Only query channel that archive bot is a part of + INNER JOIN ( + SELECT * FROM channels + INNER JOIN members ON + channels.id = members.channel AND + members.user = (?) + ) as channels ON messages.channel = channels.id + INNER JOIN members ON channels.id = members.channel + WHERE + -- Only return messages that are in public channels or the user is a member of + (channels.is_private <> 1 OR members.user = (?)) AND + messages.message LIKE (?) 
+ ''' + query_args=[app._bot_user_id, event['user'], "%"+" ".join(text)+"%"] + + if user_name: + query += ' AND users.name = (?)' + query_args.append(user_name) + if channel_name: + query += ' AND channels.name = (?)' + query_args.append(channel_name) if sort: - query += ' ORDER BY timestamp %s' % sort + query += ' ORDER BY messages.timestamp %s' % sort logger.debug(query) logger.debug(query_args) - cursor.execute(query,query_args) + cursor.execute(query, query_args) res = cursor.fetchmany(limit) res_message=None @@ -212,7 +187,7 @@ def handle_query(event, cursor, say): res_message = '\n'.join( ['*<@%s>* __ _<#%s>_\n%s\n\n' % ( i[1], int(float(i[2])), i[3], i[0] - ) for i in res if can_query_channel(i[3], event['user'])] + ) for i in res] ) if res_message: say(res_message) @@ -225,35 +200,36 @@ def handle_query(event, cursor, say): @app.event('member_joined_channel') def handle_join(event): #print(event) - print(event) conn, cursor = db_connect() # If the user added is archive bot, then add the channel too if event['user'] == app._bot_user_id: - channel_id, channel_name = update_channel(event['channel']) - cursor.execute("INSERT INTO channels(name, id) VALUES(?,?)", (channel_id, channel_name)) - elif event['channel'] in ENV['id_channel']: - ENV['channel_info'][event['channel']]['members'].add(event['user']) + channel_id, channel_name, channel_is_private, members = get_channel_info(event['channel']) + cursor.execute( + "INSERT INTO channels(name, id, is_private) VALUES(?,?,?)", + (channel_id, channel_name, channel_is_private) + ) + cursor.executemany("INSERT INTO members(channel, user) VALUES(?,?)", members) + else: + cursor.execute( + "INSERT INTO members(channel, user) VALUES(?,?)", + (event['channel'], event['user']) + ) - print(ENV) + conn.commit() @app.event('member_left_channel') def handle_left(event): - if event['channel'] in ENV['channel_info']: - ENV['channel_info'][event['channel']]['members'].discard(event['user']) + conn, cursor = db_connect() + 
cursor.execute( + "DELETE FROM members WHERE channel = ? AND user = ?", (event['channel'], event['user']) + ) + conn.commit() def handle_rename(event): channel = event['channel'] - channel_id = channel['id'] - new_channel_name = channel['name'] - old_channel_name = ENV['id_channel'][channel_id] - - ENV['id_channel'][channel_id] = new_channel_name - del ENV['channel_id'][old_channel_name] - ENV['channel_id'][new_channel_name] = channel_id - conn, cursor = db_connect() - cursor.execute("UPDATE channels SET name = ? WHERE id = ?", (new_channel_name, channel_id)) + cursor.execute("UPDATE channels SET name = ? WHERE id = ?", (channel['name'], channel['id'])) conn.commit() @app.event('channel_rename') @@ -282,16 +258,10 @@ def handle_channel_name(event): @app.event('user_change') def handle_user_change(event): - print("USER CHANGE MY GOD") - print(event) - + # print("USER CHANGE MY GOD") + # print(event) user_id = event['user']['id'] new_username = event['user']['profile']['display_name'] - old_username = ENV['id_user'][user_id] - - ENV['id_user'][user_id] = new_username - del ENV['user_id'][old_username] - ENV['user_id'][new_username] = user_id conn, cursor = db_connect() cursor.execute("UPDATE users SET name = ? 
WHERE id = ?", (new_username, user_id)) @@ -316,24 +286,66 @@ def handle_message(message, say): ) conn.commit() - # Ensure that the user exists in the DB/ENV - if message['user'] not in ENV['id_user']: + # Ensure that the user exists in the DB + cursor.execute('SELECT * FROM users WHERE id = ?', (message['user'],)) + row = cursor.fetchone() + if row is None: update_users(conn, cursor) logger.debug("--------------------------") +def migrate_db(conn, cursor): + cursor.execute(''' + CREATE TABLE IF NOT EXISTS messages ( + message TEXT, + user TEXT, + channel TEXT, + timestamp TEXT, + UNIQUE(channel, timestamp) ON CONFLICT REPLACE + ) + ''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS users ( + name TEXT, + id TEXT, + avatar TEXT, + UNIQUE(id) ON CONFLICT REPLACE + )''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS channels ( + name TEXT, + id TEXT, + is_private BOOLEAN NOT NULL CHECK (is_private IN (0,1)), + UNIQUE(id) ON CONFLICT REPLACE + )''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS members ( + channel TEXT, + user TEXT, + FOREIGN KEY (channel) REFERENCES channels(id), + FOREIGN KEY (user) REFERENCES users(id) + ) + ''') + conn.commit() + + # Add `is_private` to channels for dbs that existed in v0.1 + try: + cursor.execute(''' + ALTER TABLE channels + ADD COLUMN is_private BOOLEAN default 1 + NOT NULL CHECK (is_private IN (0,1)) + ''') + conn.commit() + except: + pass + if __name__ == '__main__': # Initialize the DB if it doesn't exist conn, cursor = db_connect() - cursor.execute('create table if not exists messages (message text, user text, channel text, timestamp text, UNIQUE(channel, timestamp) ON CONFLICT REPLACE)') - cursor.execute('create table if not exists users (name text, id text, avatar text, UNIQUE(id) ON CONFLICT REPLACE)') - cursor.execute('create table if not exists channels (name text, id text, UNIQUE(id) ON CONFLICT REPLACE)') - conn.commit() + migrate_db(conn, cursor) # Update the users and channels in the DB and in the local 
memory mapping - update_users(conn, cursor) - update_channels(conn, cursor) - #print(ENV) + #update_users(conn, cursor) + #update_channels(conn, cursor) - #1/0 app.start(port=args.port) From 56bfa9dfc344c3886ea678abb2396ff618bba7bc Mon Sep 17 00:00:00 2001 From: Jeremy Neiman Date: Sat, 16 Jan 2021 13:53:25 -0500 Subject: [PATCH 3/9] handle message changed --- archivebot.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/archivebot.py b/archivebot.py index 2ae1560..52cc20e 100644 --- a/archivebot.py +++ b/archivebot.py @@ -77,10 +77,6 @@ def update_channels(conn, cursor): channel_args = [] member_args = [] for channel in channels: - # Only add channels that archive bot is a member of - #if not channel['is_member']: - # continue - channel_id, channel_name, channel_is_private, members = get_channel_info(channel['id']) channel_args.append(( @@ -199,7 +195,6 @@ def handle_query(event, cursor, say): @app.event('member_joined_channel') def handle_join(event): - #print(event) conn, cursor = db_connect() # If the user added is archive bot, then add the channel too @@ -258,8 +253,6 @@ def handle_channel_name(event): @app.event('user_change') def handle_user_change(event): - # print("USER CHANGE MY GOD") - # print(event) user_id = event['user']['id'] new_username = event['user']['profile']['display_name'] @@ -294,6 +287,19 @@ def handle_message(message, say): logger.debug("--------------------------") +@app.event({ + "type": "message", + "subtype": "message_changed" +}) +def handle_message_changed(event): + message = event['message'] + conn, cursor = db_connect() + cursor.execute( + "UPDATE messages SET message = ? WHERE user = ? AND channel = ? 
AND timestamp = ?", + (message['text'], message['user'], event['channel'], message['ts']) + ) + conn.commit() + def migrate_db(conn, cursor): cursor.execute(''' CREATE TABLE IF NOT EXISTS messages ( @@ -345,7 +351,7 @@ def migrate_db(conn, cursor): migrate_db(conn, cursor) # Update the users and channels in the DB and in the local memory mapping - #update_users(conn, cursor) - #update_channels(conn, cursor) + update_users(conn, cursor) + update_channels(conn, cursor) app.start(port=args.port) From ec114f003274896fa95ee3eca6159e914d7e4acc Mon Sep 17 00:00:00 2001 From: Jeremy Neiman Date: Sat, 16 Jan 2021 14:06:19 -0500 Subject: [PATCH 4/9] update import script for new schema --- .gitignore | 1 + archivebot.py | 68 +++++++-------------------------------------------- import.py | 13 +++++----- utils.py | 51 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 66 deletions(-) create mode 100644 utils.py diff --git a/.gitignore b/.gitignore index 27d9510..ad6efba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ export **.sqlite +**.pyc diff --git a/archivebot.py b/archivebot.py index 52cc20e..a8c0694 100644 --- a/archivebot.py +++ b/archivebot.py @@ -2,13 +2,14 @@ import datetime import logging import os -import sqlite3 import time import traceback from websocket import WebSocketConnectionClosedException from slack_bolt import App +from utils import db_connect, migrate_db + parser = argparse.ArgumentParser() parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( 'path to the SQLite database. 
(default = ./slack.sqlite)')) @@ -24,12 +25,6 @@ database_path = args.database_path -# Connects to the previously created SQL database -def db_connect(): - conn = sqlite3.connect(database_path) - cursor = conn.cursor() - return conn, cursor - app = App( token=os.environ.get("SLACK_BOT_TOKEN"), signing_secret=os.environ.get("SLACK_SIGNING_SECRET"), @@ -195,7 +190,7 @@ def handle_query(event, cursor, say): @app.event('member_joined_channel') def handle_join(event): - conn, cursor = db_connect() + conn, cursor = db_connect(database_path) # If the user added is archive bot, then add the channel too if event['user'] == app._bot_user_id: @@ -215,7 +210,7 @@ def handle_join(event): @app.event('member_left_channel') def handle_left(event): - conn, cursor = db_connect() + conn, cursor = db_connect(database_path) cursor.execute( "DELETE FROM members WHERE channel = ? AND user = ?", (event['channel'], event['user']) ) @@ -223,7 +218,7 @@ def handle_left(event): def handle_rename(event): channel = event['channel'] - conn, cursor = db_connect() + conn, cursor = db_connect(database_path) cursor.execute("UPDATE channels SET name = ? WHERE id = ?", (channel['name'], channel['id'])) conn.commit() @@ -256,7 +251,7 @@ def handle_user_change(event): user_id = event['user']['id'] new_username = event['user']['profile']['display_name'] - conn, cursor = db_connect() + conn, cursor = db_connect(database_path) cursor.execute("UPDATE users SET name = ? 
WHERE id = ?", (new_username, user_id)) conn.commit() @@ -266,7 +261,7 @@ def handle_message(message, say): if 'text' not in message or message['user'] == 'USLACKBOT': return - conn, cursor = db_connect() + conn, cursor = db_connect(database_path) # If it's a DM, treat it as a search query if message['channel_type'] == 'im': @@ -293,61 +288,16 @@ def handle_message(message, say): }) def handle_message_changed(event): message = event['message'] - conn, cursor = db_connect() + conn, cursor = db_connect(database_path) cursor.execute( "UPDATE messages SET message = ? WHERE user = ? AND channel = ? AND timestamp = ?", (message['text'], message['user'], event['channel'], message['ts']) ) conn.commit() -def migrate_db(conn, cursor): - cursor.execute(''' - CREATE TABLE IF NOT EXISTS messages ( - message TEXT, - user TEXT, - channel TEXT, - timestamp TEXT, - UNIQUE(channel, timestamp) ON CONFLICT REPLACE - ) - ''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS users ( - name TEXT, - id TEXT, - avatar TEXT, - UNIQUE(id) ON CONFLICT REPLACE - )''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS channels ( - name TEXT, - id TEXT, - is_private BOOLEAN NOT NULL CHECK (is_private IN (0,1)), - UNIQUE(id) ON CONFLICT REPLACE - )''') - cursor.execute(''' - CREATE TABLE IF NOT EXISTS members ( - channel TEXT, - user TEXT, - FOREIGN KEY (channel) REFERENCES channels(id), - FOREIGN KEY (user) REFERENCES users(id) - ) - ''') - conn.commit() - - # Add `is_private` to channels for dbs that existed in v0.1 - try: - cursor.execute(''' - ALTER TABLE channels - ADD COLUMN is_private BOOLEAN default 1 - NOT NULL CHECK (is_private IN (0,1)) - ''') - conn.commit() - except: - pass - if __name__ == '__main__': # Initialize the DB if it doesn't exist - conn, cursor = db_connect() + conn, cursor = db_connect(database_path) migrate_db(conn, cursor) # Update the users and channels in the DB and in the local memory mapping diff --git a/import.py b/import.py index ae3b690..9ed52c7 100644 --- 
a/import.py +++ b/import.py @@ -5,6 +5,8 @@ import os import sqlite3 +from utils import db_connect, migrate_db + parser = argparse.ArgumentParser() parser.add_argument('directory', help=( @@ -20,19 +22,16 @@ logging.basicConfig(level=getattr(logging, log_level)) logger = logging.getLogger(__name__) -conn = sqlite3.connect(args.database_path) -cursor = conn.cursor() -cursor.execute('create table if not exists messages (message text, user text, channel text, timestamp text, UNIQUE(channel, timestamp) ON CONFLICT REPLACE)') -cursor.execute('create table if not exists users (name text, id text, avatar text, UNIQUE(id) ON CONFLICT REPLACE)') -cursor.execute('create table if not exists channels (name text, id text, UNIQUE(id) ON CONFLICT REPLACE)') +conn, cursor = db_connect(args.database_path) +migrate_db(conn, cursor) directory = args.directory logger.info("Importing channels..") with open(os.path.join(directory, 'channels.json')) as f: channels = json.load(f) -args = [(c['name'], c['id']) for c in channels] -cursor.executemany('INSERT INTO channels VALUES(?,?)', (args)) +args = [(c['name'], c['id'], 1) for c in channels] +cursor.executemany('INSERT INTO channels VALUES(?,?,?)', (args)) logger.info("- Channels imported") logger.info("Importing users..") diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..a18a0b1 --- /dev/null +++ b/utils.py @@ -0,0 +1,51 @@ +import sqlite3 + +def migrate_db(conn, cursor): + cursor.execute(''' + CREATE TABLE IF NOT EXISTS messages ( + message TEXT, + user TEXT, + channel TEXT, + timestamp TEXT, + UNIQUE(channel, timestamp) ON CONFLICT REPLACE + ) + ''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS users ( + name TEXT, + id TEXT, + avatar TEXT, + UNIQUE(id) ON CONFLICT REPLACE + )''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS channels ( + name TEXT, + id TEXT, + is_private BOOLEAN NOT NULL CHECK (is_private IN (0,1)), + UNIQUE(id) ON CONFLICT REPLACE + )''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS 
members ( + channel TEXT, + user TEXT, + FOREIGN KEY (channel) REFERENCES channels(id), + FOREIGN KEY (user) REFERENCES users(id) + ) + ''') + conn.commit() + + # Add `is_private` to channels for dbs that existed in v0.1 + try: + cursor.execute(''' + ALTER TABLE channels + ADD COLUMN is_private BOOLEAN default 1 + NOT NULL CHECK (is_private IN (0,1)) + ''') + conn.commit() + except: + pass + +def db_connect(database_path): + conn = sqlite3.connect(database_path) + cursor = conn.cursor() + return conn, cursor From 4031eee7e966de7234098f6343254d814840629e Mon Sep 17 00:00:00 2001 From: Jeremy Neiman Date: Sat, 16 Jan 2021 14:41:09 -0500 Subject: [PATCH 5/9] Update README.md --- README.md | 55 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 459b518..be1cde2 100755 --- a/README.md +++ b/README.md @@ -3,26 +3,60 @@ A bot that can search your slack message history. Makes it possible to search further back than 10,000 messages. +## Requirements + +1. Permission to install new apps to your Slack workspace. +2. python3 +3. A publicly accessible URL to serve the bot from. (Slack recommends using [ngrok](https://ngrok.com/) to get around this.) + ## Installation 1. Clone this repo. -1. Install the requirements: +2. Install the requirements: pip install -r requirements.txt -1. [Export your team's slack history.](https://get.slack.help/hc/en-us/articles/201658943-Export-your-team-s-Slack-history) -Download the archive and export it to a directory. Then run `import_archive.py` +3. If you want to include your existing slack messages, [export your team's slack history.](https://get.slack.help/hc/en-us/articles/201658943-Export-your-team-s-Slack-history) +Download the archive and export it to a directory. Then run `import.py` on the directory. For example: python import.py export This will create a file `slack.sqlite`. -1. 
Create a new [bot user](https://api.slack.com/bot-users) on your slack -channel and get the API key. Start the bot with: + +4. Create a new [Slack app](https://api.slack.com/start/overview). + +- Add the following bot token oauth scopes and install it to your workspace: + + - `channels:history` + - `channels:read` + - `chat:write` + - `groups:history` (if you want to archive/search private channels) + - `groups:read` (if you want to archive/search private channels) + - `im:history` + - `users:read` + +5. Start archive bot with: + + SLACK_BOT_TOKEN=<BOT_TOKEN> SLACK_SIGNING_SECRET=<SIGNING_SECRET> python archivebot.py - export SLACK_API_TOKEN= && python archivebot.py +Where `SIGNING_SECRET` is the "Signing Secret" from your app's "Basic Information" page and `BOT_TOKEN` is the +"Bot User OAuth Access Token" from the app's "OAuth & Permissions" page. - Where API_TOKEN is the token you got when creating the bot user. +Use `python archivebot.py -h` for a list of all command line options. + +6. Go to the app's "Event Subscriptions" page and add the url to where archive bot is being served. The default port is `3333`. + +- Then add the following bot events: + + - `channel_rename` + - `group_rename` (if you want to archive/search private channels) + - `member_joined_channel` + - `member_left_channel` + - `message.channels` + - `message.groups` (if you want to archive/search private channels) + - `message.im` + - `user_change` ## Archiving New Messages @@ -50,6 +84,13 @@ to the query. The full usage is: limit: The number of responses to return. Default 10. +## Migrating from slack-archive-bot v0.1 + +`slack-archive-bot` v0.1 used the legacy Slack API which Slack [ended support for in February 2021](https://api.slack.com/changelog/2020-01-deprecating-antecedents-to-the-conversations-api). To migrate to the new version: + +- Follow the installation steps above to create a new slack app with all of the required permissions and event subscriptions. 
+- The biggest change in requirements with the new version is the move from the [Real Time Messaging API](https://api.slack.com/rtm) to the [Events API](https://api.slack.com/apis/connections/events-api) which necessitates having a publicly-accessible url that Slack can send events to. If you are unable to serve a public endpoint, you can use [ngrok](https://ngrok.com/). + ## Contributing Contributions are more than welcome. From bugs to new features. I threw this From da535cffcf0582ade279d1d427d2276925e6fefa Mon Sep 17 00:00:00 2001 From: Jeremy Neiman Date: Sat, 16 Jan 2021 15:03:11 -0500 Subject: [PATCH 6/9] linting --- archivebot.py | 41 ++++++++++++++++++++++------------------- export.py | 45 ++++++++++++++++++++++----------------------- import.py | 11 +++++------ 3 files changed, 49 insertions(+), 48 deletions(-) diff --git a/archivebot.py b/archivebot.py index a8c0694..a2de077 100644 --- a/archivebot.py +++ b/archivebot.py @@ -1,10 +1,7 @@ import argparse -import datetime import logging import os -import time import traceback -from websocket import WebSocketConnectionClosedException from slack_bolt import App @@ -12,18 +9,18 @@ parser = argparse.ArgumentParser() parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( - 'path to the SQLite database. (default = ./slack.sqlite)')) + 'path to the SQLite database. (default = ./slack.sqlite)')) parser.add_argument('-l', '--log-level', default='debug', help=( - 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) + 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) parser.add_argument('-p', '--port', default=3333, help='Port to serve on. 
(default = 3333)') -args = parser.parse_args() +cmd_args = parser.parse_args() -log_level = args.log_level.upper() +log_level = cmd_args.log_level.upper() assert log_level in ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'] logging.basicConfig(level=getattr(logging, log_level)) logger = logging.getLogger(__name__) -database_path = args.database_path +database_path = cmd_args.database_path app = App( token=os.environ.get("SLACK_BOT_TOKEN"), @@ -45,7 +42,9 @@ def update_users(conn, cursor): args.append(( m['profile']['display_name'], m['id'], - m['profile'].get('image_72', 'https://secure.gravatar.com/avatar/c3a07fba0c4787b0ef1d417838eae9c5.jpg?s=32&d=https%3A%2F%2Ffst.slack-edge.com%2F66f9%2Fimg%2Favatars%2Fava_0024-32.png') + m['profile'].get( + 'image_72', 'http://fst.slack-edge.com/66f9/img/avatars/ava_0024-32.png' + ) )) cursor.executemany("INSERT INTO users(name, id, avatar) VALUES(?,?,?)", args) conn.commit() @@ -125,7 +124,7 @@ def handle_query(event, cursor, say): if p[0] == 'from': user_name = p[1] if p[0] == 'in': - channel_name = p[1].replace('#','').strip() + channel_name = p[1].replace('#', '').strip() if p[0] == 'sort': if p[1] in ['asc', 'desc']: sort = p[1] @@ -155,7 +154,7 @@ def handle_query(event, cursor, say): (channels.is_private <> 1 OR members.user = (?)) AND messages.message LIKE (?) 
''' - query_args=[app._bot_user_id, event['user'], "%"+" ".join(text)+"%"] + query_args = [app._bot_user_id, event['user'], "%" + " ".join(text) + "%"] if user_name: query += ' AND users.name = (?)' @@ -172,7 +171,7 @@ def handle_query(event, cursor, say): cursor.execute(query, query_args) res = cursor.fetchmany(limit) - res_message=None + res_message = None if res: logger.debug(res) res_message = '\n'.join( @@ -236,14 +235,14 @@ def handle_group_rename(event): "type": "message", "subtype": "group_name" }) -def handle_group_name(event): +def handle_group_name(): pass @app.event({ "type": "message", "subtype": "channel_name" }) -def handle_channel_name(event): +def handle_channel_name(): pass @app.event('user_change') @@ -267,10 +266,11 @@ def handle_message(message, say): if message['channel_type'] == 'im': handle_query(message, cursor, say) elif 'user' not in message: - logger.warn("No valid user. Previous event not saved") + logger.warning("No valid user. Previous event not saved") else: # Otherwise save the message to the archive. 
- cursor.executemany('INSERT INTO messages VALUES(?, ?, ?, ?)', - [(message['text'], message['user'], message['channel'], message['ts'])] + cursor.execute( + 'INSERT INTO messages VALUES(?, ?, ?, ?)', + (message['text'], message['user'], message['channel'], message['ts']) ) conn.commit() @@ -295,7 +295,7 @@ def handle_message_changed(event): ) conn.commit() -if __name__ == '__main__': +def main(): # Initialize the DB if it doesn't exist conn, cursor = db_connect(database_path) migrate_db(conn, cursor) @@ -304,4 +304,7 @@ def handle_message_changed(event): update_users(conn, cursor) update_channels(conn, cursor) - app.start(port=args.port) + app.start(port=cmd_args.port) + +if __name__ == '__main__': + main() diff --git a/export.py b/export.py index 346d3dd..74595cc 100644 --- a/export.py +++ b/export.py @@ -21,21 +21,20 @@ def dict_factory(cursor, row): return d # Turns unicode into text -def byteify(input): - if isinstance(input, dict): +def byteify(inp): + if isinstance(inp, dict): return {byteify(key): byteify(value) - for key, value in iteritems(input)} - elif isinstance(input, list): - return [byteify(element) for element in input] - elif 'unicode' in vars(globals()['__builtins__']) and isinstance(input, unicode): - return input.encode('utf-8') - else: - return input + for key, value in iteritems(inp)} + if isinstance(inp, list): + return [byteify(element) for element in inp] + if 'unicode' in vars(globals()['__builtins__']) and isinstance(inp, unicode): + return inp.encode('utf-8') + return inp -def get_channel_name(id): - return ENV['id_channel'].get(id, 'None') +def get_channel_name(channel_id): + return ENV['id_channel'].get(channel_id, 'None') -def getDate(ts): +def get_date(ts): return datetime.datetime.fromtimestamp(int(ts)).strftime('%Y-%m-%d') @@ -44,11 +43,11 @@ def getDate(ts): parser = argparse.ArgumentParser() parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( - 'path to the SQLite database. 
(default = ./slack.sqlite)')) + 'path to the SQLite database. (default = ./slack.sqlite)')) parser.add_argument('-a', '--archive_path', default='export', help=( - 'path to export to (default ./export)')) + 'path to export to (default ./export)')) parser.add_argument('-l', '--log-level', default='debug', help=( - 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) + 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) args = parser.parse_args() database_path = args.database_path @@ -123,7 +122,7 @@ def getDate(ts): continue # timestamp format is #########.###### - day = getDate(message['ts'].split('.')[0]) + day = get_date(message['ts'].split('.')[0]) if channel_msgs[channel_name].get(day, None): channel_msgs[channel_name][day].append(message) else: @@ -133,22 +132,22 @@ def getDate(ts): update_count = 0 for channel_name in channel_msgs.keys(): # Checks for any messages from today - if len(channel_msgs[channel_name]) == 0: + if not channel_msgs[channel_name]: continue else: update_count += 1 logger.info("%s has been updated" % channel_name) - dir = os.path.join(archive_path, channel_name) - if "None" in dir: - logger.warn("Channel not found: %s") %message['channel'] + directory = os.path.join(archive_path, channel_name) + if "None" in directory: + logger.warning("Channel not found: %s" % channel_name) continue - if not os.path.isdir(dir): - os.makedirs(dir) + if not os.path.isdir(directory): + os.makedirs(directory) for day in channel_msgs[channel_name].keys(): - file = os.path.join(dir, "%s.json") % day + file = os.path.join(directory, "%s.json") % day with open(file, 'w') as outfile: json.dump(channel_msgs[channel_name][day], outfile) outfile.close() diff --git a/import.py b/import.py index 9ed52c7..1dc9897 100644 --- a/import.py +++ b/import.py @@ -3,18 +3,17 @@ import json import logging import os -import sqlite3 from utils import db_connect, migrate_db parser = argparse.ArgumentParser() parser.add_argument('directory', help=( - 'path to the 
downloaded Slack archive')) + 'path to the downloaded Slack archive')) parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( - 'path to the SQLite database. (default = ./slack.sqlite)')) + 'path to the SQLite database. (default = ./slack.sqlite)')) parser.add_argument('-l', '--log-level', default='debug', help=( - 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) + 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) args = parser.parse_args() log_level = args.log_level.upper() @@ -44,7 +43,7 @@ logger.info("Importing messages..") for channel in channels: files = glob.glob(os.path.join(directory, channel['name'], '*.json')) - if len(files) == 0: + if not files: logger.warning("No messages found for #%s" % channel['name']) for file_name in files: with open(file_name, encoding='utf8') as f: @@ -58,7 +57,7 @@ message['user'] if 'user' in message else "", channel['id'], message['ts'] )) else: - logger.warn("In "+file_name+": An exception occured, message not added to archive.") + logger.warning("In "+file_name+": An exception occured, message not added to archive.") cursor.executemany('INSERT INTO messages VALUES(?, ?, ?, ?)', args) conn.commit() From 1d0dc85fa8990b3c0a8f4a997c00a5eec6a42ade Mon Sep 17 00:00:00 2001 From: Jeremy Neiman Date: Sat, 16 Jan 2021 15:47:30 -0500 Subject: [PATCH 7/9] blacken --- archivebot.py | 211 ++++++++++++++++++++++++++++---------------------- export.py | 86 +++++++++++--------- import.py | 63 +++++++++------ utils.py | 32 +++++--- 4 files changed, 233 insertions(+), 159 deletions(-) diff --git a/archivebot.py b/archivebot.py index a2de077..2049181 100644 --- a/archivebot.py +++ b/archivebot.py @@ -8,15 +8,25 @@ from utils import db_connect, migrate_db parser = argparse.ArgumentParser() -parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( - 'path to the SQLite database. 
(default = ./slack.sqlite)')) -parser.add_argument('-l', '--log-level', default='debug', help=( - 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) -parser.add_argument('-p', '--port', default=3333, help='Port to serve on. (default = 3333)') +parser.add_argument( + "-d", + "--database-path", + default="slack.sqlite", + help=("path to the SQLite database. (default = ./slack.sqlite)"), +) +parser.add_argument( + "-l", + "--log-level", + default="debug", + help=("CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)"), +) +parser.add_argument( + "-p", "--port", default=3333, help="Port to serve on. (default = 3333)" +) cmd_args = parser.parse_args() log_level = cmd_args.log_level.upper() -assert log_level in ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'] +assert log_level in ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] logging.basicConfig(level=getattr(logging, log_level)) logger = logging.getLogger(__name__) @@ -25,63 +35,72 @@ app = App( token=os.environ.get("SLACK_BOT_TOKEN"), signing_secret=os.environ.get("SLACK_SIGNING_SECRET"), - logger=logger + logger=logger, ) # Save the bot user's user ID -app._bot_user_id = app.client.auth_test()['user_id'] +app._bot_user_id = app.client.auth_test()["user_id"] # Uses slack API to get most recent user list # Necessary for User ID correlation def update_users(conn, cursor): - logger.info('Updating users') + logger.info("Updating users") info = app.client.users_list() args = [] - for m in info['members']: - args.append(( - m['profile']['display_name'], - m['id'], - m['profile'].get( - 'image_72', 'http://fst.slack-edge.com/66f9/img/avatars/ava_0024-32.png' + for m in info["members"]: + args.append( + ( + m["profile"]["display_name"], + m["id"], + m["profile"].get( + "image_72", + "http://fst.slack-edge.com/66f9/img/avatars/ava_0024-32.png", + ), ) - )) + ) cursor.executemany("INSERT INTO users(name, id, avatar) VALUES(?,?,?)", args) conn.commit() + def get_channel_info(channel_id): - channel = 
app.client.conversations_info(channel=channel_id)['channel'] + channel = app.client.conversations_info(channel=channel_id)["channel"] # Get a list of members for the channel. This will be used when querying private channels. - response = app.client.conversations_members(channel=channel['id']) - members = response['members'] - while response['response_metadata']['next_cursor']: - response = app.client.conversations_members(channel=channel['id']) - members += response['members'] + response = app.client.conversations_members(channel=channel["id"]) + members = response["members"] + while response["response_metadata"]["next_cursor"]: + response = app.client.conversations_members(channel=channel["id"]) + members += response["members"] return ( - channel['id'], channel['name'], channel['is_private'], - [(channel['id'], m) for m in members] + channel["id"], + channel["name"], + channel["is_private"], + [(channel["id"], m) for m in members], ) + def update_channels(conn, cursor): logger.info("Updating channels") - channels = app.client.conversations_list(types='public_channel,private_channel')['channels'] + channels = app.client.conversations_list(types="public_channel,private_channel")[ + "channels" + ] channel_args = [] member_args = [] for channel in channels: - channel_id, channel_name, channel_is_private, members = get_channel_info(channel['id']) + channel_id, channel_name, channel_is_private, members = get_channel_info( + channel["id"] + ) - channel_args.append(( - channel_name, - channel_id, - channel_is_private - )) + channel_args.append((channel_name, channel_id, channel_is_private)) member_args += members - cursor.executemany("INSERT INTO channels(name, id, is_private) VALUES(?,?,?)", channel_args) + cursor.executemany( + "INSERT INTO channels(name, id, is_private) VALUES(?,?,?)", channel_args + ) cursor.executemany("INSERT INTO members(channel, user) VALUES(?,?)", member_args) conn.commit() @@ -108,35 +127,35 @@ def handle_query(event, cursor, say): sort = None 
limit = 10 - params = event['text'].lower().split() + params = event["text"].lower().split() for p in params: # Handle emoji # usual format is " :smiley_face: " - if len(p) > 2 and p[0] == ':' and p[-1] == ':': + if len(p) > 2 and p[0] == ":" and p[-1] == ":": text.append(p) continue - p = p.split(':') + p = p.split(":") if len(p) == 1: text.append(p[0]) if len(p) == 2: - if p[0] == 'from': + if p[0] == "from": user_name = p[1] - if p[0] == 'in': - channel_name = p[1].replace('#', '').strip() - if p[0] == 'sort': - if p[1] in ['asc', 'desc']: + if p[0] == "in": + channel_name = p[1].replace("#", "").strip() + if p[0] == "sort": + if p[1] in ["asc", "desc"]: sort = p[1] else: - raise ValueError('Invalid sort order %s' % p[1]) - if p[0] == 'limit': + raise ValueError("Invalid sort order %s" % p[1]) + if p[0] == "limit": try: limit = int(p[1]) except: - raise ValueError('%s not a valid number' % p[1]) + raise ValueError("%s not a valid number" % p[1]) - query = f''' + query = f""" SELECT DISTINCT messages.message, messages.user, messages.timestamp, messages.channel FROM messages @@ -153,17 +172,17 @@ def handle_query(event, cursor, say): -- Only return messages that are in public channels or the user is a member of (channels.is_private <> 1 OR members.user = (?)) AND messages.message LIKE (?) 
- ''' - query_args = [app._bot_user_id, event['user'], "%" + " ".join(text) + "%"] + """ + query_args = [app._bot_user_id, event["user"], "%" + " ".join(text) + "%"] if user_name: - query += ' AND users.name = (?)' + query += " AND users.name = (?)" query_args.append(user_name) if channel_name: - query += ' AND channels.name = (?)' + query += " AND channels.name = (?)" query_args.append(channel_name) if sort: - query += ' ORDER BY messages.timestamp %s' % sort + query += " ORDER BY messages.timestamp %s" % sort logger.debug(query) logger.debug(query_args) @@ -174,127 +193,136 @@ def handle_query(event, cursor, say): res_message = None if res: logger.debug(res) - res_message = '\n'.join( - ['*<@%s>* __ _<#%s>_\n%s\n\n' % ( - i[1], int(float(i[2])), i[3], i[0] - ) for i in res] + res_message = "\n".join( + [ + "*<@%s>* __ _<#%s>_\n%s\n\n" + % (i[1], int(float(i[2])), i[3], i[0]) + for i in res + ] ) if res_message: say(res_message) else: - say('No results found') + say("No results found") except ValueError as e: logger.error(traceback.format_exc()) say(str(e)) -@app.event('member_joined_channel') + +@app.event("member_joined_channel") def handle_join(event): conn, cursor = db_connect(database_path) # If the user added is archive bot, then add the channel too - if event['user'] == app._bot_user_id: - channel_id, channel_name, channel_is_private, members = get_channel_info(event['channel']) + if event["user"] == app._bot_user_id: + channel_id, channel_name, channel_is_private, members = get_channel_info( + event["channel"] + ) cursor.execute( "INSERT INTO channels(name, id, is_private) VALUES(?,?,?)", - (channel_id, channel_name, channel_is_private) + (channel_id, channel_name, channel_is_private), ) cursor.executemany("INSERT INTO members(channel, user) VALUES(?,?)", members) else: cursor.execute( "INSERT INTO members(channel, user) VALUES(?,?)", - (event['channel'], event['user']) + (event["channel"], event["user"]), ) conn.commit() -@app.event('member_left_channel') 
+ +@app.event("member_left_channel") def handle_left(event): conn, cursor = db_connect(database_path) cursor.execute( - "DELETE FROM members WHERE channel = ? AND user = ?", (event['channel'], event['user']) + "DELETE FROM members WHERE channel = ? AND user = ?", + (event["channel"], event["user"]), ) conn.commit() + def handle_rename(event): - channel = event['channel'] + channel = event["channel"] conn, cursor = db_connect(database_path) - cursor.execute("UPDATE channels SET name = ? WHERE id = ?", (channel['name'], channel['id'])) + cursor.execute( + "UPDATE channels SET name = ? WHERE id = ?", (channel["name"], channel["id"]) + ) conn.commit() -@app.event('channel_rename') + +@app.event("channel_rename") def handle_channel_rename(event): handle_rename(event) -@app.event('group_rename') + +@app.event("group_rename") def handle_group_rename(event): handle_rename(event) + # For some reason slack fires off both *_rename and *_name events, so create handlers for them # but don't do anything in the *_name events. -@app.event({ - "type": "message", - "subtype": "group_name" -}) +@app.event({"type": "message", "subtype": "group_name"}) def handle_group_name(): pass -@app.event({ - "type": "message", - "subtype": "channel_name" -}) + +@app.event({"type": "message", "subtype": "channel_name"}) def handle_channel_name(): pass -@app.event('user_change') + +@app.event("user_change") def handle_user_change(event): - user_id = event['user']['id'] - new_username = event['user']['profile']['display_name'] + user_id = event["user"]["id"] + new_username = event["user"]["profile"]["display_name"] conn, cursor = db_connect(database_path) cursor.execute("UPDATE users SET name = ? 
WHERE id = ?", (new_username, user_id)) conn.commit() -@app.message('') + +@app.message("") def handle_message(message, say): logger.debug(message) - if 'text' not in message or message['user'] == 'USLACKBOT': + if "text" not in message or message["user"] == "USLACKBOT": return conn, cursor = db_connect(database_path) # If it's a DM, treat it as a search query - if message['channel_type'] == 'im': + if message["channel_type"] == "im": handle_query(message, cursor, say) - elif 'user' not in message: + elif "user" not in message: logger.warning("No valid user. Previous event not saved") - else: # Otherwise save the message to the archive. + else: # Otherwise save the message to the archive. cursor.execute( - 'INSERT INTO messages VALUES(?, ?, ?, ?)', - (message['text'], message['user'], message['channel'], message['ts']) + "INSERT INTO messages VALUES(?, ?, ?, ?)", + (message["text"], message["user"], message["channel"], message["ts"]), ) conn.commit() # Ensure that the user exists in the DB - cursor.execute('SELECT * FROM users WHERE id = ?', (message['user'],)) + cursor.execute("SELECT * FROM users WHERE id = ?", (message["user"],)) row = cursor.fetchone() if row is None: update_users(conn, cursor) logger.debug("--------------------------") -@app.event({ - "type": "message", - "subtype": "message_changed" -}) + +@app.event({"type": "message", "subtype": "message_changed"}) def handle_message_changed(event): - message = event['message'] + message = event["message"] conn, cursor = db_connect(database_path) cursor.execute( "UPDATE messages SET message = ? WHERE user = ? AND channel = ? 
AND timestamp = ?", - (message['text'], message['user'], event['channel'], message['ts']) + (message["text"], message["user"], event["channel"], message["ts"]), ) conn.commit() + def main(): # Initialize the DB if it doesn't exist conn, cursor = db_connect(database_path) @@ -306,5 +334,6 @@ def main(): app.start(port=cmd_args.port) -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/export.py b/export.py index 74595cc..c1b2a7d 100644 --- a/export.py +++ b/export.py @@ -20,48 +20,62 @@ def dict_factory(cursor, row): d[column[0]] = row[index] return d + # Turns unicode into text def byteify(inp): if isinstance(inp, dict): - return {byteify(key): byteify(value) - for key, value in iteritems(inp)} + return {byteify(key): byteify(value) for key, value in iteritems(inp)} if isinstance(inp, list): return [byteify(element) for element in inp] - if 'unicode' in vars(globals()['__builtins__']) and isinstance(inp, unicode): - return inp.encode('utf-8') + if "unicode" in vars(globals()["__builtins__"]) and isinstance(inp, unicode): + return inp.encode("utf-8") return inp + def get_channel_name(channel_id): - return ENV['id_channel'].get(channel_id, 'None') + return ENV["id_channel"].get(channel_id, "None") + def get_date(ts): - return datetime.datetime.fromtimestamp(int(ts)).strftime('%Y-%m-%d') + return datetime.datetime.fromtimestamp(int(ts)).strftime("%Y-%m-%d") # Uncomment time in the future if running daily (Used to export last days of messages) -#time = time.time() - 86400 # One full day in seconds +# time = time.time() - 86400 # One full day in seconds parser = argparse.ArgumentParser() -parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( - 'path to the SQLite database. 
(default = ./slack.sqlite)')) -parser.add_argument('-a', '--archive_path', default='export', help=( - 'path to export to (default ./export)')) -parser.add_argument('-l', '--log-level', default='debug', help=( - 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) +parser.add_argument( + "-d", + "--database-path", + default="slack.sqlite", + help=("path to the SQLite database. (default = ./slack.sqlite)"), +) +parser.add_argument( + "-a", + "--archive_path", + default="export", + help=("path to export to (default ./export)"), +) +parser.add_argument( + "-l", + "--log-level", + default="debug", + help=("CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)"), +) args = parser.parse_args() database_path = args.database_path archive_path = args.archive_path log_level = args.log_level.upper() -assert log_level in ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'] +assert log_level in ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] logging.basicConfig(level=getattr(logging, log_level)) logger = logging.getLogger(__name__) time = 0.0 if not os.path.isdir(archive_path): os.makedirs(archive_path) - time = 0.0 # Full export instead of day export + time = 0.0 # Full export instead of day export # Uncomment if you need to export entire archive or make this choice # getAll = raw_input("Do you want to export all messages instead of last day?(y/N) ").lower() @@ -80,49 +94,51 @@ def get_date(ts): cursor.execute("SELECT * FROM users") users = byteify(cursor.fetchall()) for u in users: - u['profile'] = {} - u['profile']['image_72'] = u.pop('avatar') + u["profile"] = {} + u["profile"]["image_72"] = u.pop("avatar") # Save channel and user data files to archive folder -channel_file = os.path.join(archive_path, 'channels.json') -with open(channel_file, 'w') as outfile: +channel_file = os.path.join(archive_path, "channels.json") +with open(channel_file, "w") as outfile: json.dump(channels, outfile) outfile.close() -user_file = os.path.join(archive_path, 'users.json') -with 
open(user_file, 'w') as outfile: +user_file = os.path.join(archive_path, "users.json") +with open(user_file, "w") as outfile: json.dump(users, outfile) outfile.close() # Define the names associated with each channel id ENV = { - 'channel_id': {}, - 'id_channel': {}, + "channel_id": {}, + "id_channel": {}, } -ENV['channel_id'] = dict([(m['name'], m['id']) for m in channels]) -ENV['id_channel'] = dict([(m['id'], m['name']) for m in channels]) +ENV["channel_id"] = dict([(m["name"], m["id"]) for m in channels]) +ENV["id_channel"] = dict([(m["id"], m["name"]) for m in channels]) # Get all messages after given time (in seconds since the Epoch) -command = ("SELECT * FROM messages WHERE timestamp > %s ORDER BY channel, timestamp") % time +command = ( + "SELECT * FROM messages WHERE timestamp > %s ORDER BY channel, timestamp" +) % time cursor.execute(command) results = byteify(cursor.fetchall()) # Clean and store message results in Slack-ish format -channel_msgs = dict([(c['name'], {}) for c in channels]) +channel_msgs = dict([(c["name"], {}) for c in channels]) for message in results: - message['text'] = message['message'] - message['ts'] = message['timestamp'] - message['type'] = 'message' - message.pop('message') - message.pop('timestamp') + message["text"] = message["message"] + message["ts"] = message["timestamp"] + message["type"] = "message" + message.pop("message") + message.pop("timestamp") - channel_name = get_channel_name(message['channel']) + channel_name = get_channel_name(message["channel"]) if channel_name == "None": continue # timestamp format is #########.###### - day = get_date(message['ts'].split('.')[0]) + day = get_date(message["ts"].split(".")[0]) if channel_msgs[channel_name].get(day, None): channel_msgs[channel_name][day].append(message) else: @@ -148,7 +164,7 @@ def get_date(ts): for day in channel_msgs[channel_name].keys(): file = os.path.join(directory, "%s.json") % day - with open(file, 'w') as outfile: + with open(file, "w") as outfile: 
json.dump(channel_msgs[channel_name][day], outfile) outfile.close() logger.info("Updated %s channels" % update_count) diff --git a/import.py b/import.py index 1dc9897..9094c73 100644 --- a/import.py +++ b/import.py @@ -8,16 +8,23 @@ parser = argparse.ArgumentParser() -parser.add_argument('directory', help=( - 'path to the downloaded Slack archive')) -parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( - 'path to the SQLite database. (default = ./slack.sqlite)')) -parser.add_argument('-l', '--log-level', default='debug', help=( - 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) +parser.add_argument("directory", help=("path to the downloaded Slack archive")) +parser.add_argument( + "-d", + "--database-path", + default="slack.sqlite", + help=("path to the SQLite database. (default = ./slack.sqlite)"), +) +parser.add_argument( + "-l", + "--log-level", + default="debug", + help=("CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)"), +) args = parser.parse_args() log_level = args.log_level.upper() -assert log_level in ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'] +assert log_level in ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] logging.basicConfig(level=getattr(logging, log_level)) logger = logging.getLogger(__name__) @@ -27,39 +34,49 @@ directory = args.directory logger.info("Importing channels..") -with open(os.path.join(directory, 'channels.json')) as f: +with open(os.path.join(directory, "channels.json")) as f: channels = json.load(f) -args = [(c['name'], c['id'], 1) for c in channels] -cursor.executemany('INSERT INTO channels VALUES(?,?,?)', (args)) +args = [(c["name"], c["id"], 1) for c in channels] +cursor.executemany("INSERT INTO channels VALUES(?,?,?)", (args)) logger.info("- Channels imported") logger.info("Importing users..") -with open(os.path.join(directory, 'users.json')) as f: +with open(os.path.join(directory, "users.json")) as f: users = json.load(f) -args = [(u['name'], u['id'], 
u['profile']['image_72']) for u in users] -cursor.executemany('INSERT INTO users VALUES(?,?,?)', (args)) +args = [(u["name"], u["id"], u["profile"]["image_72"]) for u in users] +cursor.executemany("INSERT INTO users VALUES(?,?,?)", (args)) logger.info("- Users imported") logger.info("Importing messages..") for channel in channels: - files = glob.glob(os.path.join(directory, channel['name'], '*.json')) + files = glob.glob(os.path.join(directory, channel["name"], "*.json")) if not files: - logger.warning("No messages found for #%s" % channel['name']) + logger.warning("No messages found for #%s" % channel["name"]) for file_name in files: - with open(file_name, encoding='utf8') as f: + with open(file_name, encoding="utf8") as f: messages = json.load(f) args = [] for message in messages: - if ('id' in channel and 'ts' in message): - args.append(( - message['text'] if 'text' in message else "~~There is a message ommitted here~~", - message['user'] if 'user' in message else "", channel['id'], message['ts'] - )) + if "id" in channel and "ts" in message: + args.append( + ( + message["text"] + if "text" in message + else "~~There is a message ommitted here~~", + message["user"] if "user" in message else "", + channel["id"], + message["ts"], + ) + ) else: - logger.warning("In "+file_name+": An exception occured, message not added to archive.") + logger.warning( + "In " + + file_name + + ": An exception occured, message not added to archive." 
+ ) - cursor.executemany('INSERT INTO messages VALUES(?, ?, ?, ?)', args) + cursor.executemany("INSERT INTO messages VALUES(?, ?, ?, ?)", args) conn.commit() logger.info("- Messages imported") logger.info("Done") diff --git a/utils.py b/utils.py index a18a0b1..95d4a03 100644 --- a/utils.py +++ b/utils.py @@ -1,7 +1,9 @@ import sqlite3 + def migrate_db(conn, cursor): - cursor.execute(''' + cursor.execute( + """ CREATE TABLE IF NOT EXISTS messages ( message TEXT, user TEXT, @@ -9,42 +11,52 @@ def migrate_db(conn, cursor): timestamp TEXT, UNIQUE(channel, timestamp) ON CONFLICT REPLACE ) - ''') - cursor.execute(''' + """ + ) + cursor.execute( + """ CREATE TABLE IF NOT EXISTS users ( name TEXT, id TEXT, avatar TEXT, UNIQUE(id) ON CONFLICT REPLACE - )''') - cursor.execute(''' + )""" + ) + cursor.execute( + """ CREATE TABLE IF NOT EXISTS channels ( name TEXT, id TEXT, is_private BOOLEAN NOT NULL CHECK (is_private IN (0,1)), UNIQUE(id) ON CONFLICT REPLACE - )''') - cursor.execute(''' + )""" + ) + cursor.execute( + """ CREATE TABLE IF NOT EXISTS members ( channel TEXT, user TEXT, FOREIGN KEY (channel) REFERENCES channels(id), FOREIGN KEY (user) REFERENCES users(id) ) - ''') + """ + ) conn.commit() # Add `is_private` to channels for dbs that existed in v0.1 try: - cursor.execute(''' + cursor.execute( + """ ALTER TABLE channels ADD COLUMN is_private BOOLEAN default 1 NOT NULL CHECK (is_private IN (0,1)) - ''') + """ + ) conn.commit() except: pass + def db_connect(database_path): conn = sqlite3.connect(database_path) cursor = conn.cursor() From 7e0619a11143fcf6583cc8ef2de44cde59375487 Mon Sep 17 00:00:00 2001 From: Jeremy Neiman Date: Wed, 27 Jan 2021 15:09:26 -0500 Subject: [PATCH 8/9] only add channels that the bot is a member of --- archivebot.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/archivebot.py b/archivebot.py index 2049181..4929a15 100644 --- a/archivebot.py +++ b/archivebot.py @@ -90,13 +90,14 @@ def update_channels(conn, 
cursor): channel_args = [] member_args = [] for channel in channels: - channel_id, channel_name, channel_is_private, members = get_channel_info( - channel["id"] - ) + if channel["is_member"]: + channel_id, channel_name, channel_is_private, members = get_channel_info( + channel["id"] + ) - channel_args.append((channel_name, channel_id, channel_is_private)) + channel_args.append((channel_name, channel_id, channel_is_private)) - member_args += members + member_args += members cursor.executemany( "INSERT INTO channels(name, id, is_private) VALUES(?,?,?)", channel_args From aa662654785ad7dfa73339704d0e29b00729a9f7 Mon Sep 17 00:00:00 2001 From: Jeremy Neiman Date: Mon, 1 Feb 2021 08:48:48 -0500 Subject: [PATCH 9/9] fix bug with member pagination --- archivebot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/archivebot.py b/archivebot.py index 4929a15..e5861e0 100644 --- a/archivebot.py +++ b/archivebot.py @@ -70,7 +70,9 @@ def get_channel_info(channel_id): response = app.client.conversations_members(channel=channel["id"]) members = response["members"] while response["response_metadata"]["next_cursor"]: - response = app.client.conversations_members(channel=channel["id"]) + response = app.client.conversations_members( + channel=channel["id"], cursor=response["response_metadata"]["next_cursor"] + ) members += response["members"] return (