diff --git a/.gitignore b/.gitignore index 27d9510..ad6efba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ export **.sqlite +**.pyc diff --git a/README.md b/README.md index 459b518..be1cde2 100755 --- a/README.md +++ b/README.md @@ -3,26 +3,60 @@ A bot that can search your slack message history. Makes it possible to search further back than 10,000 messages. +## Requirements + +1. Permission to install new apps to your Slack workspace. +2. python3 +3. A publicly accessible URL to serve the bot from. (Slack recommends using [ngrok](https://ngrok.com/) to get around this.) + ## Installation 1. Clone this repo. -1. Install the requirements: +2. Install the requirements: pip install -r requirements.txt -1. [Export your team's slack history.](https://get.slack.help/hc/en-us/articles/201658943-Export-your-team-s-Slack-history) -Download the archive and export it to a directory. Then run `import_archive.py` +3. If you want to include your existing slack messages, [export your team's slack history.](https://get.slack.help/hc/en-us/articles/201658943-Export-your-team-s-Slack-history) +Download the archive and export it to a directory. Then run `import.py` on the directory. For example: python import.py export This will create a file `slack.sqlite`. -1. Create a new [bot user](https://api.slack.com/bot-users) on your slack -channel and get the API key. Start the bot with: + +4. Create a new [Slack app](https://api.slack.com/start/overview). + +- Add the following bot token oauth scopes and install it to your workspace: + + - `channels:history` + - `channels:read` + - `chat:write` + - `groups:history` (if you want to archive/search private channels) + - `groups:read` (if you want to archive/search private channels) + - `im:history` + - `users:read` + +5. 
Start archive bot with: + + SLACK_BOT_TOKEN=<BOT_TOKEN> SLACK_SIGNING_SECRET=<SIGNING_SECRET> python archivebot.py - export SLACK_API_TOKEN=<API_TOKEN> && python archivebot.py +Where `SIGNING_SECRET` is the "Signing Secret" from your app's "Basic Information" page and `BOT_TOKEN` is the +"Bot User OAuth Access Token" from the app's "OAuth & Permissions" page. - Where API_TOKEN is the token you got when creating the bot user. +Use `python archivebot.py -h` for a list of all command line options. + +6. Go to the app's "Event Subscriptions" page and add the url to where archive bot is being served. The default port is `3333`. + +- Then add the following bot events: + + - `channel_rename` + - `group_rename` (if you want to archive/search private channels) + - `member_joined_channel` + - `member_left_channel` + - `message.channels` + - `message.groups` (if you want to archive/search private channels) + - `message.im` + - `user_change` ## Archiving New Messages @@ -50,6 +84,13 @@ to the query. The full usage is: limit: The number of responses to return. Default 10. +## Migrating from slack-archive-bot v0.1 + +`slack-archive-bot` v0.1 used the legacy Slack API which Slack [ended support for in February 2021](https://api.slack.com/changelog/2020-01-deprecating-antecedents-to-the-conversations-api). To migrate to the new version: + +- Follow the installation steps above to create a new slack app with all of the required permissions and event subscriptions. +- The biggest change in requirements with the new version is the move from the [Real Time Messaging API](https://api.slack.com/rtm) to the [Events API](https://api.slack.com/apis/connections/events-api) which necessitates having a publicly-accessible url that Slack can send events to. If you are unable to serve a public endpoint, you can use [ngrok](https://ngrok.com/). + ## Contributing Contributions are more than welcome. From bugs to new features. 
I threw this diff --git a/archivebot.py b/archivebot.py index eafaf10..e5861e0 100644 --- a/archivebot.py +++ b/archivebot.py @@ -1,120 +1,114 @@ import argparse -import datetime import logging import os -import sqlite3 -import time import traceback -from slackclient import SlackClient -from websocket import WebSocketConnectionClosedException - +from slack_bolt import App +from utils import db_connect, migrate_db parser = argparse.ArgumentParser() -parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( - 'path to the SQLite database. (default = ./slack.sqlite)')) -parser.add_argument('-l', '--log-level', default='debug', help=( - 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) -args = parser.parse_args() - -log_level = args.log_level.upper() -assert log_level in ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'] +parser.add_argument( + "-d", + "--database-path", + default="slack.sqlite", + help=("path to the SQLite database. (default = ./slack.sqlite)"), +) +parser.add_argument( + "-l", + "--log-level", + default="debug", + help=("CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)"), +) +parser.add_argument( + "-p", "--port", default=3333, help="Port to serve on. 
(default = 3333)" +) +cmd_args = parser.parse_args() + +log_level = cmd_args.log_level.upper() +assert log_level in ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] logging.basicConfig(level=getattr(logging, log_level)) logger = logging.getLogger(__name__) -database_path = args.database_path - -# Connects to the previously created SQL database -conn = sqlite3.connect(database_path) -cursor = conn.cursor() -cursor.execute('create table if not exists messages (message text, user text, channel text, timestamp text, UNIQUE(channel, timestamp) ON CONFLICT REPLACE)') -cursor.execute('create table if not exists users (name text, id text, avatar text, UNIQUE(id) ON CONFLICT REPLACE)') -cursor.execute('create table if not exists channels (name text, id text, UNIQUE(id) ON CONFLICT REPLACE)') +database_path = cmd_args.database_path -# This token is given when the bot is started in terminal -slack_token = os.environ["SLACK_API_TOKEN"] +app = App( + token=os.environ.get("SLACK_BOT_TOKEN"), + signing_secret=os.environ.get("SLACK_SIGNING_SECRET"), + logger=logger, +) -# Makes bot user active on Slack -# NOTE: terminal must be running for the bot to continue -sc = SlackClient(slack_token) - -# Double naming for better search functionality -# Keys are both the name and unique ID where needed -ENV = { - 'user_id': {}, - 'id_user': {}, - 'channel_id': {}, - 'id_channel': {}, - 'channel_info': {} -} +# Save the bot user's user ID +app._bot_user_id = app.client.auth_test()["user_id"] # Uses slack API to get most recent user list # Necessary for User ID correlation -def update_users(): - logger.info('Updating users') - info = sc.api_call('users.list') - ENV['user_id'] = dict([(m['name'], m['id']) for m in info['members']]) - ENV['id_user'] = dict([(m['id'], m['name']) for m in info['members']]) +def update_users(conn, cursor): + logger.info("Updating users") + info = app.client.users_list() args = [] - for m in info['members']: - args.append(( - m['name'], - m['id'], - 
m['profile'].get('image_72', 'https://secure.gravatar.com/avatar/c3a07fba0c4787b0ef1d417838eae9c5.jpg?s=32&d=https%3A%2F%2Ffst.slack-edge.com%2F66f9%2Fimg%2Favatars%2Fava_0024-32.png') - )) + for m in info["members"]: + args.append( + ( + m["profile"]["display_name"], + m["id"], + m["profile"].get( + "image_72", + "http://fst.slack-edge.com/66f9/img/avatars/ava_0024-32.png", + ), + ) + ) cursor.executemany("INSERT INTO users(name, id, avatar) VALUES(?,?,?)", args) conn.commit() -def get_user_id(name): - if name not in ENV['user_id']: - update_users() - return ENV['user_id'].get(name, None) +def get_channel_info(channel_id): + channel = app.client.conversations_info(channel=channel_id)["channel"] + + # Get a list of members for the channel. This will be used when querying private channels. + response = app.client.conversations_members(channel=channel["id"]) + members = response["members"] + while response["response_metadata"]["next_cursor"]: + response = app.client.conversations_members( + channel=channel["id"], cursor=response["response_metadata"]["next_cursor"] + ) + members += response["members"] -def update_channels(): + return ( + channel["id"], + channel["name"], + channel["is_private"], + [(channel["id"], m) for m in members], + ) + + +def update_channels(conn, cursor): logger.info("Updating channels") - info = sc.api_call('channels.list')['channels'] + sc.api_call('groups.list')['groups'] - ENV['channel_id'] = dict([(m['name'], m['id']) for m in info]) - ENV['id_channel'] = dict([(m['id'], m['name']) for m in info]) + channels = app.client.conversations_list(types="public_channel,private_channel")[ + "channels" + ] + + channel_args = [] + member_args = [] + for channel in channels: + if channel["is_member"]: + channel_id, channel_name, channel_is_private, members = get_channel_info( + channel["id"] + ) - args = [] - for m in info: - ENV['channel_info'][m['id']] = { - 'is_private': ('is_group' in m) or m['is_private'], - 'members': m['members'] - } - - 
args.append(( - m['name'], - m['id'] - )) - - cursor.executemany("INSERT INTO channels(name, id) VALUES(?,?)", args) - conn.commit() + channel_args.append((channel_name, channel_id, channel_is_private)) -def get_channel_id(name): - if name not in ENV['channel_id']: - update_channels() - return ENV['channel_id'].get(name, None) + member_args += members -def send_message(message, channel): - sc.api_call( - "chat.postMessage", - channel=channel, - text=message + cursor.executemany( + "INSERT INTO channels(name, id, is_private) VALUES(?,?,?)", channel_args ) - -def can_query_channel(channel_id, user_id): - if channel_id in ENV['id_channel']: - return ( - (not ENV['channel_info'][channel_id]['is_private']) or - (user_id in ENV['channel_info'][channel_id]['members']) - ) + cursor.executemany("INSERT INTO members(channel, user) VALUES(?,?)", member_args) + conn.commit() -def handle_query(event): +def handle_query(event, cursor, say): """ Handles a DM to the bot that is requesting a search of the archives. 
@@ -131,117 +125,218 @@ def handle_query(event): """ try: text = [] - user = None - channel = None + user_name = None + channel_name = None sort = None limit = 10 - params = event['text'].lower().split() + params = event["text"].lower().split() for p in params: # Handle emoji # usual format is " :smiley_face: " - if len(p) > 2 and p[0] == ':' and p[-1] == ':': + if len(p) > 2 and p[0] == ":" and p[-1] == ":": text.append(p) continue - p = p.split(':') + p = p.split(":") if len(p) == 1: text.append(p[0]) if len(p) == 2: - if p[0] == 'from': - user = get_user_id(p[1].replace('@','').strip()) - if user is None: - raise ValueError('User %s not found' % p[1]) - if p[0] == 'in': - channel = get_channel_id(p[1].replace('#','').strip()) - if channel is None: - raise ValueError('Channel %s not found' % p[1]) - if p[0] == 'sort': - if p[1] in ['asc', 'desc']: + if p[0] == "from": + user_name = p[1] + if p[0] == "in": + channel_name = p[1].replace("#", "").strip() + if p[0] == "sort": + if p[1] in ["asc", "desc"]: sort = p[1] else: - raise ValueError('Invalid sort order %s' % p[1]) - if p[0] == 'limit': + raise ValueError("Invalid sort order %s" % p[1]) + if p[0] == "limit": try: limit = int(p[1]) except: - raise ValueError('%s not a valid number' % p[1]) - - query = 'SELECT message,user,timestamp,channel FROM messages WHERE message LIKE (?)' - query_args=["%"+" ".join(text)+"%"] - - if user: - query += ' AND user=(?)' - query_args.append(user) - if channel: - query += ' AND channel=(?)' - query_args.append(channel) + raise ValueError("%s not a valid number" % p[1]) + + query = f""" + SELECT DISTINCT + messages.message, messages.user, messages.timestamp, messages.channel + FROM messages + INNER JOIN users ON messages.user = users.id + -- Only query channel that archive bot is a part of + INNER JOIN ( + SELECT * FROM channels + INNER JOIN members ON + channels.id = members.channel AND + members.user = (?) 
+ ) as channels ON messages.channel = channels.id + INNER JOIN members ON channels.id = members.channel + WHERE + -- Only return messages that are in public channels or the user is a member of + (channels.is_private <> 1 OR members.user = (?)) AND + messages.message LIKE (?) + """ + query_args = [app._bot_user_id, event["user"], "%" + " ".join(text) + "%"] + + if user_name: + query += " AND users.name = (?)" + query_args.append(user_name) + if channel_name: + query += " AND channels.name = (?)" + query_args.append(channel_name) if sort: - query += ' ORDER BY timestamp %s' % sort - #query_args.append(sort) + query += " ORDER BY messages.timestamp %s" % sort logger.debug(query) logger.debug(query_args) - cursor.execute(query,query_args) + cursor.execute(query, query_args) res = cursor.fetchmany(limit) - res_message=None + res_message = None if res: logger.debug(res) - res_message = '\n'.join( - ['*<@%s>* __ _<#%s>_\n%s\n\n' % ( - i[1], int(float(i[2])), i[3], i[0] - ) for i in res if can_query_channel(i[3], event['user'])] + res_message = "\n".join( + [ + "*<@%s>* __ _<#%s>_\n%s\n\n" + % (i[1], int(float(i[2])), i[3], i[0]) + for i in res + ] ) if res_message: - send_message(res_message, event['channel']) + say(res_message) else: - send_message('No results found', event['channel']) + say("No results found") except ValueError as e: logger.error(traceback.format_exc()) - send_message(str(e), event['channel']) + say(str(e)) -def handle_message(event): - if 'text' not in event: - return - if 'subtype' in event and event['subtype'] == 'bot_message': + +@app.event("member_joined_channel") +def handle_join(event): + conn, cursor = db_connect(database_path) + + # If the user added is archive bot, then add the channel too + if event["user"] == app._bot_user_id: + channel_id, channel_name, channel_is_private, members = get_channel_info( + event["channel"] + ) + cursor.execute( + "INSERT INTO channels(name, id, is_private) VALUES(?,?,?)", + (channel_id, channel_name, 
channel_is_private), + ) + cursor.executemany("INSERT INTO members(channel, user) VALUES(?,?)", members) + else: + cursor.execute( + "INSERT INTO members(channel, user) VALUES(?,?)", + (event["channel"], event["user"]), + ) + + conn.commit() + + +@app.event("member_left_channel") +def handle_left(event): + conn, cursor = db_connect(database_path) + cursor.execute( + "DELETE FROM members WHERE channel = ? AND user = ?", + (event["channel"], event["user"]), + ) + conn.commit() + + +def handle_rename(event): + channel = event["channel"] + conn, cursor = db_connect(database_path) + cursor.execute( + "UPDATE channels SET name = ? WHERE id = ?", (channel["name"], channel["id"]) + ) + conn.commit() + + +@app.event("channel_rename") +def handle_channel_rename(event): + handle_rename(event) + + +@app.event("group_rename") +def handle_group_rename(event): + handle_rename(event) + + +# For some reason slack fires off both *_rename and *_name events, so create handlers for them +# but don't do anything in the *_name events. +@app.event({"type": "message", "subtype": "group_name"}) +def handle_group_name(): + pass + + +@app.event({"type": "message", "subtype": "channel_name"}) +def handle_channel_name(): + pass + + +@app.event("user_change") +def handle_user_change(event): + user_id = event["user"]["id"] + new_username = event["user"]["profile"]["display_name"] + + conn, cursor = db_connect(database_path) + cursor.execute("UPDATE users SET name = ? WHERE id = ?", (new_username, user_id)) + conn.commit() + + +@app.message("") +def handle_message(message, say): + logger.debug(message) + if "text" not in message or message["user"] == "USLACKBOT": return - logger.debug(event) + conn, cursor = db_connect(database_path) # If it's a DM, treat it as a search query - if event['channel'][0] == 'D': - handle_query(event) - elif 'user' not in event: - logger.warn("No valid user. Previous event not saved") - else: # Otherwise save the message to the archive. 
- cursor.executemany('INSERT INTO messages VALUES(?, ?, ?, ?)', - [(event['text'], event['user'], event['channel'], event['ts'])] + if message["channel_type"] == "im": + handle_query(message, cursor, say) + elif "user" not in message: + logger.warning("No valid user. Previous event not saved") + else: # Otherwise save the message to the archive. + cursor.execute( + "INSERT INTO messages VALUES(?, ?, ?, ?)", + (message["text"], message["user"], message["channel"], message["ts"]), ) conn.commit() + # Ensure that the user exists in the DB + cursor.execute("SELECT * FROM users WHERE id = ?", (message["user"],)) + row = cursor.fetchone() + if row is None: + update_users(conn, cursor) + logger.debug("--------------------------") -# Loop -if sc.rtm_connect(auto_reconnect=True): - update_users() - update_channels() - logger.info('Archive bot online. Messages will now be recorded...') - while sc.server.connected is True: - try: - for event in sc.rtm_read(): - if event['type'] == 'message': - handle_message(event) - if 'subtype' in event and event['subtype'] in ['group_leave']: - update_channels() - elif event['type'] in ['group_joined', 'member_joined_channel', 'channel_created', 'group_left']: - update_channels() - except WebSocketConnectionClosedException: - sc.rtm_connect() - except: - logger.error(traceback.format_exc()) - time.sleep(1) -else: - logger.error('Connection Failed, invalid token?') + +@app.event({"type": "message", "subtype": "message_changed"}) +def handle_message_changed(event): + message = event["message"] + conn, cursor = db_connect(database_path) + cursor.execute( + "UPDATE messages SET message = ? WHERE user = ? AND channel = ? 
AND timestamp = ?", + (message["text"], message["user"], event["channel"], message["ts"]), + ) + conn.commit() + + +def main(): + # Initialize the DB if it doesn't exist + conn, cursor = db_connect(database_path) + migrate_db(conn, cursor) + + # Update the users and channels in the DB and in the local memory mapping + update_users(conn, cursor) + update_channels(conn, cursor) + + app.start(port=cmd_args.port) + + +if __name__ == "__main__": + main() diff --git a/export.py b/export.py index 346d3dd..c1b2a7d 100644 --- a/export.py +++ b/export.py @@ -20,49 +20,62 @@ def dict_factory(cursor, row): d[column[0]] = row[index] return d + # Turns unicode into text -def byteify(input): - if isinstance(input, dict): - return {byteify(key): byteify(value) - for key, value in iteritems(input)} - elif isinstance(input, list): - return [byteify(element) for element in input] - elif 'unicode' in vars(globals()['__builtins__']) and isinstance(input, unicode): - return input.encode('utf-8') - else: - return input +def byteify(inp): + if isinstance(inp, dict): + return {byteify(key): byteify(value) for key, value in iteritems(inp)} + if isinstance(inp, list): + return [byteify(element) for element in inp] + if "unicode" in vars(globals()["__builtins__"]) and isinstance(inp, unicode): + return inp.encode("utf-8") + return inp + + +def get_channel_name(channel_id): + return ENV["id_channel"].get(channel_id, "None") -def get_channel_name(id): - return ENV['id_channel'].get(id, 'None') -def getDate(ts): - return datetime.datetime.fromtimestamp(int(ts)).strftime('%Y-%m-%d') +def get_date(ts): + return datetime.datetime.fromtimestamp(int(ts)).strftime("%Y-%m-%d") # Uncomment time in the future if running daily (Used to export last days of messages) -#time = time.time() - 86400 # One full day in seconds +# time = time.time() - 86400 # One full day in seconds parser = argparse.ArgumentParser() -parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( - 'path to the SQLite 
database. (default = ./slack.sqlite)')) -parser.add_argument('-a', '--archive_path', default='export', help=( - 'path to export to (default ./export)')) -parser.add_argument('-l', '--log-level', default='debug', help=( - 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) +parser.add_argument( + "-d", + "--database-path", + default="slack.sqlite", + help=("path to the SQLite database. (default = ./slack.sqlite)"), +) +parser.add_argument( + "-a", + "--archive_path", + default="export", + help=("path to export to (default ./export)"), +) +parser.add_argument( + "-l", + "--log-level", + default="debug", + help=("CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)"), +) args = parser.parse_args() database_path = args.database_path archive_path = args.archive_path log_level = args.log_level.upper() -assert log_level in ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'] +assert log_level in ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] logging.basicConfig(level=getattr(logging, log_level)) logger = logging.getLogger(__name__) time = 0.0 if not os.path.isdir(archive_path): os.makedirs(archive_path) - time = 0.0 # Full export instead of day export + time = 0.0 # Full export instead of day export # Uncomment if you need to export entire archive or make this choice # getAll = raw_input("Do you want to export all messages instead of last day?(y/N) ").lower() @@ -81,49 +94,51 @@ def getDate(ts): cursor.execute("SELECT * FROM users") users = byteify(cursor.fetchall()) for u in users: - u['profile'] = {} - u['profile']['image_72'] = u.pop('avatar') + u["profile"] = {} + u["profile"]["image_72"] = u.pop("avatar") # Save channel and user data files to archive folder -channel_file = os.path.join(archive_path, 'channels.json') -with open(channel_file, 'w') as outfile: +channel_file = os.path.join(archive_path, "channels.json") +with open(channel_file, "w") as outfile: json.dump(channels, outfile) outfile.close() -user_file = os.path.join(archive_path, 
'users.json') -with open(user_file, 'w') as outfile: +user_file = os.path.join(archive_path, "users.json") +with open(user_file, "w") as outfile: json.dump(users, outfile) outfile.close() # Define the names associated with each channel id ENV = { - 'channel_id': {}, - 'id_channel': {}, + "channel_id": {}, + "id_channel": {}, } -ENV['channel_id'] = dict([(m['name'], m['id']) for m in channels]) -ENV['id_channel'] = dict([(m['id'], m['name']) for m in channels]) +ENV["channel_id"] = dict([(m["name"], m["id"]) for m in channels]) +ENV["id_channel"] = dict([(m["id"], m["name"]) for m in channels]) # Get all messages after given time (in seconds since the Epoch) -command = ("SELECT * FROM messages WHERE timestamp > %s ORDER BY channel, timestamp") % time +command = ( + "SELECT * FROM messages WHERE timestamp > %s ORDER BY channel, timestamp" +) % time cursor.execute(command) results = byteify(cursor.fetchall()) # Clean and store message results in Slack-ish format -channel_msgs = dict([(c['name'], {}) for c in channels]) +channel_msgs = dict([(c["name"], {}) for c in channels]) for message in results: - message['text'] = message['message'] - message['ts'] = message['timestamp'] - message['type'] = 'message' - message.pop('message') - message.pop('timestamp') + message["text"] = message["message"] + message["ts"] = message["timestamp"] + message["type"] = "message" + message.pop("message") + message.pop("timestamp") - channel_name = get_channel_name(message['channel']) + channel_name = get_channel_name(message["channel"]) if channel_name == "None": continue # timestamp format is #########.###### - day = getDate(message['ts'].split('.')[0]) + day = get_date(message["ts"].split(".")[0]) if channel_msgs[channel_name].get(day, None): channel_msgs[channel_name][day].append(message) else: @@ -133,23 +148,23 @@ def getDate(ts): update_count = 0 for channel_name in channel_msgs.keys(): # Checks for any messages from today - if len(channel_msgs[channel_name]) == 0: + if not 
channel_msgs[channel_name]: continue else: update_count += 1 logger.info("%s has been updated" % channel_name) - dir = os.path.join(archive_path, channel_name) - if "None" in dir: - logger.warn("Channel not found: %s") %message['channel'] + directory = os.path.join(archive_path, channel_name) + if "None" in directory: + logger.warning("Channel not found: %s" % channel_name) continue - if not os.path.isdir(dir): - os.makedirs(dir) + if not os.path.isdir(directory): + os.makedirs(directory) for day in channel_msgs[channel_name].keys(): - file = os.path.join(dir, "%s.json") % day - with open(file, 'w') as outfile: + file = os.path.join(directory, "%s.json") % day + with open(file, "w") as outfile: json.dump(channel_msgs[channel_name][day], outfile) outfile.close() logger.info("Updated %s channels" % update_count) diff --git a/import.py b/import.py index ae3b690..9094c73 100644 --- a/import.py +++ b/import.py @@ -3,65 +3,80 @@ import json import logging import os -import sqlite3 + +from utils import db_connect, migrate_db parser = argparse.ArgumentParser() -parser.add_argument('directory', help=( - 'path to the downloaded Slack archive')) -parser.add_argument('-d', '--database-path', default='slack.sqlite', help=( - 'path to the SQLite database. (default = ./slack.sqlite)')) -parser.add_argument('-l', '--log-level', default='debug', help=( - 'CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)')) +parser.add_argument("directory", help=("path to the downloaded Slack archive")) +parser.add_argument( + "-d", + "--database-path", + default="slack.sqlite", + help=("path to the SQLite database. 
(default = ./slack.sqlite)"), +) +parser.add_argument( + "-l", + "--log-level", + default="debug", + help=("CRITICAL, ERROR, WARNING, INFO or DEBUG (default = DEBUG)"), +) args = parser.parse_args() log_level = args.log_level.upper() -assert log_level in ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'] +assert log_level in ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] logging.basicConfig(level=getattr(logging, log_level)) logger = logging.getLogger(__name__) -conn = sqlite3.connect(args.database_path) -cursor = conn.cursor() -cursor.execute('create table if not exists messages (message text, user text, channel text, timestamp text, UNIQUE(channel, timestamp) ON CONFLICT REPLACE)') -cursor.execute('create table if not exists users (name text, id text, avatar text, UNIQUE(id) ON CONFLICT REPLACE)') -cursor.execute('create table if not exists channels (name text, id text, UNIQUE(id) ON CONFLICT REPLACE)') +conn, cursor = db_connect(args.database_path) +migrate_db(conn, cursor) directory = args.directory logger.info("Importing channels..") -with open(os.path.join(directory, 'channels.json')) as f: +with open(os.path.join(directory, "channels.json")) as f: channels = json.load(f) -args = [(c['name'], c['id']) for c in channels] -cursor.executemany('INSERT INTO channels VALUES(?,?)', (args)) +args = [(c["name"], c["id"], 1) for c in channels] +cursor.executemany("INSERT INTO channels VALUES(?,?,?)", (args)) logger.info("- Channels imported") logger.info("Importing users..") -with open(os.path.join(directory, 'users.json')) as f: +with open(os.path.join(directory, "users.json")) as f: users = json.load(f) -args = [(u['name'], u['id'], u['profile']['image_72']) for u in users] -cursor.executemany('INSERT INTO users VALUES(?,?,?)', (args)) +args = [(u["name"], u["id"], u["profile"]["image_72"]) for u in users] +cursor.executemany("INSERT INTO users VALUES(?,?,?)", (args)) logger.info("- Users imported") logger.info("Importing messages..") for channel in channels: - 
files = glob.glob(os.path.join(directory, channel['name'], '*.json')) - if len(files) == 0: - logger.warning("No messages found for #%s" % channel['name']) + files = glob.glob(os.path.join(directory, channel["name"], "*.json")) + if not files: + logger.warning("No messages found for #%s" % channel["name"]) for file_name in files: - with open(file_name, encoding='utf8') as f: + with open(file_name, encoding="utf8") as f: messages = json.load(f) args = [] for message in messages: - if ('id' in channel and 'ts' in message): - args.append(( - message['text'] if 'text' in message else "~~There is a message ommitted here~~", - message['user'] if 'user' in message else "", channel['id'], message['ts'] - )) + if "id" in channel and "ts" in message: + args.append( + ( + message["text"] + if "text" in message + else "~~There is a message ommitted here~~", + message["user"] if "user" in message else "", + channel["id"], + message["ts"], + ) + ) else: - logger.warn("In "+file_name+": An exception occured, message not added to archive.") + logger.warning( + "In " + + file_name + + ": An exception occured, message not added to archive." 
+ ) - cursor.executemany('INSERT INTO messages VALUES(?, ?, ?, ?)', args) + cursor.executemany("INSERT INTO messages VALUES(?, ?, ?, ?)", args) conn.commit() logger.info("- Messages imported") logger.info("Done") diff --git a/requirements.txt b/requirements.txt index a7b7cf6..fb68377 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ six>=1.10.0 -slackclient==1.3.2 +slack-bolt==1.2.1 diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..95d4a03 --- /dev/null +++ b/utils.py @@ -0,0 +1,63 @@ +import sqlite3 + + +def migrate_db(conn, cursor): + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS messages ( + message TEXT, + user TEXT, + channel TEXT, + timestamp TEXT, + UNIQUE(channel, timestamp) ON CONFLICT REPLACE + ) + """ + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS users ( + name TEXT, + id TEXT, + avatar TEXT, + UNIQUE(id) ON CONFLICT REPLACE + )""" + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS channels ( + name TEXT, + id TEXT, + is_private BOOLEAN NOT NULL CHECK (is_private IN (0,1)), + UNIQUE(id) ON CONFLICT REPLACE + )""" + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS members ( + channel TEXT, + user TEXT, + FOREIGN KEY (channel) REFERENCES channels(id), + FOREIGN KEY (user) REFERENCES users(id) + ) + """ + ) + conn.commit() + + # Add `is_private` to channels for dbs that existed in v0.1 + try: + cursor.execute( + """ + ALTER TABLE channels + ADD COLUMN is_private BOOLEAN default 1 + NOT NULL CHECK (is_private IN (0,1)) + """ + ) + conn.commit() + except: + pass + + +def db_connect(database_path): + conn = sqlite3.connect(database_path) + cursor = conn.cursor() + return conn, cursor