diff --git a/README.md b/README.md
index 89da080..e2153a1 100644
--- a/README.md
+++ b/README.md
@@ -94,13 +94,14 @@ This would be the main function to use within chat-exporter.
**Optional Argument(s):**
`limit`: Integer value to set the limit (amount of messages) the chat exporter gathers when grabbing the history (default=unlimited).
-`tz_info`: String value of a [TZ Database name](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones#List) to set a custom timezone for the exported messages (default=UTC)
-`guild`: `discord.Guild` object which can be passed in to solve bugs for certain forks
-`military_time`: Boolean value to set a 24h format for times within your exported chat (default=False | 12h format)
-`fancy_times`: Boolean value which toggles the 'fancy times' (Today|Yesterday|Day)
-`before`: `datetime.datetime` object which allows to gather messages from before a certain date
-`after`: `datetime.datetime` object which allows to gather messages from after a certain date
-`bot`: `commands.Bot` object to gather members who are no longer in your guild.
+`tz_info`: String value of a [TZ Database name](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones#List) to set a custom timezone for the exported messages (default=UTC).
+`guild`: `discord.Guild` object which can be passed in to solve bugs for certain forks.
+`military_time`: Boolean value to set a 24h format for times within your exported chat (default=False | 12h format).
+`fancy_times`: Boolean value which toggles the 'fancy times' (Today|Yesterday|Day).
+`before`: `datetime.datetime` object which allows gathering messages from before a certain date.
+`after`: `datetime.datetime` object which allows gathering messages from after a certain date.
+`bot`: `commands.Bot` object to gather members who are no longer in your guild.
+`attachment_handler`: `chat_exporter.AttachmentHandler` object used to export assets so that they remain available after the `channel` has been deleted.
**Return Argument:**
`transcript`: The HTML build-up for you to construct the HTML File with Discord.
@@ -149,6 +150,7 @@ This would be for people who want to filter what content to export.
`military_time`: Boolean value to set a 24h format for times within your exported chat (default=False | 12h format)
`fancy_times`: Boolean value which toggles the 'fancy times' (Today|Yesterday|Day)
`bot`: `commands.Bot` object to gather members who are no longer in your guild.
+`attachment_handler`: `chat_exporter.AttachmentHandler` object used to export assets so that they remain available after the `channel` has been deleted.
**Return Argument:**
`transcript`: The HTML build-up for you to construct the HTML File with Discord.
@@ -183,6 +185,178 @@ async def purge(ctx: commands.Context, tz_info: str, military_time: bool):
```
+
+
(back to top)
+
+---
+## Attachment Handler
+
+Due to Discord's newly introduced restrictions on their CDN, we have introduced an Attachment Handler. This handler
+will assist you with circumventing the 'broken' and 'dead' assets which arise when former attachments hosted by Discord
+reach their expiration date.
+
+The `AttachmentHandler` serves as a template for you to implement your own asset handler. Below are two basic examples of
+how to use the `AttachmentHandler`: one stores files on a local webserver, and the other
+stores them on Discord *(the latter is merely an example; it will still obviously run into
+the expiration issue)*.
+
+If you do not specify an attachment handler, chat-exporter will continue to use the (proxy) URLs for the assets.
+
+Concept
+
+The concept of implementing such an AttachmentHandler is very easy. The following describes a short, general procedure
+for writing your own AttachmentHandler to fit your storage solution. Here we will assume that we store the
+attachments in cloud storage.
+
+1. Subclassing
+Start by subclassing `chat_exporter.AttachmentHandler` and implement the `__init__` method if needed. This should look
+something like this:
+
+```python
+from chat_exporter import AttachmentHandler
+from cloud_wrapper import CloudClient
+
+
+class MyAttachmentHandler(AttachmentHandler):
+ def __init__(self, *args, **kwargs):
+ # Your initialization code here
+ # in our case we just create the cloud client
+ self.cloud_client = CloudClient()
+
+```
+
+2. Overwrite process_asset
+The `process_asset` method is the method that is called for each asset in the chat. Here we have to implement the
+upload logic and the generation of the asset url from the uploaded asset.
+
+```python
+import io
+import aiohttp
+from chat_exporter import AttachmentHandler
+from cloud_wrapper import CloudClient
+from discord import Attachment
+
+
+class MyAttachmentHandler(AttachmentHandler):
+ async def process_asset(self, attachment: Attachment):
+ # Your upload logic here, in our example we just upload the asset to the cloud
+
+ # first we need to authorize the client
+ await self.cloud_client.authorize()
+
+ # then we fetch the content of the attachment
+ async with aiohttp.ClientSession() as session:
+ async with session.get(attachment.url) as res:
+ if res.status != 200:
+ res.raise_for_status()
+ data = io.BytesIO(await res.read())
+ data.seek(0)
+
+ # and upload it to the cloud; we get back some sort of identifier for the uploaded file
+ asset_id = await self.cloud_client.upload(data)
+
+ # now we can generate the asset url from the identifier
+ asset_url = await self.cloud_client.get_share_url(asset_id, shared_with="everyone")
+
+ # and set the url attribute of the attachment to the generated url
+ attachment.url = asset_url
+ return attachment
+
+```
+
+Note
+1. The `process_asset` method should return the attachment object with the url attribute set to the generated url.
+2. The `process_asset` method should be an async method, as it is likely that you have to do some async operations
+ like fetching the content of the attachment or uploading it to the cloud.
+3. You are free to add other methods in your class, and call them from `process_asset` if you need to do some
+ operations before or after the upload of the asset. But the `process_asset` method is the only method that is
+called from chat-exporter.
+
+
+
+**Examples:**
+
+
+AttachmentToLocalFileHostHandler
+
+Assuming you have a file server running, which serves the content of the folder `/usr/share/assets/`
+under `https://example.com/assets/`, you can easily use the `AttachmentToLocalFileHostHandler` like this:
+```python
+import io
+import discord
+from discord.ext import commands
+import chat_exporter
+from chat_exporter import AttachmentToLocalFileHostHandler
+
+...
+
+# Establish the file handler
+file_handler = AttachmentToLocalFileHostHandler(
+ base_path="/usr/share/assets",
+ url_base="https://example.com/assets/",
+)
+
+@bot.command()
+async def save(ctx: commands.Context):
+ transcript = await chat_exporter.export(
+ ctx.channel,
+ attachment_handler=file_handler,
+ )
+
+ if transcript is None:
+ return
+
+ transcript_file = discord.File(
+ io.BytesIO(transcript.encode()),
+ filename=f"transcript-{ctx.channel.name}.html",
+ )
+
+ await ctx.send(file=transcript_file)
+
+```
+
+
+AttachmentToDiscordChannelHandler
+
+Assuming you want to store your attachments in a Discord channel, you can use the `AttachmentToDiscordChannelHandler`.
+Please note that Discord's recent changes regarding content links will result in the attachment links being broken
+after 24 hours. While this is therefore not a recommended way to store your attachments, it should give you a good
+idea of how to perform asynchronous storing of the attachments.
+
+```python
+import io
+import discord
+from discord.ext import commands
+import chat_exporter
+from chat_exporter import AttachmentToDiscordChannelHandler
+
+...
+
+# Establish the channel handler
+channel_handler = AttachmentToDiscordChannelHandler(
+ channel=bot.get_channel(CHANNEL_ID),
+)
+
+@bot.command()
+async def save(ctx: commands.Context):
+ transcript = await chat_exporter.export(
+ ctx.channel,
+ attachment_handler=channel_handler,
+ )
+
+ if transcript is None:
+ return
+
+ transcript_file = discord.File(
+ io.BytesIO(transcript.encode()),
+ filename=f"transcript-{ctx.channel.name}.html",
+ )
+
+ await ctx.send(file=transcript_file)
+
+```
+
+
(back to top)
---
@@ -204,6 +378,7 @@ async def purge(ctx: commands.Context, tz_info: str, military_time: bool):
---
## Additional Functions
+
Link Function
Downloading exported chats can build up a bunch of unwanted files on your PC which can get annoying, additionally - not everyone wants to download content from Discord.
@@ -274,6 +449,8 @@ It simply makes a request to the given URL and echos (prints) the content for yo
+
+
---
## Attributions
diff --git a/chat_exporter/__init__.py b/chat_exporter/__init__.py
index 3514abc..ff4c97b 100644
--- a/chat_exporter/__init__.py
+++ b/chat_exporter/__init__.py
@@ -1,6 +1,14 @@
-from chat_exporter.chat_exporter import export, raw_export, quick_export, link, quick_link
+from chat_exporter.chat_exporter import (
+ export,
+ raw_export,
+ quick_export,
+ link,
+ quick_link,
+ AttachmentHandler,
+ AttachmentToLocalFileHostHandler,
+ AttachmentToDiscordChannelHandler)
-__version__ = "2.6.1"
+__version__ = "2.7.0"
__all__ = (
export,
@@ -8,4 +16,7 @@
quick_export,
link,
quick_link,
+ AttachmentHandler,
+ AttachmentToLocalFileHostHandler,
+ AttachmentToDiscordChannelHandler,
)
diff --git a/chat_exporter/chat_exporter.py b/chat_exporter/chat_exporter.py
index 2b406a7..e1b2028 100644
--- a/chat_exporter/chat_exporter.py
+++ b/chat_exporter/chat_exporter.py
@@ -4,6 +4,7 @@
from chat_exporter.construct.transcript import Transcript
from chat_exporter.ext.discord_import import discord
+from chat_exporter.construct.attachment_handler import AttachmentHandler, AttachmentToLocalFileHostHandler, AttachmentToDiscordChannelHandler
async def quick_export(
@@ -61,6 +62,7 @@ async def export(
before: Optional[datetime.datetime] = None,
after: Optional[datetime.datetime] = None,
support_dev: Optional[bool] = True,
+ attachment_handler: Optional[AttachmentHandler] = None,
):
"""
Create a customised transcript of your Discord channel.
@@ -74,6 +76,7 @@ async def export(
:param fancy_times: (optional) boolean - set javascript around time display
:param before: (optional) datetime.datetime - allows before time for history
:param after: (optional) datetime.datetime - allows after time for history
+ :param attachment_handler: (optional) attachment_handler.AttachmentHandler - allows custom asset handling
:return: string - transcript file make up
"""
if guild:
@@ -91,6 +94,7 @@ async def export(
after=after,
support_dev=support_dev,
bot=bot,
+ attachment_handler=attachment_handler,
).export()
).html
@@ -104,6 +108,7 @@ async def raw_export(
military_time: Optional[bool] = False,
fancy_times: Optional[bool] = True,
support_dev: Optional[bool] = True,
+ attachment_handler: Optional[AttachmentHandler] = None,
):
"""
Create a customised transcript with your own captured Discord messages
@@ -115,6 +120,7 @@ async def raw_export(
:param bot: (optional) discord.Client - set getting member role colour
:param military_time: (optional) boolean - set military time (24hour clock)
:param fancy_times: (optional) boolean - set javascript around time display
+ :param attachment_handler: (optional) AttachmentHandler - allows custom asset handling
:return: string - transcript file make up
"""
if guild:
@@ -132,6 +138,7 @@ async def raw_export(
after=None,
support_dev=support_dev,
bot=bot,
+ attachment_handler=attachment_handler
).export()
).html
diff --git a/chat_exporter/construct/attachment_handler.py b/chat_exporter/construct/attachment_handler.py
new file mode 100644
index 0000000..c24dad8
--- /dev/null
+++ b/chat_exporter/construct/attachment_handler.py
@@ -0,0 +1,68 @@
+import datetime
+import io
+import pathlib
+from typing import Union
+
+import aiohttp
+import discord
+
+
+class AttachmentHandler:
+ """Handle the saving of attachments (images, videos, audio, etc.)
+
+ Subclass this to implement your own asset handler."""
+
+ async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment:
+ """Implement this to process the asset and return a url to the stored attachment.
+ :param attachment: discord.Attachment
+ :return: discord.Attachment
+ """
+ raise NotImplementedError
+
+class AttachmentToLocalFileHostHandler(AttachmentHandler):
+ """Save the assets to a local file host and embed the assets in the transcript from there."""
+
+ def __init__(self, base_path: Union[str, pathlib.Path], url_base: str):
+ if isinstance(base_path, str):
+ base_path = pathlib.Path(base_path)
+ self.base_path = base_path
+ self.url_base = url_base
+
+ async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment:
+ """Implement this to process the asset and return a url to the stored attachment.
+ :param attachment: discord.Attachment
+ :return: discord.Attachment
+ """
+ file_name = f"{int(datetime.datetime.utcnow().timestamp())}_{attachment.filename}".replace(' ', '%20')
+ asset_path = self.base_path / file_name
+ await attachment.save(asset_path)
+ file_url = f"{self.url_base}/{file_name}"
+ attachment.url = file_url
+ attachment.proxy_url = file_url
+ return attachment
+
+
+class AttachmentToDiscordChannelHandler(AttachmentHandler):
+ """Save the attachment to a discord channel and embed the assets in the transcript from there."""
+
+ def __init__(self, channel: discord.TextChannel):
+ self.channel = channel
+
+ async def process_asset(self, attachment: discord.Attachment) -> discord.Attachment:
+ """Implement this to process the asset and return a url to the stored attachment.
+ :param attachment: discord.Attachment
+ :return: discord.Attachment
+ """
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.get(attachment.url) as res:
+ if res.status != 200:
+ res.raise_for_status()
+ data = io.BytesIO(await res.read())
+ data.seek(0)
+ attach = discord.File(data, attachment.filename)
+ msg: discord.Message = await self.channel.send(file=attach)
+ return msg.attachments[0]
+ except discord.errors.HTTPException as e:
+ # discords http errors, including missing permissions
+ raise e
\ No newline at end of file
diff --git a/chat_exporter/construct/message.py b/chat_exporter/construct/message.py
index 9a42e76..b0733d0 100644
--- a/chat_exporter/construct/message.py
+++ b/chat_exporter/construct/message.py
@@ -1,14 +1,19 @@
import html
+import io
+import traceback
from typing import List, Optional, Union
+import aiohttp
from pytz import timezone
from datetime import timedelta
+from chat_exporter.construct.attachment_handler import AttachmentHandler
from chat_exporter.ext.discord_import import discord
from chat_exporter.construct.assets import Attachment, Component, Embed, Reaction
from chat_exporter.ext.discord_utils import DiscordUtils
from chat_exporter.ext.discriminator import discriminator
+from chat_exporter.ext.cache import cache
from chat_exporter.ext.html_generator import (
fill_out,
bot_tag,
@@ -61,7 +66,8 @@ def __init__(
military_time: bool,
guild: discord.Guild,
meta_data: dict,
- message_dict: dict
+ message_dict: dict,
+ attachment_handler: Optional[AttachmentHandler]
):
self.message = message
self.previous_message = previous_message
@@ -69,7 +75,7 @@ def __init__(
self.military_time = military_time
self.guild = guild
self.message_dict = message_dict
-
+ self.attachment_handler = attachment_handler
self.time_format = "%A, %e %B %Y %I:%M %p"
if self.military_time:
self.time_format = "%A, %e %B %Y %H:%M"
@@ -246,6 +252,8 @@ async def build_assets(self):
self.embeds += await Embed(e, self.guild).flow()
for a in self.message.attachments:
+ if self.attachment_handler and isinstance(self.attachment_handler, AttachmentHandler):
+ a = await self.attachment_handler.process_asset(a)
self.attachments += await Attachment(a, self.guild).flow()
for c in self.message.components:
@@ -336,7 +344,7 @@ async def build_pin_template(self):
("NAME", str(html.escape(self.message.author.display_name))),
("NAME_TAG", await discriminator(self.message.author.name, self.message.author.discriminator), PARSE_MODE_NONE),
("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE),
- ("REF_MESSAGE_ID", str(self.message.reference.message_id), PARSE_MODE_NONE)
+ ("REF_MESSAGE_ID", str(self.message.reference.message_id) if self.message.reference else "", PARSE_MODE_NONE)
])
async def build_thread_template(self):
@@ -382,6 +390,7 @@ async def build_add(self):
("MESSAGE_ID", str(self.message.id), PARSE_MODE_NONE),
])
+ @cache()
async def _gather_member(self, author: discord.Member):
member = self.guild.get_member(author.id)
@@ -434,6 +443,7 @@ async def gather_messages(
guild: discord.Guild,
pytz_timezone,
military_time,
+ attachment_handler: Optional[AttachmentHandler],
) -> (str, dict):
message_html: str = ""
meta_data: dict = {}
@@ -441,7 +451,7 @@ async def gather_messages(
message_dict = {message.id: message for message in messages}
- if "thread" in str(messages[0].channel.type) and messages[0].reference:
+ if messages and "thread" in str(messages[0].channel.type) and messages[0].reference:
channel = guild.get_channel(messages[0].reference.channel_id)
if not channel:
@@ -460,7 +470,9 @@ async def gather_messages(
guild,
meta_data,
message_dict,
- ).construct_message()
+ attachment_handler,
+ ).construct_message()
+
message_html += content_html
previous_message = message
diff --git a/chat_exporter/construct/transcript.py b/chat_exporter/construct/transcript.py
index cab6ebf..60ea8a8 100644
--- a/chat_exporter/construct/transcript.py
+++ b/chat_exporter/construct/transcript.py
@@ -7,6 +7,7 @@
import pytz
+from chat_exporter.construct.attachment_handler import AttachmentHandler
from chat_exporter.ext.discord_import import discord
from chat_exporter.construct.message import gather_messages
@@ -35,6 +36,7 @@ def __init__(
after: Optional[datetime.datetime],
support_dev: bool,
bot: Optional[discord.Client],
+ attachment_handler: Optional[AttachmentHandler],
):
self.channel = channel
self.messages = messages
@@ -45,6 +47,7 @@ def __init__(
self.after = after
self.support_dev = support_dev
self.pytz_timezone = pytz_timezone
+ self.attachment_handler = attachment_handler
# This is to pass timezone in to mention.py without rewriting
setattr(discord.Guild, "timezone", self.pytz_timezone)
@@ -58,6 +61,7 @@ async def build_transcript(self):
self.channel.guild,
self.pytz_timezone,
self.military_time,
+ self.attachment_handler
)
await self.export_transcript(message_html, meta_data)
clear_cache()
diff --git a/chat_exporter/ext/cache.py b/chat_exporter/ext/cache.py
index 600155a..2fb96ce 100644
--- a/chat_exporter/ext/cache.py
+++ b/chat_exporter/ext/cache.py
@@ -1,4 +1,5 @@
from functools import wraps
+from typing import Any
_internal_cache: dict = {}
@@ -23,13 +24,19 @@ def clear_cache():
def cache():
def decorator(func):
- def _make_key(args, kwargs):
- key = [f'{func.__module__}.{func.__name__}']
- key.extend(repr(o) for o in args)
+ def _make_key(args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:
+ def _true_repr(o):
+ if o.__class__.__repr__ is object.__repr__:
+ # this is how MessageConstruct can retain
+ # caching across multiple instances
+ return f'<{o.__class__.__module__}.{o.__class__.__name__}>'
+ return repr(o)
+ key = [f'{func.__module__}.{func.__name__}']
+ key.extend(_true_repr(o) for o in args)
for k, v in kwargs.items():
- key.append(repr(k))
- key.append(repr(v))
+ key.append(_true_repr(k))
+ key.append(_true_repr(v))
return ':'.join(key)
diff --git a/chat_exporter/html/base.html b/chat_exporter/html/base.html
index 368a2af..ca09e3e 100644
--- a/chat_exporter/html/base.html
+++ b/chat_exporter/html/base.html
@@ -661,13 +661,14 @@
.markup li {
margin: 0;
- padding: 0;
border: 0;
font-weight: inherit;
font-style: inherit;
font-family: inherit;
font-size: 100%;
vertical-align: baseline;
+ padding-top: 5px;
+ padding-bottom: 5px;
}
diff --git a/chat_exporter/html/message/pin.html b/chat_exporter/html/message/pin.html
index 320e38c..11c0ac8 100644
--- a/chat_exporter/html/message/pin.html
+++ b/chat_exporter/html/message/pin.html
@@ -18,4 +18,3 @@
-
\ No newline at end of file
diff --git a/chat_exporter/parse/markdown.py b/chat_exporter/parse/markdown.py
index bb327f5..dd7f286 100644
--- a/chat_exporter/parse/markdown.py
+++ b/chat_exporter/parse/markdown.py
@@ -6,13 +6,16 @@
class ParseMarkdown:
def __init__(self, content):
self.content = content
+ self.code_blocks_content = []
+
async def standard_message_flow(self):
+ self.parse_code_block_markdown()
self.https_http_links()
self.parse_normal_markdown()
- self.parse_code_block_markdown()
- await self.parse_emoji()
+ await self.parse_emoji()
+ self.reverse_code_block_markdown()
return self.content
async def link_embed_flow(self):
@@ -20,26 +23,29 @@ async def link_embed_flow(self):
await self.parse_emoji()
async def standard_embed_flow(self):
+ self.parse_code_block_markdown()
self.https_http_links()
self.parse_embed_markdown()
self.parse_normal_markdown()
- self.parse_code_block_markdown()
- await self.parse_emoji()
+ await self.parse_emoji()
+ self.reverse_code_block_markdown()
return self.content
async def special_embed_flow(self):
self.https_http_links()
- self.parse_normal_markdown()
self.parse_code_block_markdown()
- await self.parse_emoji()
+ self.parse_normal_markdown()
+ await self.parse_emoji()
+ self.reverse_code_block_markdown()
return self.content
async def message_reference_flow(self):
self.strip_preserve()
- self.parse_normal_markdown()
self.parse_code_block_markdown(reference=True)
+ self.parse_normal_markdown()
+ self.reverse_code_block_markdown()
self.parse_br()
return self.content
@@ -95,7 +101,7 @@ def order_list_markdown_to_html(self):
indent = len(indent)
if started:
- html += '\n'
+ html += '\n'
started = False
if indent % 2 == 0:
while indent < indent_stack[-1]:
@@ -114,25 +120,30 @@ def order_list_markdown_to_html(self):
html += f'- {content.strip()}
\n'
else:
- html += line
+ while len(indent_stack) > 1:
+ html += '
'
+ indent_stack.pop()
+ if not started:
+ html += '
'
+ started = True
+ html += line + '\n'
while len(indent_stack) > 1:
html += '\n'
indent_stack.pop()
- if not started:
- self.content = html
+ self.content = html
def parse_normal_markdown(self):
- # self.order_list_markdown_to_html()
+ self.order_list_markdown_to_html()
holder = (
[r"__(.*?)__", '%s'],
[r"\*\*(.*?)\*\*", '%s'],
[r"\*(.*?)\*", '%s'],
[r"~~(.*?)~~", '%s'],
- # [r"###\s(.*?)\n", '