Skip to content

Commit 4d57c64

Browse files
mripardpsss
authored and committed
Create a Public Inbox Plugin
Public Inbox is a mailing-list archive project notably used by the Linux Foundation to host the lore.kernel.org ML archive. This plugin fetches from those archives the threads that the user started or was involved in and integrates them into the report. Signed-off-by: Maxime Ripard <[email protected]>
1 parent 11c7d5c commit 4d57c64

File tree

3 files changed

+437
-45
lines changed

3 files changed

+437
-45
lines changed

did/plugins/__init__.py

Lines changed: 47 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -11,49 +11,51 @@
1111
1212
This is the default plugin order:
1313
14-
+-------------+-----+
15-
| header | 000 |
16-
+-------------+-----+
17-
| google | 050 |
18-
+-------------+-----+
19-
| nitrate | 100 |
20-
+-------------+-----+
21-
| bugzilla | 200 |
22-
+-------------+-----+
23-
| git | 300 |
24-
+-------------+-----+
25-
| github | 330 |
26-
+-------------+-----+
27-
| gerrit | 350 |
28-
+-------------+-----+
29-
| phabricator | 360 |
30-
+-------------+-----+
31-
| gitlab | 380 |
32-
+-------------+-----+
33-
| pagure | 390 |
34-
+-------------+-----+
35-
| trac | 400 |
36-
+-------------+-----+
37-
| bodhi | 410 |
38-
+-------------+-----+
39-
| koji | 420 |
40-
+-------------+-----+
41-
| trello | 450 |
42-
+-------------+-----+
43-
| rt | 500 |
44-
+-------------+-----+
45-
| redmine | 550 |
46-
+-------------+-----+
47-
| jira | 600 |
48-
+-------------+-----+
49-
| sentry | 650 |
50-
+-------------+-----+
51-
| zammad | 680 |
52-
+-------------+-----+
53-
| wiki | 700 |
54-
+-------------+-----+
55-
| items | 800 |
56-
+-------------+-----+
57-
| footer | 900 |
58-
+-------------+-----+
14+
+--------------+-----+
15+
| header | 000 |
16+
+--------------+-----+
17+
| google | 050 |
18+
+--------------+-----+
19+
| nitrate | 100 |
20+
+--------------+-----+
21+
| bugzilla | 200 |
22+
+--------------+-----+
23+
| git | 300 |
24+
+--------------+-----+
25+
| github | 330 |
26+
+--------------+-----+
27+
| gerrit | 350 |
28+
+--------------+-----+
29+
| phabricator | 360 |
30+
+--------------+-----+
31+
| gitlab | 380 |
32+
+--------------+-----+
33+
| pagure | 390 |
34+
+--------------+-----+
35+
| trac | 400 |
36+
+--------------+-----+
37+
| bodhi | 410 |
38+
+--------------+-----+
39+
| koji | 420 |
40+
+--------------+-----+
41+
| trello | 450 |
42+
+--------------+-----+
43+
| rt | 500 |
44+
+--------------+-----+
45+
| redmine | 550 |
46+
+--------------+-----+
47+
| jira | 600 |
48+
+--------------+-----+
49+
| sentry | 650 |
50+
+--------------+-----+
51+
| zammad | 680 |
52+
+--------------+-----+
53+
| wiki | 700 |
54+
+--------------+-----+
55+
| public-inbox | 750 |
56+
+--------------+-----+
57+
| items | 800 |
58+
+--------------+-----+
59+
| footer | 900 |
60+
+--------------+-----+
5961
"""

did/plugins/public_inbox.py

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
# coding: utf-8
2+
"""
3+
Public-Inbox stats about mailing list threads
4+
5+
Config example::
6+
7+
[inbox]
8+
type = public-inbox
9+
url = https://lore.kernel.org
10+
"""
11+
12+
import copy
13+
import datetime
14+
import email.utils
15+
import gzip
16+
import mailbox
17+
import tempfile
18+
import typing
19+
import urllib.parse
20+
21+
import requests
22+
23+
from did.base import Config, Date, ReportError, User
24+
from did.stats import Stats, StatsGroup
25+
from did.utils import item, log
26+
27+
28+
class Message(object):
    """Read-only convenience wrapper around one message of an mbox.

    Every accessor reads straight from the wrapped ``mailbox.mboxMessage``
    headers; nothing is cached on the wrapper itself.
    """

    def __init__(self, msg: mailbox.mboxMessage) -> None:
        self.msg = msg

    def __msg_id(self, keyid: str) -> typing.Optional[str]:
        """Return header *keyid* without whitespace and angle brackets.

        Returns None when the header is absent or empty, so that a blank
        ``In-Reply-To`` is not mistaken for a reference to a parent.
        """
        msgid = self.msg[keyid]
        if msgid is None:
            return None

        # str() because the email package may hand back a Header object
        # instead of a plain string for values with non-ASCII bytes.
        # Whitespace is removed first: otherwise a trailing blank would
        # shield the closing ">" from rstrip().
        msgid = str(msgid).strip().lstrip("<").rstrip(">")
        return msgid or None

    def id(self) -> typing.Optional[str]:
        """Return the Message-Id of the message, or None if it has none."""
        return self.__msg_id("Message-Id")

    def parent_id(self) -> typing.Optional[str]:
        """Return the In-Reply-To id, or None if the message is no reply."""
        return self.__msg_id("In-Reply-To")

    def subject(self) -> str:
        """Return the subject on a single line with whitespace collapsed.

        A message without a Subject header yields an empty string.
        """
        subject = self.msg["Subject"]
        if subject is None:
            return ""

        # split() without arguments also splits on line breaks, so folded
        # (multi-line) headers are flattened in the same pass.
        return " ".join(str(subject).split())

    def date(self) -> datetime.datetime:
        """Return the Date header parsed into a datetime.

        Whatever ``email.utils.parsedate_to_datetime`` raises for a
        missing or malformed header is propagated to the caller.
        """
        return email.utils.parsedate_to_datetime(self.msg["Date"])

    def is_thread_root(self) -> bool:
        """Tell whether the message starts a thread (has no parent id)."""
        return self.parent_id() is None

    def is_from_user(self, user: str) -> bool:
        """Tell whether the From address matches the address in *user*.

        Both values may carry a display name; only the address part is
        compared, ignoring case. A message without a usable From address
        never matches.
        """
        msg_from = email.utils.parseaddr(str(self.msg["From"] or ""))[1]
        if not msg_from:
            return False

        return email.utils.parseaddr(user)[1].lower() == msg_from.lower()

    # "Date" is quoted so the annotation is not evaluated when the class is
    # created; the objects only need a ``date`` attribute holding a
    # ``datetime.date``.
    def is_between_dates(self, since: "Date", until: "Date") -> bool:
        """Tell whether the message date lies in [since, until], inclusive.

        The calendar date is taken from the Date header as sent, i.e. in
        the time zone recorded in the message.
        """
        msg_date = self.date().date()

        return since.date <= msg_date <= until.date
68+
69+
70+
def _unique_messages(mbox: mailbox.mbox) -> typing.Iterable[Message]:
    """Yield every message of *mbox* wrapped in ``Message``, once per id.

    When several messages share the same Message-Id only the first one
    encountered is yielded.
    """
    seen_ids = set()
    for raw in mbox.values():
        wrapped = Message(raw)
        msg_id = wrapped.id()

        if msg_id in seen_ids:
            continue

        seen_ids.add(msg_id)
        yield wrapped
79+
80+
81+
class PublicInbox(object):
    """Client for the archive of one public-inbox server.

    ``messages_cache`` maps a Message-Id to every message downloaded so
    far, which lets thread roots be resolved without a new request when
    the ancestors are already known. ``threads_cache`` maps a
    ``(since, until)`` pair to the search result for that period, so
    asking twice for the same period queries the server only once.
    """

    def __init__(self, parent, user: "User", url: str) -> None:
        self.parent = parent
        self.threads_cache = dict()
        self.messages_cache = dict()
        self.url = url
        self.user = user

    def __get_url(self, path: str) -> str:
        """Resolve *path* against the configured server url."""
        return urllib.parse.urljoin(self.url, path)

    @staticmethod
    def __escape_id(msg_id) -> str:
        """Percent-encode a Message-Id for use as one url path segment.

        "/" has to be escaped as well or it would be read as a path
        separator; "@" is left alone to keep the links readable.
        """
        return urllib.parse.quote(str(msg_id), safe="@")

    def _get_message_url(self, msg: "Message") -> str:
        """Return the web url of *msg* on the server."""
        return self.__get_url("/r/%s/" % self.__escape_id(msg.id()))

    def _print_msg(self, options, msg: "Message") -> None:
        """Print one report entry for *msg* using did's ``item`` helper."""
        if options.format == 'markdown':
            item("[{0}]({1})".format(msg.subject(), self._get_message_url(msg)),
                 level=1, options=options)

        else:
            item(msg.subject(), level=1, options=options)

            # The markdown entry already carries the link, so the url is
            # only printed separately for the other formats.
            if options.verbose:
                opt = copy.deepcopy(options)
                opt.width = 0
                item(self._get_message_url(msg), level=2, options=opt)

    def __get_msgs_from_content(self, content: bytes) -> "list[Message]":
        """Parse a gzip-compressed mbox and return its unique messages.

        ``mailbox.mbox`` only works on a file path, hence the detour
        through a temporary file. All messages are read while that file
        still exists and the mailbox is closed afterwards so its file
        handle is not leaked. Errors raised by ``gzip.decompress`` for
        malformed data are propagated.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            tmp.write(gzip.decompress(content))
            # Make sure the data is on disk before it is reopened by name.
            tmp.flush()

            mbox = mailbox.mbox(tmp.name)
            try:
                return self.__get_msgs_from_mbox(mbox)
            finally:
                mbox.close()

    def __get_msgs_from_mbox(self, mbox: mailbox.mbox) -> "list[Message]":
        """Return the unique messages of *mbox*, recording them in the cache."""
        msgs = list()

        for msg in _unique_messages(mbox):
            msg_id = msg.id()

            log.debug("Found message %s." % msg_id)
            msgs.append(msg)

            if msg_id not in self.messages_cache:
                log.debug("Message %s is new, adding to the cache." % msg_id)
                self.messages_cache[msg_id] = msg

        return msgs

    def __fetch_thread_root(self, msg: "Message") -> "typing.Optional[Message]":
        """Download the whole thread of *msg* and return its root message.

        Every message of the thread ends up in ``messages_cache``.
        Returns None when the request fails or when the downloaded thread
        holds no message without a parent.
        """
        msg_id = msg.id()
        url = self.__get_url("/all/%s/t.mbox.gz" % self.__escape_id(msg_id))

        log.debug("Fetching message %s thread (%s)" % (msg_id, url))
        resp = requests.get(url)
        if not resp.ok:
            # An error page is not a gzip stream; do not try to unpack it.
            log.debug("Fetching thread of message %s failed (HTTP %s)."
                      % (msg_id, resp.status_code))
            return None

        for candidate in self.__get_msgs_from_content(resp.content):
            if candidate.is_thread_root():
                log.debug("Found message %s thread root: %s."
                          % (msg_id, candidate.id()))
                return candidate

        log.debug("No thread root found for message %s." % msg_id)
        return None

    def __get_thread_root(self, msg: "Message") -> "typing.Optional[Message]":
        """Return the root of the thread *msg* belongs to, or None.

        The chain of parents is followed through ``messages_cache``; as
        soon as an ancestor is unknown the thread is downloaded from the
        server instead.
        """
        log.debug("Looking for thread root of message %s" % msg.id())

        current = msg
        # Ids already walked through. In-Reply-To is untrusted data: a
        # message referring to itself (or a longer cycle) would otherwise
        # keep this loop spinning forever.
        visited = set()
        while not current.is_thread_root():
            parent_id = current.parent_id()
            log.debug("Parent is %s" % parent_id)

            parent = self.messages_cache.get(parent_id)
            if parent is None or parent_id in visited:
                root = self.__fetch_thread_root(msg)
                if root is not None:
                    log.debug("Found root message %s for message %s"
                              % (root.id(), msg.id()))
                return root

            visited.add(parent_id)
            current = parent

        if current is msg:
            log.debug("Message is thread root already. Returning.")
        else:
            log.debug("Parent is the thread root, returning.")
        return current

    def __fetch_all_threads(self, since: "Date", until: "Date") -> "list[Message]":
        """Search the server for the user's mails sent between two dates.

        Returns the matching messages, or an empty list when the server
        answers with an error status.
        """
        since_str = since.date.isoformat()
        until_str = until.date.isoformat()

        log.info("Fetching all mails on server %s from %s between %s and %s" %
                 (self.url, self.user, since_str, until_str))
        resp = requests.post(
            self.__get_url("/all/"),
            headers={"Content-Length": "0"},
            params={
                "q": "(f:%s AND d:%s..%s)"
                % (self.user.email, since_str, until_str),
                "x": "m",
            },
        )

        if not resp.ok:
            log.debug("Search request failed (HTTP %s)." % resp.status_code)
            return []

        return self.__get_msgs_from_content(resp.content)

    def get_all_threads(self, since: "Date", until: "Date"):
        """Yield the root message of every thread the user posted to.

        Each root is yielded once, however many of the user's mails
        belong to its thread. Threads whose root cannot be determined are
        skipped with a warning.
        """
        period = (since, until)
        if period not in self.threads_cache:
            self.threads_cache[period] = self.__fetch_all_threads(since, until)

        # Ids of the roots yielded so far.
        found = set()
        for msg in self.threads_cache[period]:
            if msg.id() in found:
                continue

            if msg.is_thread_root():
                root = msg
            else:
                root = self.__get_thread_root(msg)
                if root is None:
                    log.warning("Could not find the thread root of message "
                                "%s, skipping." % msg.id())
                    continue

                if root.id() in found:
                    log.debug("Root message already encountered... Skip.")
                    continue

            found.add(root.id())
            yield root
218+
219+
220+
class ThreadsStarted(Stats):
    """ Mail threads started """
    # NOTE(review): the docstring above is kept verbatim; it presumably
    # doubles as the section title in the report -- confirm in did.stats.

    def fetch(self):
        # Keep the thread roots that the user wrote within the period.
        log.info(
            "Searching for new threads on {0} started by {1}".format(
                self.parent.url, self.user))

        since = self.options.since
        until = self.options.until

        started = []
        for thread in self.parent.public_inbox.get_all_threads(since, until):
            if not thread.is_from_user(self.user.email):
                continue
            if thread.is_between_dates(since, until):
                started.append(thread)

        self.stats = started

    def show(self):
        # Nothing is printed when there is neither an error nor a result.
        if self._error or self.stats:
            self.header()
            for thread in self.stats:
                self.parent.public_inbox._print_msg(self.options, thread)
246+
247+
248+
class ThreadsInvolved(Stats):
    """ Mail threads involved in """
    # NOTE(review): the docstring above is kept verbatim; it presumably
    # doubles as the section title in the report -- confirm in did.stats.

    def fetch(self):
        # Keep the thread roots that were NOT written by the user within
        # the period, i.e. threads the user only replied to.
        log.info(
            "Searching for mail threads on {0} where {1} was involved".format(
                self.parent.url, self.user))

        since = self.options.since
        until = self.options.until

        involved = []
        for thread in self.parent.public_inbox.get_all_threads(since, until):
            started_by_user = (
                thread.is_from_user(self.user.email)
                and thread.is_between_dates(since, until))
            if not started_by_user:
                involved.append(thread)

        self.stats = involved

    def show(self):
        # Nothing is printed when there is neither an error nor a result.
        if self._error or self.stats:
            self.header()
            for thread in self.stats:
                self.parent.public_inbox._print_msg(self.options, thread)
274+
275+
276+
class PublicInboxStats(StatsGroup):
    """ Public-Inbox Mailing List Archive """
    # NOTE(review): the docstring above is kept verbatim; it presumably
    # doubles as the group title in the report -- confirm in did.stats.

    # Position of this group in the report.
    order = 750

    def __init__(self, option, name=None, parent=None, user=None):
        StatsGroup.__init__(self, option, name, parent, user)

        # The "url" key of the plugin's config section is mandatory.
        settings = dict(Config().section(option))
        try:
            url = settings["url"]
        except KeyError:
            raise ReportError("No url in the [{0}] section.".format(option))
        self.url = url

        # One client shared by both stats, so they reuse its caches.
        self.public_inbox = PublicInbox(self.parent, self.user, self.url)

        started = ThreadsStarted(option=option + "-started", parent=self)
        involved = ThreadsInvolved(option=option + "-involved", parent=self)
        self.stats = [started, involved]

0 commit comments

Comments
 (0)